summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndre Lorbach <alorbach@adiscon.com>2013-07-03 18:07:04 +0200
committerAndre Lorbach <alorbach@adiscon.com>2013-07-03 18:07:04 +0200
commitebe5266336f1421f20edf844872ad2e2ab80c4b3 (patch)
tree8716f7429beccd8824e6a6112e4695bb0b6b5498
parent036096c1b4d7cb67b35d9da25f2cc031b1a6bbff (diff)
downloadrsyslog-ebe5266336f1421f20edf844872ad2e2ab80c4b3.tar.gz
rsyslog-ebe5266336f1421f20edf844872ad2e2ab80c4b3.tar.bz2
rsyslog-ebe5266336f1421f20edf844872ad2e2ab80c4b3.zip
Added support for multiple regex commands, useful for parsing multiple rsyslog formats
-rwxr-xr-xplugins/impstats/statslog-splitter.py132
1 file changed, 79 insertions, 53 deletions
diff --git a/plugins/impstats/statslog-splitter.py b/plugins/impstats/statslog-splitter.py
index e7102c51..6a6870bf 100755
--- a/plugins/impstats/statslog-splitter.py
+++ b/plugins/impstats/statslog-splitter.py
@@ -28,10 +28,19 @@ nLogLineNum = 0
nLogFileCount = 0
szChartOptionalArgs = ""
-# Create regex for logline
-loglineregex = re.compile(r"(...)(?:.|..)([0-9]{1,2}) ([0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}) ([a-zA-Z0-9_\-\.]{1,256}) ([A-Za-z0-9_\-\/\.]{1,32}): (.*?): (.*?) \n")
-# Array Indexes
-LN_MONTH = 1;LN_DAY = 2;LN_TIME = 3;LN_HOST = 4;LN_SYSLOGTAG = 5;LN_LOGOBJECT = 6;LN_LOGDATA = 7
+# Create regex for loglines
+loglineregexes = []
+loglineindexes = []
+
+# Traditional Format
+# Sample Line: Jun 26 14:21:44 nhpljt084 rsyslogd-pstats: main Q[DA]: size=0 enqueued=0 full=0 discarded.full=0 discarded.nf=0 maxqsize=0
+loglineregexes.append( re.compile(r"(...)(?:.|..)([0-9]{1,2}) ([0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}) ([a-zA-Z0-9_\-\.]{1,256}) ([A-Za-z0-9_\-\/\.]{1,32}): (.*?): (.*?) \n") )
+loglineindexes.append( {"LN_YEAR":-1, "LN_MONTH":1, "LN_DAY":2, "LN_TIME":3, "LN_HOST":4, "LN_SYSLOGTAG":5, "LN_LOGOBJECT":6, "LN_LOGDATA":7} )
+
+# Newer Format
+# Sample format: 2013-07-03T17:22:55.680078+02:00 devdebian6 rsyslogd-pstats: main Q: size=358 enqueued=358 full=0 discarded.full=0 discarded.nf=0 maxqsize=358
+loglineregexes.append( re.compile(r"([0-9]{4,4})-([0-9]{1,2})-([0-9]{1,2})T([0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2})\.[0-9]{1,6}.[0-9]{1,2}:[0-9]{1,2} ([a-zA-Z0-9_\-\.]{1,256}) ([A-Za-z0-9_\-\/\.]{1,32}): (.*?): (.*?) \n") )
+loglineindexes.append( {"LN_YEAR":1, "LN_MONTH":2, "LN_DAY":3, "LN_TIME":4, "LN_HOST":5, "LN_SYSLOGTAG":6, "LN_LOGOBJECT":7, "LN_LOGDATA":8} )
# Init result with file handles
outputFiles = {}
@@ -82,59 +91,76 @@ elif bSingleObjectOutput:
# Init variables
aFields = []
aData = []
-
- # Parse IMPStats Line!
- result = loglineregex.split(line)
- # Found valid logline, save into file!
- if len(result) >= LN_LOGDATA and result[LN_SYSLOGTAG] == "rsyslogd-pstats":
- # Convert Datetime!
- filedate = datetime.datetime.strptime(result[LN_MONTH] + " " + str(datetime.datetime.now().year) + " " + result[LN_DAY] + " " + result[LN_TIME] ,"%b %Y %d %H:%M:%S")
-
- # Split logdata into Array
- aProperties = result[LN_LOGDATA].split(" ")
- for szProperty in aProperties:
- aProperty = szProperty.split("=")
- aFields.append(aProperty[0]) # Append FieldID
- if len(aProperty) > 1:
- aData.append(aProperty[1]) # Append FieldData
- else:
- errorlog.write("Corrupted Logline at line " + str(nLogLineNum) + " failed to parse: " + line)
- break
-
- # Remove invalid characters for filename!
- szFileName = re.sub("[^a-zA-Z0-9]", "_", result[LN_LOGOBJECT]) + ".csv"
-
- # Open file for writing!
- if szFileName not in outputFiles:
- print "Creating file : " + szOutputDir + "/" + szFileName
- outputFiles[szFileName] = open(szOutputDir + "/" + szFileName, 'w')
- nLogFileCount += 1
+ iLogRegExIndex = 0
+ bLogProcessed = False
+
+ # Loop through Regex parsers
+ for loglineregex in loglineregexes:
+ # Parse IMPStats Line!
+ result = loglineregex.split(line)
+ # Found valid logline, save into file!
+ if len(result) >= loglineindexes[iLogRegExIndex]["LN_LOGDATA"] and result[ loglineindexes[iLogRegExIndex]["LN_SYSLOGTAG"] ] == "rsyslogd-pstats":
+ # Convert Datetime!
+ if isinstance( result[ loglineindexes[iLogRegExIndex]["LN_MONTH"] ], int ):
+ filedate = datetime.datetime.strptime(result[ loglineindexes[iLogRegExIndex]["LN_MONTH"] ] + " " + str(datetime.datetime.now().year) + " " + result[ loglineindexes[iLogRegExIndex]["LN_DAY"] ] + " " + result[ loglineindexes[iLogRegExIndex]["LN_TIME"] ] ,"%b %Y %d %H:%M:%S")
+ else:
+ filedate = datetime.datetime.strptime(result[ loglineindexes[iLogRegExIndex]["LN_MONTH"] ] + " " + str(datetime.datetime.now().year) + " " + result[ loglineindexes[iLogRegExIndex]["LN_DAY"] ] + " " + result[ loglineindexes[iLogRegExIndex]["LN_TIME"] ] ,"%m %Y %d %H:%M:%S")
+
+ # Split logdata into Array
+ aProperties = result[ loglineindexes[iLogRegExIndex]["LN_LOGDATA"] ].split(" ")
+ for szProperty in aProperties:
+ aProperty = szProperty.split("=")
+ aFields.append(aProperty[0]) # Append FieldID
+ if len(aProperty) > 1:
+ aData.append(aProperty[1]) # Append FieldData
+ else:
+ errorlog.write("Corrupted Logline at line " + str(nLogLineNum) + " failed to parse: " + line)
+ break
+
+ # Remove invalid characters for filename!
+ szFileName = re.sub("[^a-zA-Z0-9]", "_", result[ loglineindexes[iLogRegExIndex]["LN_LOGOBJECT"] ]) + ".csv"
+
+ # Open file for writing!
+ if szFileName not in outputFiles:
+ print "Creating file : " + szOutputDir + "/" + szFileName
+ outputFiles[szFileName] = open(szOutputDir + "/" + szFileName, 'w')
+ nLogFileCount += 1
+
+ # Output CSV Header
+ outputFiles[szFileName].write("Date;")
+ outputFiles[szFileName].write("Host;")
+ outputFiles[szFileName].write("Object;")
+ for szField in aFields:
+ outputFiles[szFileName].write(szField + ";")
+ outputFiles[szFileName].write("\n")
+ #else:
+ # print "already open: " + szFileName
- # Output CSV Header
- outputFiles[szFileName].write("Date;")
- outputFiles[szFileName].write("Host;")
- outputFiles[szFileName].write("Object;")
- for szField in aFields:
- outputFiles[szFileName].write(szField + ";")
+ # Output CSV Data
+ outputFiles[szFileName].write(filedate.strftime("%Y/%b/%d %H:%M:%S") + ";")
+ outputFiles[szFileName].write(result[ loglineindexes[iLogRegExIndex]["LN_HOST"] ] + ";")
+ outputFiles[szFileName].write(result[ loglineindexes[iLogRegExIndex]["LN_LOGOBJECT"] ] + ";")
+ for szData in aData:
+ outputFiles[szFileName].write(szData + ";")
outputFiles[szFileName].write("\n")
- #else:
- # print "already open: " + szFileName
-
- # Output CSV Data
- outputFiles[szFileName].write(filedate.strftime("%Y/%b/%d %H:%M:%S") + ";")
- outputFiles[szFileName].write(result[LN_HOST] + ";")
- outputFiles[szFileName].write(result[LN_LOGOBJECT] + ";")
- for szData in aData:
- outputFiles[szFileName].write(szData + ";")
- outputFiles[szFileName].write("\n")
-
- #print result[LN_LOGOBJECT]
- #print result[LN_LOGDATA]
- else:
+
+ # Set Log as processed, abort Loop!
+ bLogProcessed = True
+
+ #print result[LN_LOGOBJECT]
+ #print result[LN_LOGDATA]
+
+ # Increment Logreged Counter
+ iLogRegExIndex += 1
+
+ # Abort Loop if processed
+ if bLogProcessed:
+ break
+
+ # Debug output if format not detected
+ if bLogProcessed == False:
print "Fail parsing logline: "
print result
- break
-
# Increment helper counter
nLogLineNum += 1