From 4e50236f7ed09b5651b90f1411916ffa54f324f2 Mon Sep 17 00:00:00 2001 From: Edgars Voroboks Date: Wed, 2 Dec 2020 22:43:34 +0200 Subject: [PATCH] Initial commit of version 1.1 --- README.md | 0 xmlutils/bin/xmlkvrecursive.py | 117 ++++++++++++++++++++++++++++ xmlutils/bin/xmlprettyprint.py | 92 ++++++++++++++++++++++ xmlutils/bin/xmlsplit.py | 89 +++++++++++++++++++++ xmlutils/bin/xmlstripdeclaration.py | 34 ++++++++ xmlutils/default/app.conf | 18 +++++ xmlutils/default/commands.conf | 27 +++++++ xmlutils/default/searchbnf.conf | 66 ++++++++++++++++ xmlutils/metadata/default.meta | 19 +++++ xmlutils/test/entity.csv | 10 +++ 10 files changed, 472 insertions(+) create mode 100644 README.md create mode 100755 xmlutils/bin/xmlkvrecursive.py create mode 100755 xmlutils/bin/xmlprettyprint.py create mode 100755 xmlutils/bin/xmlsplit.py create mode 100755 xmlutils/bin/xmlstripdeclaration.py create mode 100644 xmlutils/default/app.conf create mode 100644 xmlutils/default/commands.conf create mode 100644 xmlutils/default/searchbnf.conf create mode 100644 xmlutils/metadata/default.meta create mode 100644 xmlutils/test/entity.csv diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/xmlutils/bin/xmlkvrecursive.py b/xmlutils/bin/xmlkvrecursive.py new file mode 100755 index 0000000..bdff1bf --- /dev/null +++ b/xmlutils/bin/xmlkvrecursive.py @@ -0,0 +1,117 @@ +# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0 +import sys,splunk.Intersplunk +import re +import urllib +import xml.sax +import xml.sax.saxutils as saxutils +from xml.sax.handler import ContentHandler +from xml.sax.handler import EntityResolver +from xml.sax.xmlreader import InputSource +import StringIO +import types + +class NullInputSource(InputSource): + def getByteStream(self): + return StringIO.StringIO("entity files not supported.") + +class NullEntityResolver(EntityResolver): + def resolveEntity(self,publicId,systemId): + return NullInputSource() + +class XmlHandler(ContentHandler): + def __init__(self, flatten): + self.flatten = flatten + + def reset(self): + self.key_prefix = [] + self.keys_seen = [] + self.new_fields = {} + + def getNewFields(self): + return self.new_fields + + def setValue( self, value, suffix='' ): + dest_key = '_'.join(self.key_prefix) + suffix + + if( len( str(value).strip() ) > 0 ): + #handle multiple values + if dest_key in self.new_fields: + self.new_fields['multi values'] = 'yep' + #this is only the second value, so convert value to a list + if type(self.new_fields[dest_key]) is not types.ListType: + self.new_fields[dest_key] = [self.new_fields[dest_key]] + #append the value to the list + self.new_fields[dest_key].append(str(value)) + else: + #insert the simple value + self.new_fields[dest_key] = str(value) + + def startElement(self, name, attrs): + self.key_prefix.append(name) + + #if flatten is set, then create a new prefix if this prefix has already been used + if flatten and '_'.join(self.key_prefix) in self.keys_seen: + self.key_prefix.pop() + count = 2 + newName = name + '[' + str(count) + ']' + while '_'.join(self.key_prefix) + '_' + newName in self.keys_seen: + count += 1 + newName = name + '[' + str(count) + ']' + self.key_prefix.append(newName) + + self.keys_seen.append( '_'.join(self.key_prefix) ) + + if attrs.getLength() > 0: + for k in attrs.getNames(): + self.setValue( attrs.getValue(k), "-" + k ) + + def characters(self, content): + if content is not None and content.strip() is not '': + self.setValue( content.strip() ) + + def endElement(self, name): + self.key_prefix.pop() + + +try: + results,dummyresults,settings = splunk.Intersplunk.getOrganizedResults() + + keywords, argvals = splunk.Intersplunk.getKeywordsAndOptions() + + flatten = argvals.get("flatten", "False") + if flatten.strip().lower() in ['true','1','yes']: + flatten = True + else: + flatten = False + + handler = XmlHandler(flatten) + + for r in results: + try: + if 'xml' in r: + xml_text = r['xml'] + else: + raw = r["_raw"] + + xml_text = raw[ raw.index( '<' ) : raw.rindex( '>' )+1 ] + + handler.reset() + + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.setEntityResolver(NullEntityResolver()) + parser.parse(StringIO.StringIO(xml_text)) + + for k,v in handler.getNewFields().iteritems(): + r[k] = v + except: + import traceback + stack = traceback.format_exc() + r['_raw'] = "Failed to parse: " + str(stack) + "\n" + r['_raw'] + +except: + import traceback + stack = traceback.format_exc() + results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack)) + +splunk.Intersplunk.outputResults( results ) diff --git a/xmlutils/bin/xmlprettyprint.py b/xmlutils/bin/xmlprettyprint.py new file mode 100755 index 0000000..e21792c --- /dev/null +++ b/xmlutils/bin/xmlprettyprint.py @@ -0,0 +1,92 @@ +# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0 +import sys,splunk.Intersplunk +import xml.sax +import xml.sax.saxutils as saxutils +from xml.sax.handler import ContentHandler +from xml.sax.handler import EntityResolver +from xml.sax.xmlreader import InputSource +import StringIO + +class NullInputSource(InputSource): + def getByteStream(self): + return StringIO.StringIO("entity files not supported.") + +class NullEntityResolver(EntityResolver): + def resolveEntity(self,publicId,systemId): + return NullInputSource() + +class XmlHandler(ContentHandler): + def __init__(self): + self.indent = 0 + + def reset(self , r): + self.current_output = '' + self.indent = 0 + self.open_tag = '' + + def getOutput(self): + return self.current_output + + def startElement(self, name, attrs): + self.open_tag = name + self.current_output += '\n' + ' ' * self.indent + self.indent += 1 + self.current_output += '<' + name + + if attrs.getLength() > 0: + for k in attrs.getNames(): + self.current_output += ' ' + k + '=' + saxutils.quoteattr(attrs.getValue(k)) + self.current_output += '>' + + def characters(self, content): + if len(content.strip()) > 0: +# self.current_output += ' ' * self.indent + self.current_output += saxutils.escape( content ) #+ '\n' + + def endElement(self, name): + self.indent -= 1 + if self.open_tag != name: + self.current_output += '\n' + ' ' * self.indent + self.current_output += '' + + +try: + results,dummyresults,settings = splunk.Intersplunk.getOrganizedResults() + + handler = XmlHandler() + + for r in results: + try: + if 'xml' in r: + xml_text = r['xml'] + dest_field = 'xml' + else: + raw = r["_raw"] + dest_field = '_raw' + + xml_text = raw[ raw.index( '<' ) : raw.rindex( '>' )+1 ] + + handler.reset(xml_text) + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.setEntityResolver(NullEntityResolver()) + parser.parse(StringIO.StringIO(xml_text)) + + r[dest_field] = handler.getOutput() + + if 'xml' in r: + xml_text = r['xml'] + else: + raw = r["_raw"] + + except: + import traceback + stack = traceback.format_exc() + r['_raw'] = "Failed to parse: " + str(stack) + "\n" + r['_raw'] + +except: + import traceback + stack = traceback.format_exc() + results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack)) + +splunk.Intersplunk.outputResults( results ) diff --git a/xmlutils/bin/xmlsplit.py b/xmlutils/bin/xmlsplit.py new file mode 100755 index 0000000..54d03de --- /dev/null +++ b/xmlutils/bin/xmlsplit.py @@ -0,0 +1,89 @@ +# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0 +import sys,splunk.Intersplunk +import re +import xml.sax +import xml.sax.saxutils as saxutils +from xml.sax.handler import ContentHandler +from xml.sax.handler import EntityResolver +from xml.sax.xmlreader import InputSource +import copy +import StringIO + + +class NullInputSource(InputSource): + def getByteStream(self): + return StringIO.StringIO("entity files not supported.") + +class NullEntityResolver(EntityResolver): + def resolveEntity(self,publicId,systemId): + return NullInputSource() + +class XmlHandler(ContentHandler): + def __init__(self, field): + self.field = field + + def reset(self , newResults): + self.current_output = '' + self.newResults = newResults + + def startElement(self, name, attrs): + if name == field: + self.current_output = '' + self.current_output += '<' + name + + if attrs.getLength() > 0: + for k in attrs.getNames(): + self.current_output += ' ' + k + '=' + saxutils.quoteattr(attrs.getValue(k)) + self.current_output += '>' + + def characters(self, content): + self.current_output += saxutils.escape( content ) + + def endElement(self, name): + self.current_output += '' + if name == field: + if re.match('^<' + field + '[ >]', self.current_output): + newRow = copy.deepcopy(r) + newRow['_raw'] = self.current_output + self.newResults.append(newRow) + self.current_output = '' + +try: + results,dummyresults,settings = splunk.Intersplunk.getOrganizedResults() + + keywords, argvals = splunk.Intersplunk.getKeywordsAndOptions() + + field = argvals.get("field", None) + if field is None: + raise Exception("Must supply name of field in field=fieldName") + + newResults = [] + + handler = XmlHandler(field) + + for r in results: + try: + if 'xml' in r: + xml_text = r['xml'] + else: + raw = r["_raw"] + xml_text = raw[ raw.index( '<' ) : raw.rindex( '>' )+1 ] + + handler.reset(newResults) + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.setEntityResolver(NullEntityResolver()) + parser.parse(StringIO.StringIO(xml_text)) + except: + import traceback + stack = traceback.format_exc() + r['_raw'] = "Failed to parse: " + str(stack) + r['_raw'] + newResults = [r] + +except: + import traceback + stack = traceback.format_exc() + newResults = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack)) + +splunk.Intersplunk.outputResults( newResults ) + diff --git a/xmlutils/bin/xmlstripdeclaration.py b/xmlutils/bin/xmlstripdeclaration.py new file mode 100755 index 0000000..a2623d2 --- /dev/null +++ b/xmlutils/bin/xmlstripdeclaration.py @@ -0,0 +1,34 @@ +# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0 +import splunk.Intersplunk + + +try: + results,dummyresults,settings = splunk.Intersplunk.getOrganizedResults() + + for r in results: + try: + if 'xml' in r: + xml_text = r['xml'] + dest_field = 'xml' + else: + raw = r["_raw"] + dest_field = '_raw' + + xml_text = raw[ raw.index( '<' ) : raw.rindex( '>' )+1 ] + if xml_text.startswith('' )+1 ] + + r[dest_field] = xml_text + + except: + import traceback + stack = traceback.format_exc() + r['_raw'] = "Failed to parse: " + str(stack) + r['_raw'] + +except: + import traceback + stack = traceback.format_exc() + results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack)) + +splunk.Intersplunk.outputResults( results ) diff --git a/xmlutils/default/app.conf b/xmlutils/default/app.conf new file mode 100644 index 0000000..dbdb044 --- /dev/null +++ b/xmlutils/default/app.conf @@ -0,0 +1,18 @@ +[launcher] +version = 1.1 +author = vbumgarner +description = XML utilities + +[package] +id = xmlutils + +[install] +state = enabled +build = 2 + +[ui] +is_visible = false +is_manageable = false +label = xmlutils + + diff --git a/xmlutils/default/commands.conf b/xmlutils/default/commands.conf new file mode 100644 index 0000000..568a04a --- /dev/null +++ b/xmlutils/default/commands.conf @@ -0,0 +1,27 @@ +[xmlkvrecursive] +filename = xmlkvrecursive.py +retainsevents = true +overrides_timeorder = false +streaming = true + +[xmlsplit] +filename = xmlsplit.py +retainsevents = true +overrides_timeorder = false +run_in_preview = false +streaming = true + +[xmlprettyprint] +filename = xmlprettyprint.py +retainsevents = true +overrides_timeorder = false +run_in_preview = false +streaming = true + +[xmlstripdeclaration] +filename = xmlstripdeclaration.py +retainsevents = true +overrides_timeorder = false +run_in_preview = false +streaming = true + diff --git a/xmlutils/default/searchbnf.conf b/xmlutils/default/searchbnf.conf new file mode 100644 index 0000000..6b3eacd --- /dev/null +++ b/xmlutils/default/searchbnf.conf @@ -0,0 +1,66 @@ +[xmlkvrecursive-command] +syntax = xmlkvrecursive (flatten=)? +shortdesc = Builds fields recursively from xml. +description = Given an xml document in either _raw or a field called xml, all cells are extracted into named fields. flatten determines whether fields with the same name create new fields or multivalue fields. +default = xmlkvrecursive +example1 = ... | xmlkvrecursive +example2 = ... | xmlstripdeclaration | xmlkvrecursive flatten=true +commentcheat = Builds fields recursively from xml. +examplecheat = xmlkvrecursive (flatten=true) +category = formatting +maintainer = vbumgarner +usage = public +appears-in=4.1 +tags = xml kv +related = xpath xmlprettyprint xmlsplit xmlstripdeclaration + + +[xmlsplit-command] +syntax = xmlsplit field= +shortdesc = Splits an xml into separate events by node. +description = Given an xml document in either _raw or a field called xml, create an event for each node specified in field. +default = xmlsplit field="field1" +example1 = ... | xmlsplit field="event" +example2 = ... | xmlstripdeclaration | xmlsplit field="event" +commentcheat = Splits an xml into separate events by node. +examplecheat = xmlsplit field="event" +category = formatting +maintainer = vbumgarner +usage = public +appears-in=4.1 +tags = xml split +related = xpath xmlprettyprint xmlsplit xmlstripdeclaration + + +[xmlprettyprint-command] +syntax = xmlprettyprint +shortdesc = Pretty prints xml. +description = Given an xml document in either _raw or a field called xml, pretty print the xml and replace _raw. +default = xmlprettyprint +example1 = ... | xmlprettyprint +example2 = ... | xmlstripdeclaration | xmlprettyprint +commentcheat = Pretty prints xml. +examplecheat = xmlprettyprint +category = formatting +maintainer = vbumgarner +usage = public +appears-in=4.1 +tags = xml pretty +related = xpath xmlprettyprint xmlsplit xmlstripdeclaration + +[xmlstripdeclaration-command] +syntax = xmlstripdeclaration +shortdesc = Removes the xml declaration from the beginning of an xml document. +description = Given an xml document in either _raw or a field called xml, remove the xml declaration, as it may cause a parsing error. +default = xmlstripdeclaration +example1 = ... | xmlstripdeclaration +example2 = ... | xmlstripdeclaration | xmlprettyprint +commentcheat = Removes the xml declaration from the beginning of an xml document. +examplecheat = xmlstripdeclaration +category = formatting +maintainer = vbumgarner +usage = public +appears-in=4.1 +tags = xml +related = xpath xmlprettyprint xmlsplit xmlstripdeclaration + diff --git a/xmlutils/metadata/default.meta b/xmlutils/metadata/default.meta new file mode 100644 index 0000000..f9ecc07 --- /dev/null +++ b/xmlutils/metadata/default.meta @@ -0,0 +1,19 @@ + +# Application-level permissions + +[] +access = read : [ * ], write : [ admin, power ] + +[viewstates] +access = read : [ * ], write : [ * ] + +[lookups] +export = system + +[commands] +export = system +access = read : [ * ], write : [ admin ] + +[searchbnf] +export = system + diff --git a/xmlutils/test/entity.csv b/xmlutils/test/entity.csv new file mode 100644 index 0000000..cb8d569 --- /dev/null +++ b/xmlutils/test/entity.csv @@ -0,0 +1,10 @@ +junk header + +_raw,_time,yep +"]>foo &x;barla",12345,nope +"]>foo &x;bar",12345,not +"]>foo &x;bar",12345,today +"]>foo &x;bar",12345 +"]>foo &x;bar",12345 +"]>foo &x;bar",12345 +"foobarha",12345