Skip to content

Commit

Permalink
Initial commit of version 1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
NullIsNot0 committed Dec 2, 2020
0 parents commit 4e50236
Show file tree
Hide file tree
Showing 10 changed files with 472 additions and 0 deletions.
Empty file added README.md
Empty file.
117 changes: 117 additions & 0 deletions xmlutils/bin/xmlkvrecursive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0
import sys,splunk.Intersplunk
import re
import urllib
import xml.sax
import xml.sax.saxutils as saxutils
from xml.sax.handler import ContentHandler
from xml.sax.handler import EntityResolver
from xml.sax.xmlreader import InputSource
import StringIO
import types

class NullInputSource(InputSource):
    """Input source whose byte stream is a fixed placeholder message.

    Returned in place of any external entity so that the parser never
    reads the resource the document asked for.
    """

    def getByteStream(self):
        """Return a fresh binary stream holding the placeholder text."""
        # io.BytesIO is a real byte stream on both Python 2.6+ and
        # Python 3, unlike the Python-2-only StringIO module.
        import io
        return io.BytesIO(b"entity files not supported.")

class NullEntityResolver(EntityResolver):
    """Entity resolver that answers every request with a NullInputSource."""

    def resolveEntity(self, publicId, systemId):
        """Return a new ``NullInputSource``; both identifiers are ignored."""
        placeholder = NullInputSource()
        return placeholder

class XmlHandler(ContentHandler):
    """SAX handler that turns an XML document into a flat dict of fields.

    A field name is the path of element names joined with ``_``.  Attribute
    values are stored under ``<path>-<attribute name>``.  When the same key
    receives more than one non-blank value, the values are collected in a
    list and the marker field ``'multi values'`` is set to ``'yep'``.

    If ``flatten`` is true, a repeated element path gets a numbered name
    (``name[2]``, ``name[3]``, ...) instead of sharing the key of the first
    occurrence.
    """

    def __init__(self, flatten):
        """Store the ``flatten`` switch and start with empty state."""
        ContentHandler.__init__(self)
        self.flatten = flatten
        # Initialise state so the handler is usable even if the caller
        # forgets to call reset() before the first parse.
        self.reset()

    def reset(self):
        """Discard all state collected from a previous document."""
        self.key_prefix = []
        # Only used for membership tests, so a set keeps lookups O(1).
        self.keys_seen = set()
        self.new_fields = {}

    def getNewFields(self):
        """Return the dict of fields collected since the last reset()."""
        return self.new_fields

    def setValue(self, value, suffix=''):
        """Record ``value`` under the current element path plus ``suffix``.

        Values that are empty or whitespace-only are ignored.
        """
        dest_key = '_'.join(self.key_prefix) + suffix
        text = str(value)
        if not text.strip():
            return

        if dest_key in self.new_fields:
            # Marker field kept from the original implementation.
            self.new_fields['multi values'] = 'yep'
            existing = self.new_fields[dest_key]
            # Second value for this key: promote the scalar to a list.
            if not isinstance(existing, list):
                existing = [existing]
                self.new_fields[dest_key] = existing
            existing.append(text)
        else:
            self.new_fields[dest_key] = text

    def startElement(self, name, attrs):
        """Push ``name`` onto the path and record the element's attributes."""
        self.key_prefix.append(name)

        # Use the instance setting rather than a module-level global, so
        # the handler works no matter how the calling script names things.
        if self.flatten and '_'.join(self.key_prefix) in self.keys_seen:
            self.key_prefix.pop()
            parent_path = '_'.join(self.key_prefix)
            count = 2
            newName = name + '[' + str(count) + ']'
            while parent_path + '_' + newName in self.keys_seen:
                count += 1
                newName = name + '[' + str(count) + ']'
            self.key_prefix.append(newName)

        self.keys_seen.add('_'.join(self.key_prefix))

        for k in attrs.getNames():
            self.setValue(attrs.getValue(k), "-" + k)

    def characters(self, content):
        """Record the stripped text of ``content`` if it is not blank.

        NOTE(review): each call is stored as a separate value; a parser
        that delivers one text node in several chunks will therefore
        produce a multi-value field -- confirm whether that is intended.
        """
        if content is None:
            return
        stripped = content.strip()
        # Compare by truthiness; identity tests against '' are unreliable.
        if stripped:
            self.setValue(stripped)

    def endElement(self, name):
        """Pop the element that just closed off the current path."""
        self.key_prefix.pop()


# Script body: extract fields from the XML held in each result's ``xml``
# field (or, failing that, from the ``<...>`` span of ``_raw``) and add
# them to the result.
try:
    results, dummyresults, settings = splunk.Intersplunk.getOrganizedResults()

    keywords, argvals = splunk.Intersplunk.getKeywordsAndOptions()

    # ``flatten`` stays a module-level name holding a bool.
    flatten = argvals.get("flatten", "False").strip().lower() in ('true', '1', 'yes')

    handler = XmlHandler(flatten)

    for r in results:
        try:
            if 'xml' in r:
                xml_text = r['xml']
            else:
                raw = r["_raw"]
                # Keep only the span from the first '<' to the last '>'.
                xml_text = raw[raw.index('<'):raw.rindex('>') + 1]

            handler.reset()

            parser = xml.sax.make_parser()
            parser.setContentHandler(handler)
            parser.setEntityResolver(NullEntityResolver())
            parser.parse(StringIO.StringIO(xml_text))

            # items() works on both Python 2 and Python 3 dicts.
            for k, v in handler.getNewFields().items():
                r[k] = v
        except Exception:
            # Best effort: report the failure on the event itself and
            # carry on with the remaining results.
            import traceback
            stack = traceback.format_exc()
            # assumes results are dict-like; .get avoids a second failure
            # when the event has no _raw at all -- TODO confirm
            r['_raw'] = "Failed to parse: " + str(stack) + "\n" + r.get('_raw', '')

except Exception:
    import traceback
    stack = traceback.format_exc()
    results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack))

splunk.Intersplunk.outputResults(results)
92 changes: 92 additions & 0 deletions xmlutils/bin/xmlprettyprint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0
import sys,splunk.Intersplunk
import xml.sax
import xml.sax.saxutils as saxutils
from xml.sax.handler import ContentHandler
from xml.sax.handler import EntityResolver
from xml.sax.xmlreader import InputSource
import StringIO

class NullInputSource(InputSource):
    """Input source whose byte stream is a fixed placeholder message.

    Returned in place of any external entity so that the parser never
    reads the resource the document asked for.
    """

    def getByteStream(self):
        """Return a fresh binary stream holding the placeholder text."""
        # io.BytesIO is a real byte stream on both Python 2.6+ and
        # Python 3, unlike the Python-2-only StringIO module.
        import io
        return io.BytesIO(b"entity files not supported.")

class NullEntityResolver(EntityResolver):
    """Entity resolver that answers every request with a NullInputSource."""

    def resolveEntity(self, publicId, systemId):
        """Return a new ``NullInputSource``; both identifiers are ignored."""
        placeholder = NullInputSource()
        return placeholder

class XmlHandler(ContentHandler):
    """SAX handler that re-serialises a document with one-space indentation.

    Each start tag goes on its own line, indented by nesting depth.  An
    element with no child elements keeps its text and end tag on the same
    line as its start tag; otherwise the end tag goes on its own line.
    """

    def __init__(self):
        """Start with empty output so the handler is usable before reset()."""
        ContentHandler.__init__(self)
        self.current_output = ''
        self.indent = 0
        self.open_tag = ''

    def reset(self, r):
        """Clear all state before parsing a new document.

        ``r`` is accepted for interface compatibility and is not used.
        """
        self.current_output = ''
        self.indent = 0
        self.open_tag = ''

    def getOutput(self):
        """Return the text produced since the last reset()."""
        return self.current_output

    def startElement(self, name, attrs):
        """Emit the start tag of ``name`` on a new, indented line."""
        self.open_tag = name
        pieces = ['\n', ' ' * self.indent, '<', name]
        self.indent += 1

        for k in attrs.getNames():
            pieces.append(' ' + k + '=' + saxutils.quoteattr(attrs.getValue(k)))
        pieces.append('>')
        self.current_output += ''.join(pieces)

    def characters(self, content):
        """Emit ``content`` XML-escaped, skipping whitespace-only chunks."""
        if content.strip():
            self.current_output += saxutils.escape(content)

    def endElement(self, name):
        """Emit the end tag of ``name``."""
        self.indent -= 1
        # open_tag still equals name only when no child element was
        # opened since this element's start tag.
        if self.open_tag != name:
            self.current_output += '\n' + ' ' * self.indent
        self.current_output += '</' + name + '>'
        # Forget the tag once something has closed; otherwise a parent
        # with the same name as its last child would be mistaken for a
        # childless element and lose its line break.
        self.open_tag = ''


# Script body: replace the XML in each result's ``xml`` field (or, failing
# that, the ``<...>`` span of ``_raw``) with an indented rendering.
try:
    results, dummyresults, settings = splunk.Intersplunk.getOrganizedResults()

    handler = XmlHandler()

    for r in results:
        try:
            if 'xml' in r:
                xml_text = r['xml']
                dest_field = 'xml'
            else:
                raw = r["_raw"]
                dest_field = '_raw'
                # Keep only the span from the first '<' to the last '>'.
                xml_text = raw[raw.index('<'):raw.rindex('>') + 1]

            handler.reset(xml_text)
            parser = xml.sax.make_parser()
            parser.setContentHandler(handler)
            parser.setEntityResolver(NullEntityResolver())
            parser.parse(StringIO.StringIO(xml_text))

            r[dest_field] = handler.getOutput()

        except Exception:
            # Best effort: report the failure on the event itself and
            # carry on with the remaining results.
            import traceback
            stack = traceback.format_exc()
            # assumes results are dict-like; .get avoids a second failure
            # when the event has no _raw at all -- TODO confirm
            r['_raw'] = "Failed to parse: " + str(stack) + "\n" + r.get('_raw', '')

except Exception:
    import traceback
    stack = traceback.format_exc()
    results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack))

splunk.Intersplunk.outputResults(results)
89 changes: 89 additions & 0 deletions xmlutils/bin/xmlsplit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0
import sys,splunk.Intersplunk
import re
import xml.sax
import xml.sax.saxutils as saxutils
from xml.sax.handler import ContentHandler
from xml.sax.handler import EntityResolver
from xml.sax.xmlreader import InputSource
import copy
import StringIO


class NullInputSource(InputSource):
    """Input source whose byte stream is a fixed placeholder message.

    Returned in place of any external entity so that the parser never
    reads the resource the document asked for.
    """

    def getByteStream(self):
        """Return a fresh binary stream holding the placeholder text."""
        # io.BytesIO is a real byte stream on both Python 2.6+ and
        # Python 3, unlike the Python-2-only StringIO module.
        import io
        return io.BytesIO(b"entity files not supported.")

class NullEntityResolver(EntityResolver):
    """Entity resolver that answers every request with a NullInputSource."""

    def resolveEntity(self, publicId, systemId):
        """Return a new ``NullInputSource``; both identifiers are ignored."""
        placeholder = NullInputSource()
        return placeholder

class XmlHandler(ContentHandler):
    """SAX handler that emits one result row per ``field`` element.

    The serialised text of every element named ``field`` becomes the
    ``_raw`` value of a deep copy of the source row, which is appended to
    the list supplied to reset().
    """

    def __init__(self, field):
        """Remember the element name to split on and start with empty state."""
        ContentHandler.__init__(self)
        self.field = field
        self.current_output = ''
        self.newResults = []
        self.row = None

    def reset(self, newResults, row=None):
        """Prepare for a new document.

        newResults -- list that receives the generated rows.
        row        -- the row to copy for each generated row.  Optional for
                      backward compatibility: when omitted, the
                      module-level name ``r`` is used, as the original
                      implementation did.
        """
        self.current_output = ''
        self.newResults = newResults
        self.row = row

    def startElement(self, name, attrs):
        """Append the start tag; a ``field`` element restarts the buffer."""
        if name == self.field:
            self.current_output = ''
        pieces = ['<' + name]
        for k in attrs.getNames():
            pieces.append(' ' + k + '=' + saxutils.quoteattr(attrs.getValue(k)))
        pieces.append('>')
        self.current_output += ''.join(pieces)

    def characters(self, content):
        """Append ``content`` XML-escaped."""
        self.current_output += saxutils.escape(content)

    def endElement(self, name):
        """Append the end tag and emit a row when a ``field`` element closes."""
        self.current_output += '</' + name + '>'
        if name != self.field:
            return

        # Only emit when the buffer really begins with this element's
        # start tag.  A literal prefix test avoids interpreting the
        # field name as a regular expression.
        opening = '<' + self.field
        if self.current_output.startswith((opening + ' ', opening + '>')):
            # Legacy fallback: callers that do not pass ``row`` to reset()
            # rely on the script's module-level loop variable ``r``.
            template = self.row if self.row is not None else r
            newRow = copy.deepcopy(template)
            newRow['_raw'] = self.current_output
            self.newResults.append(newRow)
        self.current_output = ''

# Script body: for every result, emit one new row per element named by the
# ``field=`` option, taken from the result's ``xml`` field or, failing
# that, from the ``<...>`` span of ``_raw``.
#
# NOTE: the names ``field`` and ``r`` are intentionally module-level; the
# handler may read them as globals.
try:
    results, dummyresults, settings = splunk.Intersplunk.getOrganizedResults()

    keywords, argvals = splunk.Intersplunk.getKeywordsAndOptions()

    field = argvals.get("field", None)
    if field is None:
        raise Exception("Must supply name of field in field=fieldName")

    newResults = []

    handler = XmlHandler(field)

    for r in results:
        try:
            if 'xml' in r:
                xml_text = r['xml']
            else:
                raw = r["_raw"]
                # Keep only the span from the first '<' to the last '>'.
                xml_text = raw[raw.index('<'):raw.rindex('>') + 1]

            handler.reset(newResults)
            parser = xml.sax.make_parser()
            parser.setContentHandler(handler)
            parser.setEntityResolver(NullEntityResolver())
            parser.parse(StringIO.StringIO(xml_text))
        except Exception:
            import traceback
            stack = traceback.format_exc()
            # assumes results are dict-like; .get avoids a second failure
            # when the event has no _raw at all -- TODO confirm
            r['_raw'] = "Failed to parse: " + str(stack) + r.get('_raw', '')
            # Append rather than rebind: rebinding discarded every row
            # already produced from earlier results.
            newResults.append(r)

except Exception:
    import traceback
    stack = traceback.format_exc()
    newResults = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack))

splunk.Intersplunk.outputResults(newResults)

34 changes: 34 additions & 0 deletions xmlutils/bin/xmlstripdeclaration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0
import splunk.Intersplunk


# Script body: remove a leading ``<?...?>`` declaration from the XML held
# in each result's ``xml`` field or, failing that, in ``_raw`` (which is
# also trimmed to its ``<...>`` span).
try:
    results, dummyresults, settings = splunk.Intersplunk.getOrganizedResults()

    for r in results:
        try:
            if 'xml' in r:
                xml_text = r['xml']
                dest_field = 'xml'
            else:
                raw = r["_raw"]
                dest_field = '_raw'
                # Keep only the span from the first '<' to the last '>'.
                xml_text = raw[raw.index('<'):raw.rindex('>') + 1]

            if xml_text.startswith('<?'):
                # Remove the xml declaration.  Done because a wrong
                # charset in the declaration makes the parser fail.
                # Offsets are computed on xml_text itself: computing them
                # on ``raw`` broke when _raw had leading text, and ``raw``
                # does not exist at all when the ``xml`` field is used.
                xml_text = xml_text[xml_text.index('<', 2):]

            r[dest_field] = xml_text

        except Exception:
            # Best effort: report the failure on the event itself and
            # carry on with the remaining results.
            import traceback
            stack = traceback.format_exc()
            # assumes results are dict-like; .get avoids a second failure
            # when the event has no _raw at all -- TODO confirm
            r['_raw'] = "Failed to parse: " + str(stack) + r.get('_raw', '')

except Exception:
    import traceback
    stack = traceback.format_exc()
    results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack))

splunk.Intersplunk.outputResults(results)
18 changes: 18 additions & 0 deletions xmlutils/default/app.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[launcher]
version = 1.1
author = vbumgarner
description = XML utilities

[package]
id = xmlutils

[install]
state = enabled
build = 2

[ui]
is_visible = false
is_manageable = false
label = xmlutils


27 changes: 27 additions & 0 deletions xmlutils/default/commands.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
[xmlkvrecursive]
filename = xmlkvrecursive.py
retainsevents = true
overrides_timeorder = false
streaming = true

[xmlsplit]
filename = xmlsplit.py
retainsevents = true
overrides_timeorder = false
run_in_preview = false
streaming = true

[xmlprettyprint]
filename = xmlprettyprint.py
retainsevents = true
overrides_timeorder = false
run_in_preview = false
streaming = true

[xmlstripdeclaration]
filename = xmlstripdeclaration.py
retainsevents = true
overrides_timeorder = false
run_in_preview = false
streaming = true

Loading

0 comments on commit 4e50236

Please sign in to comment.