-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 4e50236
Showing
10 changed files
with
472 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0 | ||
import sys,splunk.Intersplunk | ||
import re | ||
import urllib | ||
import xml.sax | ||
import xml.sax.saxutils as saxutils | ||
from xml.sax.handler import ContentHandler | ||
from xml.sax.handler import EntityResolver | ||
from xml.sax.xmlreader import InputSource | ||
import StringIO | ||
import types | ||
|
||
class NullInputSource(InputSource):
    """Input source whose byte stream is a fixed placeholder message.

    NullEntityResolver returns an instance of this class for every entity
    lookup, so the parser reads the placeholder text instead of whatever the
    entity's public/system identifier points at.
    """

    def getByteStream(self):
        """Return a new byte stream containing the placeholder message."""
        # Local import: ``io.BytesIO`` exists on Python 2.6+ and Python 3,
        # unlike the Python 2-only ``StringIO`` module, and yields real bytes
        # as the byte-stream contract requires.
        import io
        return io.BytesIO(b"entity files not supported.")
|
||
class NullEntityResolver(EntityResolver):
    """Entity resolver that answers every lookup with a NullInputSource."""

    def resolveEntity(self, publicId, systemId):
        """Ignore both identifiers and return a placeholder input source."""
        placeholder_source = NullInputSource()
        return placeholder_source
|
||
class XmlHandler(ContentHandler):
    """SAX handler that turns an XML document into a flat dict of fields.

    A field name is the element names on the path from the root joined with
    ``_``; an attribute is stored under ``<path>-<attribute name>``.  When a
    key receives a second non-blank value, its entry becomes a list of values
    and the marker field ``'multi values'`` is set to ``'yep'``.

    If ``flatten`` is true, an element path that was already seen is renamed
    ``name[2]``, ``name[3]``, ... so repeated elements get their own keys
    instead of being collected into a list.
    """

    def __init__(self, flatten):
        """Store the ``flatten`` switch and start from a clean state."""
        ContentHandler.__init__(self)
        self.flatten = flatten
        # Initialise parse state so the handler is usable even if the caller
        # does not call reset() before the first parse.
        self.reset()

    def reset(self):
        """Discard all state collected from a previous document."""
        self.key_prefix = []
        # Only ever used for membership tests, hence a set.
        self.keys_seen = set()
        self.new_fields = {}
        self._text_chunks = []

    def getNewFields(self):
        """Return the dict of fields collected so far."""
        return self.new_fields

    def setValue(self, value, suffix=''):
        """Record ``value`` under the current element path plus ``suffix``.

        Values that are blank after stripping whitespace are ignored.
        """
        dest_key = '_'.join(self.key_prefix) + suffix
        # NOTE(review): on Python 2, str() of non-ASCII unicode text raises
        # UnicodeEncodeError; kept as-is to preserve the stored value type.
        text = str(value)
        if not text.strip():
            return

        if dest_key not in self.new_fields:
            self.new_fields[dest_key] = text
            return

        # NOTE(review): this marker looks like a debugging leftover, but it is
        # emitted into the results, so it is preserved.
        self.new_fields['multi values'] = 'yep'
        existing = self.new_fields[dest_key]
        if not isinstance(existing, list):
            # Second value for this key: promote the scalar to a list.
            existing = [existing]
            self.new_fields[dest_key] = existing
        existing.append(text)

    def _flushText(self):
        """Store the buffered character data for the current element."""
        if not self._text_chunks:
            return
        text = ''.join(self._text_chunks).strip()
        self._text_chunks = []
        if text:
            self.setValue(text)

    def startElement(self, name, attrs):
        """Enter element ``name`` and record its attributes."""
        # Text seen so far belongs to the enclosing element.
        self._flushText()
        self.key_prefix.append(name)

        # If flatten is set, create a new name when this path was used before.
        if self.flatten and '_'.join(self.key_prefix) in self.keys_seen:
            self.key_prefix.pop()
            count = 2
            newName = name + '[' + str(count) + ']'
            while '_'.join(self.key_prefix) + '_' + newName in self.keys_seen:
                count += 1
                newName = name + '[' + str(count) + ']'
            self.key_prefix.append(newName)

        self.keys_seen.add('_'.join(self.key_prefix))

        for k in attrs.getNames():
            self.setValue(attrs.getValue(k), "-" + k)

    def characters(self, content):
        """Buffer a chunk of character data.

        The parser may deliver one text node in several chunks (for example
        around entity references), so chunks are joined and stored as a single
        value when the next element boundary is reached.
        """
        if content:
            self._text_chunks.append(content)

    def endElement(self, name):
        """Store pending text for the element, then leave it."""
        self._flushText()
        self.key_prefix.pop()
|
||
|
||
try: | ||
results,dummyresults,settings = splunk.Intersplunk.getOrganizedResults() | ||
|
||
keywords, argvals = splunk.Intersplunk.getKeywordsAndOptions() | ||
|
||
flatten = argvals.get("flatten", "False") | ||
if flatten.strip().lower() in ['true','1','yes']: | ||
flatten = True | ||
else: | ||
flatten = False | ||
|
||
handler = XmlHandler(flatten) | ||
|
||
for r in results: | ||
try: | ||
if 'xml' in r: | ||
xml_text = r['xml'] | ||
else: | ||
raw = r["_raw"] | ||
|
||
xml_text = raw[ raw.index( '<' ) : raw.rindex( '>' )+1 ] | ||
|
||
handler.reset() | ||
|
||
parser = xml.sax.make_parser() | ||
parser.setContentHandler(handler) | ||
parser.setEntityResolver(NullEntityResolver()) | ||
parser.parse(StringIO.StringIO(xml_text)) | ||
|
||
for k,v in handler.getNewFields().iteritems(): | ||
r[k] = v | ||
except: | ||
import traceback | ||
stack = traceback.format_exc() | ||
r['_raw'] = "Failed to parse: " + str(stack) + "\n" + r['_raw'] | ||
|
||
except: | ||
import traceback | ||
stack = traceback.format_exc() | ||
results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack)) | ||
|
||
splunk.Intersplunk.outputResults( results ) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0 | ||
import sys,splunk.Intersplunk | ||
import xml.sax | ||
import xml.sax.saxutils as saxutils | ||
from xml.sax.handler import ContentHandler | ||
from xml.sax.handler import EntityResolver | ||
from xml.sax.xmlreader import InputSource | ||
import StringIO | ||
|
||
class NullInputSource(InputSource):
    """Input source whose byte stream is a fixed placeholder message.

    NullEntityResolver returns an instance of this class for every entity
    lookup, so the parser reads the placeholder text instead of whatever the
    entity's public/system identifier points at.
    """

    def getByteStream(self):
        """Return a new byte stream containing the placeholder message."""
        # Local import: ``io.BytesIO`` exists on Python 2.6+ and Python 3,
        # unlike the Python 2-only ``StringIO`` module, and yields real bytes
        # as the byte-stream contract requires.
        import io
        return io.BytesIO(b"entity files not supported.")
|
||
class NullEntityResolver(EntityResolver):
    """Entity resolver that answers every lookup with a NullInputSource."""

    def resolveEntity(self, publicId, systemId):
        """Ignore both identifiers and return a placeholder input source."""
        placeholder_source = NullInputSource()
        return placeholder_source
|
||
class XmlHandler(ContentHandler):
    """SAX handler that re-serialises a document as indented XML text.

    Every start tag is written on its own line, indented one space per
    nesting level.  An element with no child elements keeps its text and its
    end tag on the same line as the start tag; otherwise the end tag goes on
    its own line at the element's indent.
    """

    def __init__(self):
        """Start from a clean state so the handler works without reset()."""
        ContentHandler.__init__(self)
        self.reset(None)

    def reset(self, r):
        """Clear the output and nesting state.

        ``r`` is accepted because callers pass a value, but it is not used.
        """
        self.current_output = ''
        self.indent = 0
        self.open_tag = ''

    def getOutput(self):
        """Return the text produced so far."""
        return self.current_output

    def startElement(self, name, attrs):
        """Write the start tag of ``name`` with its attributes on a new line."""
        # Remember the most recently opened element; endElement uses it to
        # tell whether the element being closed had child elements.
        self.open_tag = name
        pieces = ['\n', ' ' * self.indent, '<', name]
        for k in attrs.getNames():
            pieces.extend((' ', k, '=', saxutils.quoteattr(attrs.getValue(k))))
        pieces.append('>')
        self.current_output += ''.join(pieces)
        self.indent += 1

    def characters(self, content):
        """Append escaped character data; whitespace-only chunks are dropped."""
        if content.strip():
            self.current_output += saxutils.escape(content)

    def endElement(self, name):
        """Write the end tag of ``name``."""
        self.indent -= 1
        if self.open_tag != name:
            # Child elements were written since this element was opened, so
            # the end tag goes on its own line.
            self.current_output += '\n' + ' ' * self.indent
        self.current_output += '</' + name + '>'
        # Clear the marker so a parent with the same name as the child just
        # closed is not mistaken for a childless element.
        self.open_tag = ''
|
||
|
||
try: | ||
results,dummyresults,settings = splunk.Intersplunk.getOrganizedResults() | ||
|
||
handler = XmlHandler() | ||
|
||
for r in results: | ||
try: | ||
if 'xml' in r: | ||
xml_text = r['xml'] | ||
dest_field = 'xml' | ||
else: | ||
raw = r["_raw"] | ||
dest_field = '_raw' | ||
|
||
xml_text = raw[ raw.index( '<' ) : raw.rindex( '>' )+1 ] | ||
|
||
handler.reset(xml_text) | ||
parser = xml.sax.make_parser() | ||
parser.setContentHandler(handler) | ||
parser.setEntityResolver(NullEntityResolver()) | ||
parser.parse(StringIO.StringIO(xml_text)) | ||
|
||
r[dest_field] = handler.getOutput() | ||
|
||
if 'xml' in r: | ||
xml_text = r['xml'] | ||
else: | ||
raw = r["_raw"] | ||
|
||
except: | ||
import traceback | ||
stack = traceback.format_exc() | ||
r['_raw'] = "Failed to parse: " + str(stack) + "\n" + r['_raw'] | ||
|
||
except: | ||
import traceback | ||
stack = traceback.format_exc() | ||
results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack)) | ||
|
||
splunk.Intersplunk.outputResults( results ) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0 | ||
import sys,splunk.Intersplunk | ||
import re | ||
import xml.sax | ||
import xml.sax.saxutils as saxutils | ||
from xml.sax.handler import ContentHandler | ||
from xml.sax.handler import EntityResolver | ||
from xml.sax.xmlreader import InputSource | ||
import copy | ||
import StringIO | ||
|
||
|
||
class NullInputSource(InputSource):
    """Input source whose byte stream is a fixed placeholder message.

    NullEntityResolver returns an instance of this class for every entity
    lookup, so the parser reads the placeholder text instead of whatever the
    entity's public/system identifier points at.
    """

    def getByteStream(self):
        """Return a new byte stream containing the placeholder message."""
        # Local import: ``io.BytesIO`` exists on Python 2.6+ and Python 3,
        # unlike the Python 2-only ``StringIO`` module, and yields real bytes
        # as the byte-stream contract requires.
        import io
        return io.BytesIO(b"entity files not supported.")
|
||
class NullEntityResolver(EntityResolver):
    """Entity resolver that answers every lookup with a NullInputSource."""

    def resolveEntity(self, publicId, systemId):
        """Ignore both identifiers and return a placeholder input source."""
        placeholder_source = NullInputSource()
        return placeholder_source
|
||
class XmlHandler(ContentHandler): | ||
def __init__(self, field): | ||
self.field = field | ||
|
||
def reset(self , newResults): | ||
self.current_output = '' | ||
self.newResults = newResults | ||
|
||
def startElement(self, name, attrs): | ||
if name == field: | ||
self.current_output = '' | ||
self.current_output += '<' + name | ||
|
||
if attrs.getLength() > 0: | ||
for k in attrs.getNames(): | ||
self.current_output += ' ' + k + '=' + saxutils.quoteattr(attrs.getValue(k)) | ||
self.current_output += '>' | ||
|
||
def characters(self, content): | ||
self.current_output += saxutils.escape( content ) | ||
|
||
def endElement(self, name): | ||
self.current_output += '</' + name + '>' | ||
if name == field: | ||
if re.match('^<' + field + '[ >]', self.current_output): | ||
newRow = copy.deepcopy(r) | ||
newRow['_raw'] = self.current_output | ||
self.newResults.append(newRow) | ||
self.current_output = '' | ||
|
||
try: | ||
results,dummyresults,settings = splunk.Intersplunk.getOrganizedResults() | ||
|
||
keywords, argvals = splunk.Intersplunk.getKeywordsAndOptions() | ||
|
||
field = argvals.get("field", None) | ||
if field is None: | ||
raise Exception("Must supply name of field in field=fieldName") | ||
|
||
newResults = [] | ||
|
||
handler = XmlHandler(field) | ||
|
||
for r in results: | ||
try: | ||
if 'xml' in r: | ||
xml_text = r['xml'] | ||
else: | ||
raw = r["_raw"] | ||
xml_text = raw[ raw.index( '<' ) : raw.rindex( '>' )+1 ] | ||
|
||
handler.reset(newResults) | ||
parser = xml.sax.make_parser() | ||
parser.setContentHandler(handler) | ||
parser.setEntityResolver(NullEntityResolver()) | ||
parser.parse(StringIO.StringIO(xml_text)) | ||
except: | ||
import traceback | ||
stack = traceback.format_exc() | ||
r['_raw'] = "Failed to parse: " + str(stack) + r['_raw'] | ||
newResults = [r] | ||
|
||
except: | ||
import traceback | ||
stack = traceback.format_exc() | ||
newResults = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack)) | ||
|
||
splunk.Intersplunk.outputResults( newResults ) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0 | ||
import splunk.Intersplunk | ||
|
||
|
||
try: | ||
results,dummyresults,settings = splunk.Intersplunk.getOrganizedResults() | ||
|
||
for r in results: | ||
try: | ||
if 'xml' in r: | ||
xml_text = r['xml'] | ||
dest_field = 'xml' | ||
else: | ||
raw = r["_raw"] | ||
dest_field = '_raw' | ||
|
||
xml_text = raw[ raw.index( '<' ) : raw.rindex( '>' )+1 ] | ||
if xml_text.startswith('<?'): | ||
#remove the xml declaration. I know, I know, but I ran into a case where charset was wrong, and the parser explodes. | ||
xml_text = xml_text[ raw.index( '<' , 5 ) : raw.rindex( '>' )+1 ] | ||
|
||
r[dest_field] = xml_text | ||
|
||
except: | ||
import traceback | ||
stack = traceback.format_exc() | ||
r['_raw'] = "Failed to parse: " + str(stack) + r['_raw'] | ||
|
||
except: | ||
import traceback | ||
stack = traceback.format_exc() | ||
results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack)) | ||
|
||
splunk.Intersplunk.outputResults( results ) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
[launcher] | ||
version = 1.1 | ||
author = vbumgarner | ||
description = XML utilities | ||
|
||
[package] | ||
id = xmlutils | ||
|
||
[install] | ||
state = enabled | ||
build = 2 | ||
|
||
[ui] | ||
is_visible = false | ||
is_manageable = false | ||
label = xmlutils | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
[xmlkvrecursive] | ||
filename = xmlkvrecursive.py | ||
retainsevents = true | ||
overrides_timeorder = false | ||
streaming = true | ||
|
||
[xmlsplit] | ||
filename = xmlsplit.py | ||
retainsevents = true | ||
overrides_timeorder = false | ||
run_in_preview = false | ||
streaming = true | ||
|
||
[xmlprettyprint] | ||
filename = xmlprettyprint.py | ||
retainsevents = true | ||
overrides_timeorder = false | ||
run_in_preview = false | ||
streaming = true | ||
|
||
[xmlstripdeclaration] | ||
filename = xmlstripdeclaration.py | ||
retainsevents = true | ||
overrides_timeorder = false | ||
run_in_preview = false | ||
streaming = true | ||
|
Oops, something went wrong.