#! /usr/bin/env python3
# pylint: disable=E1101
'''o365 Audit Log Extractor

Audit logs exported from the Office 365 Protection Center leave much to be desired. This script
extracts the nested AuditData JSON from each record and exports the events to CSV or JSON files,
either combined into one file or split per workload. It accepts comma-separated value (.csv)
files or a directory of CSV files as input.

Author: Ian Day
Initial Release: December 8 2019, Version 1.0
'''
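# Example invocations (illustrative; file and directory names are hypothetical):
#   python3 o365AuditParser.py AuditLog.csv --combined --format json
#   python3 o365AuditParser.py ./exports --workload -o ./parsed -p myAudit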
import argparse
import csv
import datetime
import json
import logging
import pathlib
import sys
from collections import defaultdict
VERSION = '1.0'
NAME = 'o365 Audit Log Extractor'
if __name__ == '__main__':
# parse command line arguments
parser = argparse.ArgumentParser(description='o365 Audit Log Extractor')
    parser.add_argument(help='File/Directory to process', type=str, dest='input')
parser.add_argument('-o', '--output', help='Output directory, defaults to current directory', type=pathlib.Path, default=pathlib.Path.cwd(), dest='output')
parser.add_argument('-p', '--prefix', help='Prefix for output files, defaults to o365AuditLog', type=str, default='o365AuditLog', dest='prefix')
    parser.add_argument('-f', '--format', help='Output file format, defaults to csv', type=str, choices=['csv', 'json'], default='csv', dest='format')
outputOptions = parser.add_mutually_exclusive_group(required=True)
outputOptions.add_argument('-w', '--workload', help='Generate individual output files per workload', action='store_true', dest='workload')
outputOptions.add_argument('-c', '--combined', help='Generate one output file', action='store_true', dest='combined')
parser.add_argument('-v', '--verbose', help='Enable debug logging', action='store_true', dest='verbose')
    parser.add_argument('--version', action='version', version='{0} {1}'.format(NAME, VERSION))
args = parser.parse_args()
# configure logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# log to screen
ch = logging.StreamHandler()
# log to file
    fh = logging.FileHandler('{0}_{1}.log'.format(NAME.replace(' ', '_'), datetime.datetime.now().strftime('%Y%m%d-%H%M%S')))
if args.verbose:
fh.setLevel(logging.DEBUG)
ch.setLevel(logging.DEBUG)
else:
fh.setLevel(logging.INFO)
ch.setLevel(logging.INFO)
# format log output
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
ch.setFormatter(formatter)
logger.addHandler(fh)
logger.addHandler(ch)
logger.info('{0} v{1} Started'.format(NAME, VERSION))
# determine input type
logger.debug('Checking input: {0}'.format(args.input))
# check input
try:
input_object = pathlib.Path(args.input)
    except Exception:
logger.error('Invalid input path specified, terminating script')
sys.exit(1)
# check output, attempt to create
    try:
        args.output.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error('Invalid output path or permissions error, terminating script')
        logger.error(e)
        sys.exit(1)
if not input_object.is_file() and not input_object.is_dir():
logger.error('Invalid input path specified, terminating script')
sys.exit(1)
else:
        # determine the list of files to process, expanding each path before adding it to the list
if input_object.is_file():
logger.debug('Input detected as file')
            filesToProcess = [input_object.resolve()]
else:
logger.debug('Input detected as directory')
            filesToProcess = [x.resolve() for x in input_object.iterdir() if x.is_file()]
    # dicts to hold record field names and parsed results
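    # e.g. (illustrative): fieldNames['Exchange'] -> {'Id', 'Workload', 'CreationTime', ...}
    #                      results['Exchange']    -> [parsed event dicts]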
fieldNames = defaultdict(set)
results = defaultdict(list)
# process files
for entry in filesToProcess:
try:
logger.info('Processing file: {}'.format(entry))
            with open(entry, 'r', encoding='utf-8', newline='') as inFile:
counter = 0
# loop through input file
dictReader = csv.DictReader(inFile)
for line in dictReader:
try:
# transform auditData to dictionary
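                        # AuditData holds the full event as a JSON string embedded in the exported CSV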
record = json.loads(line['AuditData'])
# remove random linebreaks in field values
                        # it's a feature, not a bug
for field in record:
if isinstance(record[field], str):
record[field] = record[field].strip()
# get list of fields in auditData
recordFields = list(record.keys())
# events of the same Workload can have different fields
# create a union to ensure fields not seen yet are included in final output
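                        # e.g. (illustrative) one Exchange event may carry 'ClientIP' while
                        # another carries 'Parameters'; the union keeps a column for each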
                        fieldNames[record['Workload']].update(recordFields)
# add record to results and update record count
results[record['Workload']].append(record)
counter += 1
                    except Exception as e:
                        logger.error('unable to parse record at line {0} in file {1}'.format(dictReader.line_num, entry))
                        logger.error('error message: {}'.format(e))
            # log the record count for this file
logger.info('Processing complete, {} records found'.format(counter))
        except Exception as e:
            logger.error('error processing file: {}'.format(entry))
            logger.error('error message: {}'.format(e))
# sort and output records
logger.info('Beginning export')
# export one file with all workloads
if args.combined:
if args.format == 'csv':
            # combine field names from every workload into one set
            combinedFieldNames = set()
            for workload in fieldNames:
                combinedFieldNames |= fieldNames[workload]
# generate output path and open file
fileName = '{}-combinedRecords.csv'.format(args.prefix)
output_obj = pathlib.Path(args.output.resolve(), fileName)
logger.debug('Path: {}'.format(output_obj))
            with open(output_obj, 'w', encoding='utf-8') as outFile:
                # create a dictionary writer and write headers; sorted field names give a stable column order
                dictWriter = csv.DictWriter(outFile, fieldnames=sorted(combinedFieldNames), lineterminator='\n')
                dictWriter.writeheader()
for workload in results:
logger.info('Sorting and exporting {0} {1} records to CSV file'.format(len(results[workload]), workload))
# sort records based on timestamp in CreationTime field
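                    # CreationTime is an ISO 8601 string, so lexicographic order is chronological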
results[workload] = sorted(results[workload], key=lambda t: t['CreationTime'])
# write results to file
dictWriter.writerows(results[workload])
if args.format == 'json':
logger.info('Exporting records to JSON file')
#combine workloads into one list for export
allResults = []
for workload in results:
for entry in results[workload]:
allResults.append(entry)
# generate output path and open file
fileName = '{}-combinedRecords.json'.format(args.prefix)
output_obj = pathlib.Path(args.output.resolve(), fileName)
logger.debug('Path: {}'.format(output_obj))
            with open(output_obj, 'w', encoding='utf-8') as outFile:
json.dump(allResults, outFile)
# export one file per workload
if args.workload:
if args.format == 'csv':
for workload in results:
logger.info('Sorting and exporting {0} {1} records to CSV file'.format(len(results[workload]), workload))
# sort records based on timestamp in CreationTime field
results[workload] = sorted(results[workload], key=lambda t: t['CreationTime'])
# write to file
fileName = '{0}-{1}.csv'.format(args.prefix, workload)
output_obj = pathlib.Path(args.output.resolve(), fileName)
                with open(output_obj, 'w', encoding='utf-8') as outFile:
                    logger.debug('Path: {}'.format(output_obj))
                    dictWriter = csv.DictWriter(outFile, fieldnames=sorted(fieldNames[workload]), lineterminator='\n')
dictWriter.writeheader()
dictWriter.writerows(results[workload])
if args.format == 'json':
for workload in results:
logger.info('Sorting and exporting {0} {1} records to JSON file'.format(len(results[workload]), workload))
# sort records based on timestamp in CreationTime field
results[workload] = sorted(results[workload], key=lambda t: t['CreationTime'])
# write to file
fileName = '{0}-{1}.json'.format(args.prefix, workload)
output_obj = pathlib.Path(args.output.resolve(), fileName)
                with open(output_obj, 'w', encoding='utf-8') as outFile:
logger.debug('Path: {}'.format(output_obj))
json.dump(results[workload], outFile)
logger.info('Export complete, terminating')