mrc_prune.py
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
"""Script for generating MRCs (miss-ratio curves) from stack distances."""
from __future__ import absolute_import, print_function, unicode_literals
import csv
import pprint
import sys
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np

from cs_prune import CounterStack_prune
# Cache block size in bytes
_BLOCK_SIZE = 4096

trace = sys.argv[1]  # e.g. 'traces/wdev/wdev_clean.csv'; also available: 'traces/web/web_clean.csv', 'traces/normal_137979.txt'
trace_name = sys.argv[2]
starttime = datetime.now()
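
# Command-line interface (argv[3] and argv[4] are read inside
# generate_mrc_prune below):
#   argv[1]: path to the trace file, one symbol (block address) per line
#   argv[2]: short trace name used in the plot title and output filenames
#   argv[3]: downsample rate for the counter stack
#   argv[4]: pruning delta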
def generate_mrc_prune(trace_filename):
    # Set the downsample rate and pruning delta from the command line
    downsample_rate = int(sys.argv[3])
    pruning_delta = float(sys.argv[4])
    # Create the counter stack and read in the file, feeding it the symbols
    counterstack = CounterStack_prune(downsample_rate)
    # steps counts trace references; it is the denominator of the miss ratio
    steps = 0
    with open(trace_filename, 'r') as f:
        for line in f:
            steps += 1
            symbol = line.rstrip()
            counterstack.process_sequence_symbol(symbol, pruning_delta)
            if steps % downsample_rate == 0:
                print(steps)  # progress indicator
print("total time taken to generate curve")
totaltime = datetime.now() - starttime
print (totaltime)
totalsize = counterstack.total_size()
print("total size in bytes")
print(totalsize)
    # Get the histogram of stack distances
    bins, values = counterstack.get_stack_distance_counts()
    # Debugging: print all the counts and their cumulative sum
    # pp = pprint.PrettyPrinter(indent=4)
    # pp.pprint(zip(bins, values))
    # print(np.sum(values))
    # Convert bins from block counts to GB (decimal, 10^9 bytes)
    bins = [b * _BLOCK_SIZE / float(1000000000) for b in bins]
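    # For scale: with 4 KB blocks, a stack distance of 1,000,000 blocks maps
    # to 1000000 * 4096 / 1e9 ~= 4.1 GB on the x-axis.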
    # Carry any fully negative buckets over to the next non-negative bucket,
    # so that the cumulative hit count (and hence the miss-ratio curve)
    # stays monotonic
    neg = 0
    total = 0
    cum_vals = []
    for i in xrange(len(bins)):
        val = values[i]
        if neg < 0:
            val += neg
        if val < 0:
            neg = val
        else:
            neg = 0
            total += val
        cum_vals.append(1 - total / float(steps))
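    # For example (hypothetical counts): values [4, -3, -2, 6] yield running
    # totals [4, 4, 4, 5] -- the -3 and -2 accumulate in `neg` and are folded
    # into the next non-negative bucket instead of letting `total` dip.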
    # Alternative plotting approaches, kept for reference:
    # vals = np.cumsum(values) / float(steps)
    # plt.hist(vals, bins=bins, histtype='step', cumulative=True)
    plt.plot(bins, cum_vals)
    plt.title(trace_name + '_MRC-Pruned-delta_' + str(pruning_delta) + '_sample_' + str(downsample_rate))
    plt.xlabel("Cache Size (GB)")
    plt.ylabel("Miss Ratio")
    plt.ylim(0, 1)
    plt.yticks([0.00, 0.25, 0.50, 0.75, 1.00])
    plt.savefig(trace_name + '_MRC_delta_' + str(pruning_delta) + '_sample_' + str(downsample_rate) + '.png')
    # plt.show()
    csv_title = trace_name + '__delta_' + str(pruning_delta) + '_sample_' + str(downsample_rate)
    # Write the (cache size, miss ratio) points to a csv file, in bin order
    with open(trace_name + '_mrc_result_delta_' + str(pruning_delta) + '_sample_' + str(downsample_rate) + '.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile)
        for key, value in zip(bins, cum_vals):
            writer.writerow([key, value])
    # Append a (run, elapsed time, structure size) record to the shared
    # performance log
    with open('performance.csv', 'a') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([csv_title, totaltime, totalsize])
if __name__ == '__main__':
    generate_mrc_prune(trace)
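
# A minimal usage sketch (the trace path and parameter values below are
# illustrative, not prescriptive):
#
#   python2.7 mrc_prune.py traces/wdev/wdev_clean.csv wdev 1000 0.01
#
# reads traces/wdev/wdev_clean.csv, downsamples the counter stack by 1000,
# prunes with delta 0.01, and writes wdev_MRC_delta_0.01_sample_1000.png,
# wdev_mrc_result_delta_0.01_sample_1000.csv, and a row appended to
# performance.csv.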