1
- import pickle
2
- import json
3
1
import itertools
2
+ import json
3
+ import pickle
4
4
import re
5
5
import sys
6
- from collections import namedtuple
6
+ from dataclasses import dataclass
7
+ from typing import List , Match , Optional , Pattern , Union
7
8
8
- from .._types import Logs , Benchmark , Block
9
+ from .._types import Benchmark , Block , Logs
9
10
10
# Literal line the scheduler prints at the start of every scheduling region;
# the log is split into regions by locating each occurrence of this marker.
_REGION_DELIMITER = 'INFO: ********** Opt Scheduling **********'
_RE_REGION_DELIMITER = re.compile(re.escape(_REGION_DELIMITER))
11
13
12
14
13
15
def import_main (parsefn , * , description ):
@@ -24,18 +26,39 @@ def import_main(parsefn, *, description):
24
26
pickle .dump (result , f )
25
27
26
28
27
def parse_multi_bench_file(logtext: str, *, benchstart: Union[Pattern, str], filename: Optional[Union[Pattern, str]] = None):
    """Split a log containing several benchmarks into a Logs object.

    benchstart: RE (or pattern string) matching the start of each benchmark;
        its named groups become that benchmark's properties (including 'name').
    filename: optional RE (or pattern string) marking file boundaries inside
        a benchmark, forwarded to the per-benchmark parser.
    """
    benchstart = re.compile(benchstart)
    if filename is not None:
        filename = re.compile(filename)

    starts = list(benchstart.finditer(logtext))
    # Each benchmark's text runs until the next benchmark begins (or EOF).
    ends = [m.start() for m in starts[1:]] + [len(logtext)]

    benchmarks = []
    for index, (benchm, bench_end) in enumerate(zip(starts, ends)):
        # The start RE can specify any extra properties via named groups.
        info = benchm.groupdict()
        # The first benchmark begins at position 0 so that no leading
        # log text before its start marker is lost.
        bench_begin = benchm.start() if index else 0
        benchmarks.append(_parse_benchmark(info, logtext,
                                           bench_begin, bench_end,
                                           filenamere=filename))

    return Logs(benchmarks)
36
57
37
58
38
- def parse_single_bench_file (logtext , * , benchname , filename = None ):
59
+ def parse_single_bench_file (logtext , * , benchname , filename : Optional [Union [Pattern , str ]] = None ):
60
+ if filename is not None :
61
+ filename = re .compile (filename )
39
62
return Logs ([
40
63
_parse_benchmark (
41
64
{'name' : benchname },
@@ -45,21 +68,10 @@ def parse_single_bench_file(logtext, *, benchname, filename=None):
45
68
])
46
69
47
70
48
- _FileInfo = namedtuple ('_FileInfo' , ('filename' , 'from_pos' ))
49
-
50
-
51
- def _each_cons (iterable , n ):
52
- '''
53
- Iterates over each consecutive n items of the iterable.
54
-
55
- _each_cons((1, 2, 3, 4), 2) # (1, 2), (2, 3), (3, 4)
56
- '''
57
- iters = [None ] * n
58
- iters [0 ] = iter (iterable )
59
- for i in range (1 , n ):
60
- iters [i - 1 ], iters [i ] = itertools .tee (iters [i - 1 ])
61
- next (iters [i ], None )
62
- return zip (* iters )
71
+ @dataclass
72
+ class _FileInfo :
73
+ filename : Optional [str ]
74
+ from_pos : int
63
75
64
76
65
77
class _DummyEnd :
@@ -73,65 +85,68 @@ def end(self):
73
85
return self ._end
74
86
75
87
76
def _filename_info(filenamere: Optional[Pattern], logtext: str, start: int, end: int) -> List[_FileInfo]:
    """Locate file-boundary markers in logtext[start:end].

    Returns one _FileInfo per match of `filenamere`, taking group(1) as the
    filename and the match's end as the position where that file's content
    begins. Returns an empty list when no pattern is given.
    """
    if filenamere is None:
        # No pattern means no file boundaries; the caller treats an empty
        # result as "one unknown file". Returning early avoids the original
        # trick of scanning the whole slice with a never-matching RE (r'.^').
        return []

    return [
        _FileInfo(filename=m.group(1), from_pos=m.end())
        for m in filenamere.finditer(logtext, start, end)
    ]
90
99
91
def _parse_benchmark(info: dict, logtext: str, start: int, end: int, *, filenamere: Optional[Pattern]):
    """Parse one benchmark's slice of the log into a Benchmark.

    info: the benchmark's properties; must contain 'name'.
    logtext: the full log text; only logtext[start:end] is examined.
    filenamere: optional compiled RE whose group(1) captures a filename,
        used to attribute each scheduling region to its source file.
    """
    benchname = info['name']

    # File boundaries within this benchmark's slice of the log.
    files: List[_FileInfo] = _filename_info(filenamere, logtext, start, end)
    if not files:
        # We have an unknown file starting from the very beginning.
        files = [_FileInfo(filename=None, from_pos=start)]

    # Sentinel "file" at the end lets us peek at files[file_pos + 1] without
    # bounds checks; no region can start after `end`, so it never matches.
    files.append(_FileInfo(filename=None, from_pos=end))
    assert len(files) >= 2
    file_pos = 0

    # Pair each region delimiter with the following one (or a dummy at `end`)
    # so each region's text runs up to the start of the next region.
    region_matches, next_matches = itertools.tee(
        _RE_REGION_DELIMITER.finditer(logtext, start, end))
    # Drop the first; the default prevents StopIteration when the benchmark
    # contains no regions at all (the original `next(...)` crashed here).
    next(next_matches, None)
    next_matches = itertools.chain(next_matches, (_DummyEnd(end),))

    blocks = []
    is_first = True
    for regionm, nextm in zip(region_matches, next_matches):
        region_start = regionm.end()
        # Advance past every file that ends before this region begins;
        # a file may contain no regions at all, so a single `if` (as the
        # original had) could lag behind and mis-attribute regions.
        while region_start > files[file_pos + 1].from_pos:
            file_pos += 1

        assert region_start > files[file_pos].from_pos

        regioninfo = {
            'file': files[file_pos].filename,
            'benchmark': benchname,
        }
        # The first region's text starts at `start` so that no leading
        # log text before its delimiter is lost.
        blk_start = start if is_first else regionm.start()
        blocks.append(_parse_block(regioninfo, logtext,
                                   blk_start, nextm.start()))
        is_first = False

    return Benchmark(info, blocks)
130
143
131
144
132
145
def _parse_block(info, logtext: str, start, end):
    """Build a Block from one scheduling region's slice of the log.

    The raw text of the slice is kept alongside its parsed events; the
    region's ProcessDag event supplies the block's name.
    """
    raw_log = logtext[start:end]
    events = _parse_events(logtext, start, end)
    # Every region is expected to have logged a ProcessDag event.
    assert 'ProcessDag' in events
    info['name'] = events['ProcessDag'][0]['name']
    return Block(info, raw_log, events)
137
152
0 commit comments