Skip to content

Commit 00b0a9c

Browse files
author
Suffian Khan
authored
Add hugging-face models loss curve and performance guards to ROCm CI pipeline. (#8915)
* test running hf bert-large
* try again
* try again
* include other models
* correct names
* disable deberta-v2-xxlarge
* avoid torch.distributed
* add compare json loss and perf for bert-large to test
* fix sed expression
* remove pytest
* add more models
* move unit tests u
* display samples/sec
1 parent 43d6951 commit 00b0a9c

7 files changed

+509
-5
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
"""Compare a training run's loss curve and throughput against a baseline.

Usage: <script> ACTUAL_JSON EXPECTED_JSON

Both files are JSON objects with a 'steps' list (each entry has 'step' and
'loss') and a top-level 'samples_per_second' number.  The script prints each
comparison and terminates with an AssertionError (non-zero exit status) when
the run does not match the baseline.
"""
import sys
import json


def almost_equal(x, y, threshold=0.0001):
    """Return True when x and y differ by strictly less than threshold."""
    return abs(x-y) < threshold


def load_json(path):
    """Parse and return the JSON document stored at path."""
    with open(path) as json_file:
        return json.load(json_file)


def check_loss_curve(json_actual, json_expect):
    """Verify the actual loss curve matches the expected one step by step.

    Raises AssertionError when the number of logged steps differs, when a
    step index differs, or when a loss differs by the almost_equal
    threshold or more.
    """
    steps_actual = json_actual['steps']
    steps_expect = json_expect['steps']

    # A run that logged fewer (or more) steps than the baseline must fail
    # loudly; comparing only the overlapping prefix would hide a truncated
    # run and let it pass the guard.
    if len(steps_actual) != len(steps_expect):
        raise AssertionError(
            'step count mismatch: actual {} expected {}'.format(
                len(steps_actual), len(steps_expect)))

    for step_actual, step_expect in zip(steps_actual, steps_expect):
        print('step {} loss actual {:.6f} expected {:.6f}'.format(step_actual['step'], step_actual['loss'], step_expect['loss']))
        # Explicit raises instead of assert statements: assert is removed
        # under `python -O`, which would silently disable the checks.
        if step_actual['step'] != step_expect['step']:
            raise AssertionError(
                'step mismatch: actual {} expected {}'.format(
                    step_actual['step'], step_expect['step']))
        # `not almost_equal(...)` also rejects NaN losses, because every
        # comparison involving NaN is False.
        if not almost_equal(step_actual['loss'], step_expect['loss']):
            raise AssertionError(
                'loss mismatch at step {}: actual {} expected {}'.format(
                    step_actual['step'], step_actual['loss'],
                    step_expect['loss']))


def check_performance(json_actual, json_expect, min_ratio=0.95):
    """Verify actual throughput is at least min_ratio of the expected one.

    Raises AssertionError when 'samples_per_second' of the actual run is
    below min_ratio times the expected value.
    """
    perf_actual = json_actual['samples_per_second']
    perf_expect = json_expect['samples_per_second']
    print('samples_per_second actual {:.3f} expected {:.3f}'.format(perf_actual, perf_expect))
    # Written as `not (a >= b)` rather than `a < b` so a NaN throughput
    # fails the check instead of slipping through.
    if not (perf_actual >= min_ratio*perf_expect):
        raise AssertionError(
            'samples_per_second regression: actual {} is below {} x '
            'expected {}'.format(perf_actual, min_ratio, perf_expect))


def main(argv):
    """Run both checks on the files named by argv[1] and argv[2]."""
    if len(argv) < 3:
        sys.exit('usage: {} ACTUAL_JSON EXPECTED_JSON'.format(argv[0]))

    json_actual = load_json(argv[1])
    json_expect = load_json(argv[2])

    # loss curve match
    check_loss_curve(json_actual, json_expect)

    # perf match
    check_performance(json_actual, json_expect)


if __name__ == '__main__':
    main(sys.argv)
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"steps": [
3+
{
4+
"step": 20,
5+
"loss": 5.9949
6+
},
7+
{
8+
"step": 40,
9+
"loss": 3.7808
10+
},
11+
{
12+
"step": 60,
13+
"loss": 3.4385
14+
},
15+
{
16+
"step": 80,
17+
"loss": 3.3679
18+
},
19+
{
20+
"step": 100,
21+
"loss": 3.2163
22+
},
23+
{
24+
"step": 120,
25+
"loss": 3.1817
26+
},
27+
{
28+
"step": 140,
29+
"loss": 3.1556
30+
},
31+
{
32+
"step": 160,
33+
"loss": 3.0778
34+
},
35+
{
36+
"step": 180,
37+
"loss": 3.0072
38+
},
39+
{
40+
"step": 200,
41+
"loss": 3.0134
42+
},
43+
{
44+
"step": 220,
45+
"loss": 2.9648
46+
},
47+
{
48+
"step": 240,
49+
"loss": 2.9377
50+
},
51+
{
52+
"step": 260,
53+
"loss": 2.9206
54+
}
55+
],
56+
"samples_per_second": 20.253
57+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"steps": [
3+
{
4+
"step": 20,
5+
"loss": 1.9889
6+
},
7+
{
8+
"step": 40,
9+
"loss": 1.8601
10+
},
11+
{
12+
"step": 60,
13+
"loss": 1.7641
14+
},
15+
{
16+
"step": 80,
17+
"loss": 1.6828
18+
},
19+
{
20+
"step": 100,
21+
"loss": 1.678
22+
},
23+
{
24+
"step": 120,
25+
"loss": 1.6889
26+
},
27+
{
28+
"step": 140,
29+
"loss": 1.634
30+
},
31+
{
32+
"step": 160,
33+
"loss": 1.6852
34+
},
35+
{
36+
"step": 180,
37+
"loss": 1.61
38+
},
39+
{
40+
"step": 200,
41+
"loss": 1.6123
42+
},
43+
{
44+
"step": 220,
45+
"loss": 1.6529
46+
},
47+
{
48+
"step": 240,
49+
"loss": 1.5321
50+
},
51+
{
52+
"step": 260,
53+
"loss": 1.5459
54+
}
55+
],
56+
"samples_per_second": 21.777
57+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"steps": [
3+
{
4+
"step": 20,
5+
"loss": 2.4661
6+
},
7+
{
8+
"step": 40,
9+
"loss": 2.1771
10+
},
11+
{
12+
"step": 60,
13+
"loss": 2.078
14+
},
15+
{
16+
"step": 80,
17+
"loss": 2.0619
18+
},
19+
{
20+
"step": 100,
21+
"loss": 2.0197
22+
},
23+
{
24+
"step": 120,
25+
"loss": 2.0521
26+
},
27+
{
28+
"step": 140,
29+
"loss": 2.0609
30+
},
31+
{
32+
"step": 160,
33+
"loss": 1.9942
34+
},
35+
{
36+
"step": 180,
37+
"loss": 1.9846
38+
},
39+
{
40+
"step": 200,
41+
"loss": 1.9379
42+
},
43+
{
44+
"step": 220,
45+
"loss": 1.9671
46+
},
47+
{
48+
"step": 240,
49+
"loss": 1.9789
50+
},
51+
{
52+
"step": 260,
53+
"loss": 1.9761
54+
}
55+
],
56+
"samples_per_second": 106.759
57+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"steps": [
3+
{
4+
"step": 20,
5+
"loss": 4.5012
6+
},
7+
{
8+
"step": 40,
9+
"loss": 1.7472
10+
},
11+
{
12+
"step": 60,
13+
"loss": 1.6405
14+
},
15+
{
16+
"step": 80,
17+
"loss": 1.605
18+
},
19+
{
20+
"step": 100,
21+
"loss": 1.5867
22+
},
23+
{
24+
"step": 120,
25+
"loss": 1.5764
26+
},
27+
{
28+
"step": 140,
29+
"loss": 1.5689
30+
},
31+
{
32+
"step": 160,
33+
"loss": 1.5624
34+
},
35+
{
36+
"step": 180,
37+
"loss": 1.558
38+
},
39+
{
40+
"step": 200,
41+
"loss": 1.5549
42+
},
43+
{
44+
"step": 220,
45+
"loss": 1.5532
46+
},
47+
{
48+
"step": 240,
49+
"loss": 1.5518
50+
},
51+
{
52+
"step": 260,
53+
"loss": 1.5503
54+
}
55+
],
56+
"samples_per_second": 24.375
57+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"steps": [
3+
{
4+
"step": 20,
5+
"loss": 4.8197
6+
},
7+
{
8+
"step": 40,
9+
"loss": 3.8907
10+
},
11+
{
12+
"step": 60,
13+
"loss": 3.3169
14+
},
15+
{
16+
"step": 80,
17+
"loss": 1.9865
18+
},
19+
{
20+
"step": 100,
21+
"loss": 1.784
22+
},
23+
{
24+
"step": 120,
25+
"loss": 1.4789
26+
},
27+
{
28+
"step": 140,
29+
"loss": 1.3819
30+
},
31+
{
32+
"step": 160,
33+
"loss": 1.3282
34+
},
35+
{
36+
"step": 180,
37+
"loss": 1.453
38+
},
39+
{
40+
"step": 200,
41+
"loss": 1.2205
42+
},
43+
{
44+
"step": 220,
45+
"loss": 1.2752
46+
},
47+
{
48+
"step": 240,
49+
"loss": 1.172
50+
},
51+
{
52+
"step": 260,
53+
"loss": 1.263
54+
}
55+
],
56+
"samples_per_second": 37.399
57+
}

0 commit comments

Comments
 (0)