Skip to content
This repository was archived by the owner on Jul 15, 2022. It is now read-only.

老人心脏病诊断 #52

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## 项目介绍

本项目使用 AWS SageMaker 训练模型,用于预测老年人是否患有心脏病,并通过 AWS 完成模型的部署。

## 团队介绍

张鹏涛,来自明略科技,从事 NLP 与推荐系统方向的工作,在机器学习和深度学习的应用方面有着丰富的经验。联系方式: [email protected]

## 使用到的aws技术

1. Amazon SageMaker
2. Amazon S3(AWS 的存储服务)

Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
70,1,4,130,322,0,2,109,0,2.4,2,3,3,2
67,0,3,115,564,0,2,160,0,1.6,2,0,7,1
57,1,2,124,261,0,0,141,0,0.3,1,0,7,2
64,1,4,128,263,0,0,105,1,0.2,2,1,7,1
74,0,2,120,269,0,2,121,1,0.2,1,1,3,1
65,1,4,120,177,0,0,140,0,0.4,1,0,7,1
56,1,3,130,256,1,2,142,1,0.6,2,1,6,2
59,1,4,110,239,0,2,142,1,1.2,2,1,7,2
60,1,4,140,293,0,2,170,0,1.2,2,2,7,2
63,0,4,150,407,0,2,154,0,4,2,3,7,2
59,1,4,135,234,0,0,161,0,0.5,2,0,7,1
53,1,4,142,226,0,2,111,1,0,1,0,7,1
44,1,3,140,235,0,2,180,0,0,1,0,3,1
61,1,1,134,234,0,0,145,0,2.6,2,2,3,2
57,0,4,128,303,0,2,159,0,0,1,1,3,1
71,0,4,112,149,0,0,125,0,1.6,2,0,3,1
46,1,4,140,311,0,0,120,1,1.8,2,2,7,2
53,1,4,140,203,1,2,155,1,3.1,3,0,7,2
64,1,1,110,211,0,2,144,1,1.8,2,0,3,1
40,1,1,140,199,0,0,178,1,1.4,1,0,7,1
67,1,4,120,229,0,2,129,1,2.6,2,2,7,2
48,1,2,130,245,0,2,180,0,0.2,2,0,3,1
43,1,4,115,303,0,0,181,0,1.2,2,0,3,1
47,1,4,112,204,0,0,143,0,0.1,1,0,3,1
54,0,2,132,288,1,2,159,1,0,1,1,3,1
48,0,3,130,275,0,0,139,0,0.2,1,0,3,1
46,0,4,138,243,0,2,152,1,0,2,0,3,1
51,0,3,120,295,0,2,157,0,0.6,1,0,3,1
58,1,3,112,230,0,2,165,0,2.5,2,1,7,2
71,0,3,110,265,1,2,130,0,0,1,1,3,1
57,1,3,128,229,0,2,150,0,0.4,2,1,7,2
66,1,4,160,228,0,2,138,0,2.3,1,0,6,1
37,0,3,120,215,0,0,170,0,0,1,0,3,1
59,1,4,170,326,0,2,140,1,3.4,3,0,7,2
50,1,4,144,200,0,2,126,1,0.9,2,0,7,2
48,1,4,130,256,1,2,150,1,0,1,2,7,2
61,1,4,140,207,0,2,138,1,1.9,1,1,7,2
59,1,1,160,273,0,2,125,0,0,1,0,3,2
42,1,3,130,180,0,0,150,0,0,1,0,3,1
48,1,4,122,222,0,2,186,0,0,1,0,3,1
40,1,4,152,223,0,0,181,0,0,1,0,7,2
62,0,4,124,209,0,0,163,0,0,1,0,3,1
44,1,3,130,233,0,0,179,1,0.4,1,0,3,1
46,1,2,101,197,1,0,156,0,0,1,0,7,1
59,1,3,126,218,1,0,134,0,2.2,2,1,6,2
58,1,3,140,211,1,2,165,0,0,1,0,3,1
49,1,3,118,149,0,2,126,0,0.8,1,3,3,2
44,1,4,110,197,0,2,177,0,0,1,1,3,2
66,1,2,160,246,0,0,120,1,0,2,3,6,2
65,0,4,150,225,0,2,114,0,1,2,3,7,2
42,1,4,136,315,0,0,125,1,1.8,2,0,6,2
52,1,2,128,205,1,0,184,0,0,1,0,3,1
65,0,3,140,417,1,2,157,0,0.8,1,1,3,1
63,0,2,140,195,0,0,179,0,0,1,2,3,1
45,0,2,130,234,0,2,175,0,0.6,2,0,3,1
41,0,2,105,198,0,0,168,0,0,1,1,3,1
61,1,4,138,166,0,2,125,1,3.6,2,1,3,2
60,0,3,120,178,1,0,96,0,0,1,0,3,1
59,0,4,174,249,0,0,143,1,0,2,0,3,2
62,1,2,120,281,0,2,103,0,1.4,2,1,7,2
57,1,3,150,126,1,0,173,0,0.2,1,1,7,1
51,0,4,130,305,0,0,142,1,1.2,2,0,7,2
44,1,3,120,226,0,0,169,0,0,1,0,3,1
60,0,1,150,240,0,0,171,0,0.9,1,0,3,1
63,1,1,145,233,1,2,150,0,2.3,3,0,6,1
57,1,4,150,276,0,2,112,1,0.6,2,1,6,2
51,1,4,140,261,0,2,186,1,0,1,0,3,1
58,0,2,136,319,1,2,152,0,0,1,2,3,2
44,0,3,118,242,0,0,149,0,0.3,2,1,3,1
47,1,3,108,243,0,0,152,0,0,1,0,3,2
61,1,4,120,260,0,0,140,1,3.6,2,1,7,2
57,0,4,120,354,0,0,163,1,0.6,1,0,3,1
70,1,2,156,245,0,2,143,0,0,1,0,3,1
76,0,3,140,197,0,1,116,0,1.1,2,0,3,1
67,0,4,106,223,0,0,142,0,0.3,1,2,3,1
45,1,4,142,309,0,2,147,1,0,2,3,7,2
45,1,4,104,208,0,2,148,1,3,2,0,3,1
39,0,3,94,199,0,0,179,0,0,1,0,3,1
42,0,3,120,209,0,0,173,0,0,2,0,3,1
56,1,2,120,236,0,0,178,0,0.8,1,0,3,1
58,1,4,146,218,0,0,105,0,2,2,1,7,2
35,1,4,120,198,0,0,130,1,1.6,2,0,7,2
58,1,4,150,270,0,2,111,1,0.8,1,0,7,2
41,1,3,130,214,0,2,168,0,2,2,0,3,1
57,1,4,110,201,0,0,126,1,1.5,2,0,6,1
42,1,1,148,244,0,2,178,0,0.8,1,2,3,1
62,1,2,128,208,1,2,140,0,0,1,0,3,1
59,1,1,178,270,0,2,145,0,4.2,3,0,7,1
41,0,2,126,306,0,0,163,0,0,1,0,3,1
50,1,4,150,243,0,2,128,0,2.6,2,0,7,2
59,1,2,140,221,0,0,164,1,0,1,0,3,1
61,0,4,130,330,0,2,169,0,0,1,0,3,2
54,1,4,124,266,0,2,109,1,2.2,2,1,7,2
54,1,4,110,206,0,2,108,1,0,2,1,3,2
52,1,4,125,212,0,0,168,0,1,1,2,7,2
47,1,4,110,275,0,2,118,1,1,2,1,3,2
66,1,4,120,302,0,2,151,0,0.4,2,0,3,1
58,1,4,100,234,0,0,156,0,0.1,1,1,7,2
64,0,3,140,313,0,0,133,0,0.2,1,0,7,1
50,0,2,120,244,0,0,162,0,1.1,1,0,3,1
44,0,3,108,141,0,0,175,0,0.6,2,0,3,1
67,1,4,120,237,0,0,71,0,1,2,0,3,2
49,0,4,130,269,0,0,163,0,0,1,0,3,1
57,1,4,165,289,1,2,124,0,1,2,3,7,2
63,1,4,130,254,0,2,147,0,1.4,2,1,7,2
48,1,4,124,274,0,2,166,0,0.5,2,0,7,2
51,1,3,100,222,0,0,143,1,1.2,2,0,3,1
60,0,4,150,258,0,2,157,0,2.6,2,2,7,2
59,1,4,140,177,0,0,162,1,0,1,1,7,2
45,0,2,112,160,0,0,138,0,0,2,0,3,1
55,0,4,180,327,0,1,117,1,3.4,2,0,3,2
41,1,2,110,235,0,0,153,0,0,1,0,3,1
60,0,4,158,305,0,2,161,0,0,1,0,3,2
54,0,3,135,304,1,0,170,0,0,1,0,3,1
42,1,2,120,295,0,0,162,0,0,1,0,3,1
49,0,2,134,271,0,0,162,0,0,2,0,3,1
46,1,4,120,249,0,2,144,0,0.8,1,0,7,2
56,0,4,200,288,1,2,133,1,4,3,2,7,2
66,0,1,150,226,0,0,114,0,2.6,3,0,3,1
56,1,4,130,283,1,2,103,1,1.6,3,0,7,2
49,1,3,120,188,0,0,139,0,2,2,3,7,2
54,1,4,122,286,0,2,116,1,3.2,2,2,3,2
57,1,4,152,274,0,0,88,1,1.2,2,1,7,2
65,0,3,160,360,0,2,151,0,0.8,1,0,3,1
54,1,3,125,273,0,2,152,0,0.5,3,1,3,1
54,0,3,160,201,0,0,163,0,0,1,1,3,1
62,1,4,120,267,0,0,99,1,1.8,2,2,7,2
52,0,3,136,196,0,2,169,0,0.1,2,0,3,1
52,1,2,134,201,0,0,158,0,0.8,1,1,3,1
60,1,4,117,230,1,0,160,1,1.4,1,2,7,2
63,0,4,108,269,0,0,169,1,1.8,2,2,3,2
66,1,4,112,212,0,2,132,1,0.1,1,1,3,2
42,1,4,140,226,0,0,178,0,0,1,0,3,1
64,1,4,120,246,0,2,96,1,2.2,3,1,3,2
54,1,3,150,232,0,2,165,0,1.6,1,0,7,1
46,0,3,142,177,0,2,160,1,1.4,3,0,3,1
67,0,3,152,277,0,0,172,0,0,1,1,3,1
56,1,4,125,249,1,2,144,1,1.2,2,1,3,2
34,0,2,118,210,0,0,192,0,0.7,1,0,3,1
57,1,4,132,207,0,0,168,1,0,1,0,7,1
64,1,4,145,212,0,2,132,0,2,2,2,6,2
59,1,4,138,271,0,2,182,0,0,1,0,3,1
50,1,3,140,233,0,0,163,0,0.6,2,1,7,2
51,1,1,125,213,0,2,125,1,1.4,1,1,3,1
54,1,2,192,283,0,2,195,0,0,1,1,7,2
53,1,4,123,282,0,0,95,1,2,2,2,7,2
52,1,4,112,230,0,0,160,0,0,1,1,3,2
40,1,4,110,167,0,2,114,1,2,2,0,7,2
58,1,3,132,224,0,2,173,0,3.2,1,2,7,2
41,0,3,112,268,0,2,172,1,0,1,0,3,1
41,1,3,112,250,0,0,179,0,0,1,0,3,1
50,0,3,120,219,0,0,158,0,1.6,2,0,3,1
54,0,3,108,267,0,2,167,0,0,1,0,3,1
64,0,4,130,303,0,0,122,0,2,2,2,3,1
51,0,3,130,256,0,2,149,0,0.5,1,0,3,1
46,0,2,105,204,0,0,172,0,0,1,0,3,1
55,1,4,140,217,0,0,111,1,5.6,3,0,7,2
45,1,2,128,308,0,2,170,0,0,1,0,3,1
56,1,1,120,193,0,2,162,0,1.9,2,0,7,1
66,0,4,178,228,1,0,165,1,1,2,2,7,2
38,1,1,120,231,0,0,182,1,3.8,2,0,7,2
62,0,4,150,244,0,0,154,1,1.4,2,0,3,2
55,1,2,130,262,0,0,155,0,0,1,0,3,1
58,1,4,128,259,0,2,130,1,3,2,2,7,2
43,1,4,110,211,0,0,161,0,0,1,0,7,1
64,0,4,180,325,0,0,154,1,0,1,0,3,1
50,0,4,110,254,0,2,159,0,0,1,0,3,1
53,1,3,130,197,1,2,152,0,1.2,3,0,3,1
45,0,4,138,236,0,2,152,1,0.2,2,0,3,1
65,1,1,138,282,1,2,174,0,1.4,2,1,3,2
69,1,1,160,234,1,2,131,0,0.1,2,1,3,1
69,1,3,140,254,0,2,146,0,2,2,3,7,2
67,1,4,100,299,0,2,125,1,0.9,2,2,3,2
68,0,3,120,211,0,2,115,0,1.5,2,0,3,1
34,1,1,118,182,0,2,174,0,0,1,0,3,1
62,0,4,138,294,1,0,106,0,1.9,2,3,3,2
51,1,4,140,298,0,0,122,1,4.2,2,3,7,2
46,1,3,150,231,0,0,147,0,3.6,2,0,3,2
67,1,4,125,254,1,0,163,0,0.2,2,2,7,2
50,1,3,129,196,0,0,163,0,0,1,0,3,1
42,1,3,120,240,1,0,194,0,0.8,3,0,7,1
56,0,4,134,409,0,2,150,1,1.9,2,2,7,2
41,1,4,110,172,0,2,158,0,0,1,0,7,2
42,0,4,102,265,0,2,122,0,0.6,2,0,3,1
53,1,3,130,246,1,2,173,0,0,1,3,3,1
43,1,3,130,315,0,0,162,0,1.9,1,1,3,1
56,1,4,132,184,0,2,105,1,2.1,2,1,6,2
52,1,4,108,233,1,0,147,0,0.1,1,3,7,1
62,0,4,140,394,0,2,157,0,1.2,2,0,3,1
70,1,3,160,269,0,0,112,1,2.9,2,1,7,2
54,1,4,140,239,0,0,160,0,1.2,1,0,3,1
70,1,4,145,174,0,0,125,1,2.6,3,0,7,2
54,1,2,108,309,0,0,156,0,0,1,0,7,1
35,1,4,126,282,0,2,156,1,0,1,0,7,2
48,1,3,124,255,1,0,175,0,0,1,2,3,1
55,0,2,135,250,0,2,161,0,1.4,2,0,3,1
58,0,4,100,248,0,2,122,0,1,2,0,3,1
54,0,3,110,214,0,0,158,0,1.6,2,0,3,1
69,0,1,140,239,0,0,151,0,1.8,1,2,3,1
77,1,4,125,304,0,2,162,1,0,1,3,3,2
68,1,3,118,277,0,0,151,0,1,1,1,7,1
58,1,4,125,300,0,2,171,0,0,1,2,7,2
60,1,4,125,258,0,2,141,1,2.8,2,1,7,2
51,1,4,140,299,0,0,173,1,1.6,1,0,7,2
55,1,4,160,289,0,2,145,1,0.8,2,1,7,2
52,1,1,152,298,1,0,178,0,1.2,2,0,7,1
60,0,3,102,318,0,0,160,0,0,1,1,3,1
58,1,3,105,240,0,2,154,1,0.6,2,0,7,1
64,1,3,125,309,0,0,131,1,1.8,2,0,7,2
37,1,3,130,250,0,0,187,0,3.5,3,0,3,1
59,1,1,170,288,0,2,159,0,0.2,2,0,7,2
51,1,3,125,245,1,2,166,0,2.4,2,0,3,1
43,0,3,122,213,0,0,165,0,0.2,2,0,3,1
58,1,4,128,216,0,2,131,1,2.2,2,3,7,2
29,1,2,130,204,0,2,202,0,0,1,0,3,1
41,0,2,130,204,0,2,172,0,1.4,1,0,3,1
63,0,3,135,252,0,2,172,0,0,1,0,3,1
51,1,3,94,227,0,0,154,1,0,1,1,7,1
54,1,3,120,258,0,2,147,0,0.4,2,0,7,1
44,1,2,120,220,0,0,170,0,0,1,0,3,1
54,1,4,110,239,0,0,126,1,2.8,2,1,7,2
65,1,4,135,254,0,2,127,0,2.8,2,1,7,2
57,1,3,150,168,0,0,174,0,1.6,1,0,3,1
63,1,4,130,330,1,2,132,1,1.8,1,3,7,2
35,0,4,138,183,0,0,182,0,1.4,1,0,3,1
41,1,2,135,203,0,0,132,0,0,2,0,6,1
62,0,3,130,263,0,0,97,0,1.2,2,1,7,2
43,0,4,132,341,1,2,136,1,3,2,0,7,2
58,0,1,150,283,1,2,162,0,1,1,0,3,1
52,1,1,118,186,0,2,190,0,0,2,0,6,1
61,0,4,145,307,0,2,146,1,1,2,0,7,2
39,1,4,118,219,0,0,140,0,1.2,2,0,7,2
45,1,4,115,260,0,2,185,0,0,1,0,3,1
52,1,4,128,255,0,0,161,1,0,1,1,7,2
62,1,3,130,231,0,0,146,0,1.8,2,3,7,1
62,0,4,160,164,0,2,145,0,6.2,3,3,7,2
53,0,4,138,234,0,2,160,0,0,1,0,3,1
43,1,4,120,177,0,2,120,1,2.5,2,0,7,2
47,1,3,138,257,0,2,156,0,0,1,0,3,1
52,1,2,120,325,0,0,172,0,0.2,1,0,3,1
68,1,3,180,274,1,2,150,1,1.6,2,0,7,2
39,1,3,140,321,0,2,182,0,0,1,0,3,1
53,0,4,130,264,0,2,143,0,0.4,2,0,3,1
62,0,4,140,268,0,2,160,0,3.6,3,2,3,2
51,0,3,140,308,0,2,142,0,1.5,1,1,3,1
60,1,4,130,253,0,0,144,1,1.4,1,1,7,2
65,1,4,110,248,0,2,158,0,0.6,1,2,6,2
65,0,3,155,269,0,0,148,0,0.8,1,0,3,1
60,1,3,140,185,0,2,155,0,3,2,0,3,2
60,1,4,145,282,0,2,142,1,2.8,2,2,7,2
54,1,4,120,188,0,0,113,0,1.4,2,1,7,2
44,1,2,130,219,0,2,188,0,0,1,0,3,1
44,1,4,112,290,0,2,153,0,0,1,1,3,2
51,1,3,110,175,0,0,123,0,0.6,1,0,3,1
59,1,3,150,212,1,0,157,0,1.6,1,0,3,1
71,0,2,160,302,0,0,162,0,0.4,1,2,3,1
61,1,3,150,243,1,0,137,1,1,2,0,3,1
55,1,4,132,353,0,0,132,1,1.2,2,1,7,2
64,1,3,140,335,0,0,158,0,0,1,0,3,2
43,1,4,150,247,0,0,171,0,1.5,1,0,3,1
58,0,3,120,340,0,0,172,0,0,1,0,3,1
60,1,4,130,206,0,2,132,1,2.4,2,2,7,2
58,1,2,120,284,0,2,160,0,1.8,2,0,3,2
49,1,2,130,266,0,0,171,0,0.6,1,0,3,1
48,1,2,110,229,0,0,168,0,1,3,0,7,2
52,1,3,172,199,1,0,162,0,0.5,1,0,7,1
44,1,2,120,263,0,0,173,0,0,1,0,7,1
56,0,2,140,294,0,2,153,0,1.3,2,0,3,1
57,1,4,140,192,0,0,148,0,0.4,2,0,6,1
67,1,4,160,286,0,2,108,1,1.5,2,3,3,2
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from sklearn.model_selection import train_test_split
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.predictor import csv_serializer
from utils import read_data
from sklearn.metrics import accuracy_score
import os
import pandas as pd
import sys
sys.path.append("./")

# End-to-end pipeline: load the raw data, split it, upload the splits to S3,
# train SageMaker's built-in XGBoost, evaluate it with a batch transform job
# and finally deploy the model behind a real-time endpoint.

data_file = "../data/data_raw.csv"
data_dir = "../data/"
prefix = 'sentiment-web-app'

# 13 feature columns followed by the label column.
data_columns = ["ATTR" + str(i) for i in range(1, 14)]
data_columns.append("Label")

X, Y = read_data(data_file=data_file, data_columns=data_columns)

# The raw labels are 1 / 2, but the `binary:logistic` objective only accepts
# labels in [0, 1], so shift them to 0 / 1.
# NOTE(review): presumably 2 marks the positive (disease) class - confirm.
Y = Y - 1

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)

train_file = os.path.join(data_dir, 'train.csv')
validation_file = os.path.join(data_dir, 'validation.csv')
test_file = os.path.join(data_dir, 'test.csv')

# SageMaker's built-in XGBoost expects CSV training data without a header row
# and with the label in the FIRST column.
pd.concat([y_train, X_train], axis=1).to_csv(train_file, header=False, index=False)
pd.concat([y_test, X_test], axis=1).to_csv(validation_file, header=False, index=False)
# The batch transform input must contain the features only (no label, no header).
X_test.to_csv(test_file, header=False, index=False)

# Upload the prepared data sets to S3.
session = sagemaker.Session()
bucket = session.default_bucket()

train_location = session.upload_data(train_file, key_prefix=prefix)
validation_location = session.upload_data(validation_file, key_prefix=prefix)
test_location = session.upload_data(test_file, key_prefix=prefix)

# Train the model with SageMaker.
role = get_execution_role()

container = get_image_uri(session.boto_region_name, 'xgboost')

xgb = sagemaker.estimator.Estimator(container,
                                    role,
                                    train_instance_count=1,
                                    train_instance_type='ml.m4.xlarge',
                                    output_path='s3://{}/{}/output'.format(bucket, prefix),
                                    sagemaker_session=session)

# Hyperparameters for XGBoost.
xgb.set_hyperparameters(max_depth=5,
                        eta=0.2,
                        gamma=4,
                        min_child_weight=6,
                        subsample=0.8,
                        silent=0,
                        objective='binary:logistic',
                        early_stopping_rounds=10,
                        num_round=500)

# Point the training job at the data in S3.
s3_input_train = sagemaker.s3_input(s3_data=train_location, content_type='csv')
s3_input_validation = sagemaker.s3_input(s3_data=validation_location, content_type='csv')

# Fit the model. The hold-out data has to go through the `validation` channel:
# that is the channel early stopping monitors.
# NOTE(review): the same hold-out rows are later used to report accuracy, so
# the reported score is optimistic; consider a separate validation split.
xgb.fit({'train': s3_input_train, 'validation': s3_input_validation})

# Evaluate the model with a batch transform job. The output location is set
# explicitly so the result file can be fetched from a known S3 key.
transform_prefix = '{}/transform'.format(prefix)
xgb_transformer = xgb.transformer(instance_count=1,
                                  instance_type='ml.m4.xlarge',
                                  output_path='s3://{}/{}'.format(bucket, transform_prefix))

xgb_transformer.transform(test_location, content_type='text/csv', split_type='Line')

xgb_transformer.wait()

# The transform job writes `<input file name>.out` to S3; download it before
# reading it locally.
predictions_file = os.path.join(data_dir, 'test.csv.out')
session.boto_session.resource('s3').Bucket(bucket).download_file(
    '{}/test.csv.out'.format(transform_prefix), predictions_file)

# Each output line is a probability; round it to the predicted 0 / 1 class.
predictions = pd.read_csv(predictions_file, header=None)
predictions = [round(num) for num in predictions.squeeze().values]

print("Test accuracy: {}".format(accuracy_score(y_test, predictions)))

# Deploy the model. The endpoint keeps running (and billing) until it is
# deleted explicitly.
xgb_predictor = xgb.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pandas as pd
import numpy as np


def read_data(data_file, data_columns=None):
    """Load a header-less CSV file and split it into features and label.

    The last column is returned as the label and all preceding columns as the
    features. Missing feature values are replaced with 0.

    :param data_file: path (or file-like object) of the CSV file to read; the
        file must not contain a header row
    :param data_columns: optional list of column names, one per CSV column;
        when omitted the default integer column labels are kept
    :return: tuple ``(feat, label)`` with the feature DataFrame and the label
        Series
    :raises ValueError: if ``data_columns`` does not match the number of
        columns in the file
    """
    # The data file has no header row. Without ``header=None`` pandas would
    # consume the first sample as column names and silently drop it.
    data = pd.read_csv(data_file, header=None)
    if data_columns is not None:
        data.columns = data_columns

    label = data.iloc[:, -1]
    # Fill missing feature values with 0.
    feat = data.iloc[:, :-1].fillna(0)

    return feat, label