diff --git "a/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/README.md" "b/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/README.md" new file mode 100644 index 00000000..6d60d0bc --- /dev/null +++ "b/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/README.md" @@ -0,0 +1,13 @@ +## 项目介绍 + +通过aws的sagemaker建立模型,模型的应用场景是预测老人是否患有心脏病,同时使用aws进行模型的部署 + +## 团队介绍 + +张鹏涛,来自明略科技,从事nlp以及推荐领域,对机器学习以及深度学习的应用,有着丰富的经验,联系方式: 772215156@qq.com + +## 使用到的aws技术 + +1, sagemaker +2, aws的存储功能 + diff --git "a/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/data/data_raw.csv" "b/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/data/data_raw.csv" new file mode 100644 index 00000000..2f5cdb4f --- /dev/null +++ "b/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/data/data_raw.csv" @@ -0,0 +1,270 @@ +70,1,4,130,322,0,2,109,0,2.4,2,3,3,2 
+67,0,3,115,564,0,2,160,0,1.6,2,0,7,1 +57,1,2,124,261,0,0,141,0,0.3,1,0,7,2 +64,1,4,128,263,0,0,105,1,0.2,2,1,7,1 +74,0,2,120,269,0,2,121,1,0.2,1,1,3,1 +65,1,4,120,177,0,0,140,0,0.4,1,0,7,1 +56,1,3,130,256,1,2,142,1,0.6,2,1,6,2 +59,1,4,110,239,0,2,142,1,1.2,2,1,7,2 +60,1,4,140,293,0,2,170,0,1.2,2,2,7,2 +63,0,4,150,407,0,2,154,0,4,2,3,7,2 +59,1,4,135,234,0,0,161,0,0.5,2,0,7,1 +53,1,4,142,226,0,2,111,1,0,1,0,7,1 +44,1,3,140,235,0,2,180,0,0,1,0,3,1 +61,1,1,134,234,0,0,145,0,2.6,2,2,3,2 +57,0,4,128,303,0,2,159,0,0,1,1,3,1 +71,0,4,112,149,0,0,125,0,1.6,2,0,3,1 +46,1,4,140,311,0,0,120,1,1.8,2,2,7,2 +53,1,4,140,203,1,2,155,1,3.1,3,0,7,2 +64,1,1,110,211,0,2,144,1,1.8,2,0,3,1 +40,1,1,140,199,0,0,178,1,1.4,1,0,7,1 +67,1,4,120,229,0,2,129,1,2.6,2,2,7,2 +48,1,2,130,245,0,2,180,0,0.2,2,0,3,1 +43,1,4,115,303,0,0,181,0,1.2,2,0,3,1 +47,1,4,112,204,0,0,143,0,0.1,1,0,3,1 +54,0,2,132,288,1,2,159,1,0,1,1,3,1 +48,0,3,130,275,0,0,139,0,0.2,1,0,3,1 +46,0,4,138,243,0,2,152,1,0,2,0,3,1 +51,0,3,120,295,0,2,157,0,0.6,1,0,3,1 +58,1,3,112,230,0,2,165,0,2.5,2,1,7,2 +71,0,3,110,265,1,2,130,0,0,1,1,3,1 +57,1,3,128,229,0,2,150,0,0.4,2,1,7,2 +66,1,4,160,228,0,2,138,0,2.3,1,0,6,1 +37,0,3,120,215,0,0,170,0,0,1,0,3,1 +59,1,4,170,326,0,2,140,1,3.4,3,0,7,2 +50,1,4,144,200,0,2,126,1,0.9,2,0,7,2 +48,1,4,130,256,1,2,150,1,0,1,2,7,2 +61,1,4,140,207,0,2,138,1,1.9,1,1,7,2 +59,1,1,160,273,0,2,125,0,0,1,0,3,2 +42,1,3,130,180,0,0,150,0,0,1,0,3,1 +48,1,4,122,222,0,2,186,0,0,1,0,3,1 +40,1,4,152,223,0,0,181,0,0,1,0,7,2 +62,0,4,124,209,0,0,163,0,0,1,0,3,1 +44,1,3,130,233,0,0,179,1,0.4,1,0,3,1 +46,1,2,101,197,1,0,156,0,0,1,0,7,1 +59,1,3,126,218,1,0,134,0,2.2,2,1,6,2 +58,1,3,140,211,1,2,165,0,0,1,0,3,1 +49,1,3,118,149,0,2,126,0,0.8,1,3,3,2 +44,1,4,110,197,0,2,177,0,0,1,1,3,2 +66,1,2,160,246,0,0,120,1,0,2,3,6,2 +65,0,4,150,225,0,2,114,0,1,2,3,7,2 +42,1,4,136,315,0,0,125,1,1.8,2,0,6,2 +52,1,2,128,205,1,0,184,0,0,1,0,3,1 +65,0,3,140,417,1,2,157,0,0.8,1,1,3,1 +63,0,2,140,195,0,0,179,0,0,1,2,3,1 
+45,0,2,130,234,0,2,175,0,0.6,2,0,3,1 +41,0,2,105,198,0,0,168,0,0,1,1,3,1 +61,1,4,138,166,0,2,125,1,3.6,2,1,3,2 +60,0,3,120,178,1,0,96,0,0,1,0,3,1 +59,0,4,174,249,0,0,143,1,0,2,0,3,2 +62,1,2,120,281,0,2,103,0,1.4,2,1,7,2 +57,1,3,150,126,1,0,173,0,0.2,1,1,7,1 +51,0,4,130,305,0,0,142,1,1.2,2,0,7,2 +44,1,3,120,226,0,0,169,0,0,1,0,3,1 +60,0,1,150,240,0,0,171,0,0.9,1,0,3,1 +63,1,1,145,233,1,2,150,0,2.3,3,0,6,1 +57,1,4,150,276,0,2,112,1,0.6,2,1,6,2 +51,1,4,140,261,0,2,186,1,0,1,0,3,1 +58,0,2,136,319,1,2,152,0,0,1,2,3,2 +44,0,3,118,242,0,0,149,0,0.3,2,1,3,1 +47,1,3,108,243,0,0,152,0,0,1,0,3,2 +61,1,4,120,260,0,0,140,1,3.6,2,1,7,2 +57,0,4,120,354,0,0,163,1,0.6,1,0,3,1 +70,1,2,156,245,0,2,143,0,0,1,0,3,1 +76,0,3,140,197,0,1,116,0,1.1,2,0,3,1 +67,0,4,106,223,0,0,142,0,0.3,1,2,3,1 +45,1,4,142,309,0,2,147,1,0,2,3,7,2 +45,1,4,104,208,0,2,148,1,3,2,0,3,1 +39,0,3,94,199,0,0,179,0,0,1,0,3,1 +42,0,3,120,209,0,0,173,0,0,2,0,3,1 +56,1,2,120,236,0,0,178,0,0.8,1,0,3,1 +58,1,4,146,218,0,0,105,0,2,2,1,7,2 +35,1,4,120,198,0,0,130,1,1.6,2,0,7,2 +58,1,4,150,270,0,2,111,1,0.8,1,0,7,2 +41,1,3,130,214,0,2,168,0,2,2,0,3,1 +57,1,4,110,201,0,0,126,1,1.5,2,0,6,1 +42,1,1,148,244,0,2,178,0,0.8,1,2,3,1 +62,1,2,128,208,1,2,140,0,0,1,0,3,1 +59,1,1,178,270,0,2,145,0,4.2,3,0,7,1 +41,0,2,126,306,0,0,163,0,0,1,0,3,1 +50,1,4,150,243,0,2,128,0,2.6,2,0,7,2 +59,1,2,140,221,0,0,164,1,0,1,0,3,1 +61,0,4,130,330,0,2,169,0,0,1,0,3,2 +54,1,4,124,266,0,2,109,1,2.2,2,1,7,2 +54,1,4,110,206,0,2,108,1,0,2,1,3,2 +52,1,4,125,212,0,0,168,0,1,1,2,7,2 +47,1,4,110,275,0,2,118,1,1,2,1,3,2 +66,1,4,120,302,0,2,151,0,0.4,2,0,3,1 +58,1,4,100,234,0,0,156,0,0.1,1,1,7,2 +64,0,3,140,313,0,0,133,0,0.2,1,0,7,1 +50,0,2,120,244,0,0,162,0,1.1,1,0,3,1 +44,0,3,108,141,0,0,175,0,0.6,2,0,3,1 +67,1,4,120,237,0,0,71,0,1,2,0,3,2 +49,0,4,130,269,0,0,163,0,0,1,0,3,1 +57,1,4,165,289,1,2,124,0,1,2,3,7,2 +63,1,4,130,254,0,2,147,0,1.4,2,1,7,2 +48,1,4,124,274,0,2,166,0,0.5,2,0,7,2 +51,1,3,100,222,0,0,143,1,1.2,2,0,3,1 
+60,0,4,150,258,0,2,157,0,2.6,2,2,7,2 +59,1,4,140,177,0,0,162,1,0,1,1,7,2 +45,0,2,112,160,0,0,138,0,0,2,0,3,1 +55,0,4,180,327,0,1,117,1,3.4,2,0,3,2 +41,1,2,110,235,0,0,153,0,0,1,0,3,1 +60,0,4,158,305,0,2,161,0,0,1,0,3,2 +54,0,3,135,304,1,0,170,0,0,1,0,3,1 +42,1,2,120,295,0,0,162,0,0,1,0,3,1 +49,0,2,134,271,0,0,162,0,0,2,0,3,1 +46,1,4,120,249,0,2,144,0,0.8,1,0,7,2 +56,0,4,200,288,1,2,133,1,4,3,2,7,2 +66,0,1,150,226,0,0,114,0,2.6,3,0,3,1 +56,1,4,130,283,1,2,103,1,1.6,3,0,7,2 +49,1,3,120,188,0,0,139,0,2,2,3,7,2 +54,1,4,122,286,0,2,116,1,3.2,2,2,3,2 +57,1,4,152,274,0,0,88,1,1.2,2,1,7,2 +65,0,3,160,360,0,2,151,0,0.8,1,0,3,1 +54,1,3,125,273,0,2,152,0,0.5,3,1,3,1 +54,0,3,160,201,0,0,163,0,0,1,1,3,1 +62,1,4,120,267,0,0,99,1,1.8,2,2,7,2 +52,0,3,136,196,0,2,169,0,0.1,2,0,3,1 +52,1,2,134,201,0,0,158,0,0.8,1,1,3,1 +60,1,4,117,230,1,0,160,1,1.4,1,2,7,2 +63,0,4,108,269,0,0,169,1,1.8,2,2,3,2 +66,1,4,112,212,0,2,132,1,0.1,1,1,3,2 +42,1,4,140,226,0,0,178,0,0,1,0,3,1 +64,1,4,120,246,0,2,96,1,2.2,3,1,3,2 +54,1,3,150,232,0,2,165,0,1.6,1,0,7,1 +46,0,3,142,177,0,2,160,1,1.4,3,0,3,1 +67,0,3,152,277,0,0,172,0,0,1,1,3,1 +56,1,4,125,249,1,2,144,1,1.2,2,1,3,2 +34,0,2,118,210,0,0,192,0,0.7,1,0,3,1 +57,1,4,132,207,0,0,168,1,0,1,0,7,1 +64,1,4,145,212,0,2,132,0,2,2,2,6,2 +59,1,4,138,271,0,2,182,0,0,1,0,3,1 +50,1,3,140,233,0,0,163,0,0.6,2,1,7,2 +51,1,1,125,213,0,2,125,1,1.4,1,1,3,1 +54,1,2,192,283,0,2,195,0,0,1,1,7,2 +53,1,4,123,282,0,0,95,1,2,2,2,7,2 +52,1,4,112,230,0,0,160,0,0,1,1,3,2 +40,1,4,110,167,0,2,114,1,2,2,0,7,2 +58,1,3,132,224,0,2,173,0,3.2,1,2,7,2 +41,0,3,112,268,0,2,172,1,0,1,0,3,1 +41,1,3,112,250,0,0,179,0,0,1,0,3,1 +50,0,3,120,219,0,0,158,0,1.6,2,0,3,1 +54,0,3,108,267,0,2,167,0,0,1,0,3,1 +64,0,4,130,303,0,0,122,0,2,2,2,3,1 +51,0,3,130,256,0,2,149,0,0.5,1,0,3,1 +46,0,2,105,204,0,0,172,0,0,1,0,3,1 +55,1,4,140,217,0,0,111,1,5.6,3,0,7,2 +45,1,2,128,308,0,2,170,0,0,1,0,3,1 +56,1,1,120,193,0,2,162,0,1.9,2,0,7,1 +66,0,4,178,228,1,0,165,1,1,2,2,7,2 +38,1,1,120,231,0,0,182,1,3.8,2,0,7,2 
+62,0,4,150,244,0,0,154,1,1.4,2,0,3,2 +55,1,2,130,262,0,0,155,0,0,1,0,3,1 +58,1,4,128,259,0,2,130,1,3,2,2,7,2 +43,1,4,110,211,0,0,161,0,0,1,0,7,1 +64,0,4,180,325,0,0,154,1,0,1,0,3,1 +50,0,4,110,254,0,2,159,0,0,1,0,3,1 +53,1,3,130,197,1,2,152,0,1.2,3,0,3,1 +45,0,4,138,236,0,2,152,1,0.2,2,0,3,1 +65,1,1,138,282,1,2,174,0,1.4,2,1,3,2 +69,1,1,160,234,1,2,131,0,0.1,2,1,3,1 +69,1,3,140,254,0,2,146,0,2,2,3,7,2 +67,1,4,100,299,0,2,125,1,0.9,2,2,3,2 +68,0,3,120,211,0,2,115,0,1.5,2,0,3,1 +34,1,1,118,182,0,2,174,0,0,1,0,3,1 +62,0,4,138,294,1,0,106,0,1.9,2,3,3,2 +51,1,4,140,298,0,0,122,1,4.2,2,3,7,2 +46,1,3,150,231,0,0,147,0,3.6,2,0,3,2 +67,1,4,125,254,1,0,163,0,0.2,2,2,7,2 +50,1,3,129,196,0,0,163,0,0,1,0,3,1 +42,1,3,120,240,1,0,194,0,0.8,3,0,7,1 +56,0,4,134,409,0,2,150,1,1.9,2,2,7,2 +41,1,4,110,172,0,2,158,0,0,1,0,7,2 +42,0,4,102,265,0,2,122,0,0.6,2,0,3,1 +53,1,3,130,246,1,2,173,0,0,1,3,3,1 +43,1,3,130,315,0,0,162,0,1.9,1,1,3,1 +56,1,4,132,184,0,2,105,1,2.1,2,1,6,2 +52,1,4,108,233,1,0,147,0,0.1,1,3,7,1 +62,0,4,140,394,0,2,157,0,1.2,2,0,3,1 +70,1,3,160,269,0,0,112,1,2.9,2,1,7,2 +54,1,4,140,239,0,0,160,0,1.2,1,0,3,1 +70,1,4,145,174,0,0,125,1,2.6,3,0,7,2 +54,1,2,108,309,0,0,156,0,0,1,0,7,1 +35,1,4,126,282,0,2,156,1,0,1,0,7,2 +48,1,3,124,255,1,0,175,0,0,1,2,3,1 +55,0,2,135,250,0,2,161,0,1.4,2,0,3,1 +58,0,4,100,248,0,2,122,0,1,2,0,3,1 +54,0,3,110,214,0,0,158,0,1.6,2,0,3,1 +69,0,1,140,239,0,0,151,0,1.8,1,2,3,1 +77,1,4,125,304,0,2,162,1,0,1,3,3,2 +68,1,3,118,277,0,0,151,0,1,1,1,7,1 +58,1,4,125,300,0,2,171,0,0,1,2,7,2 +60,1,4,125,258,0,2,141,1,2.8,2,1,7,2 +51,1,4,140,299,0,0,173,1,1.6,1,0,7,2 +55,1,4,160,289,0,2,145,1,0.8,2,1,7,2 +52,1,1,152,298,1,0,178,0,1.2,2,0,7,1 +60,0,3,102,318,0,0,160,0,0,1,1,3,1 +58,1,3,105,240,0,2,154,1,0.6,2,0,7,1 +64,1,3,125,309,0,0,131,1,1.8,2,0,7,2 +37,1,3,130,250,0,0,187,0,3.5,3,0,3,1 +59,1,1,170,288,0,2,159,0,0.2,2,0,7,2 +51,1,3,125,245,1,2,166,0,2.4,2,0,3,1 +43,0,3,122,213,0,0,165,0,0.2,2,0,3,1 +58,1,4,128,216,0,2,131,1,2.2,2,3,7,2 
+29,1,2,130,204,0,2,202,0,0,1,0,3,1 +41,0,2,130,204,0,2,172,0,1.4,1,0,3,1 +63,0,3,135,252,0,2,172,0,0,1,0,3,1 +51,1,3,94,227,0,0,154,1,0,1,1,7,1 +54,1,3,120,258,0,2,147,0,0.4,2,0,7,1 +44,1,2,120,220,0,0,170,0,0,1,0,3,1 +54,1,4,110,239,0,0,126,1,2.8,2,1,7,2 +65,1,4,135,254,0,2,127,0,2.8,2,1,7,2 +57,1,3,150,168,0,0,174,0,1.6,1,0,3,1 +63,1,4,130,330,1,2,132,1,1.8,1,3,7,2 +35,0,4,138,183,0,0,182,0,1.4,1,0,3,1 +41,1,2,135,203,0,0,132,0,0,2,0,6,1 +62,0,3,130,263,0,0,97,0,1.2,2,1,7,2 +43,0,4,132,341,1,2,136,1,3,2,0,7,2 +58,0,1,150,283,1,2,162,0,1,1,0,3,1 +52,1,1,118,186,0,2,190,0,0,2,0,6,1 +61,0,4,145,307,0,2,146,1,1,2,0,7,2 +39,1,4,118,219,0,0,140,0,1.2,2,0,7,2 +45,1,4,115,260,0,2,185,0,0,1,0,3,1 +52,1,4,128,255,0,0,161,1,0,1,1,7,2 +62,1,3,130,231,0,0,146,0,1.8,2,3,7,1 +62,0,4,160,164,0,2,145,0,6.2,3,3,7,2 +53,0,4,138,234,0,2,160,0,0,1,0,3,1 +43,1,4,120,177,0,2,120,1,2.5,2,0,7,2 +47,1,3,138,257,0,2,156,0,0,1,0,3,1 +52,1,2,120,325,0,0,172,0,0.2,1,0,3,1 +68,1,3,180,274,1,2,150,1,1.6,2,0,7,2 +39,1,3,140,321,0,2,182,0,0,1,0,3,1 +53,0,4,130,264,0,2,143,0,0.4,2,0,3,1 +62,0,4,140,268,0,2,160,0,3.6,3,2,3,2 +51,0,3,140,308,0,2,142,0,1.5,1,1,3,1 +60,1,4,130,253,0,0,144,1,1.4,1,1,7,2 +65,1,4,110,248,0,2,158,0,0.6,1,2,6,2 +65,0,3,155,269,0,0,148,0,0.8,1,0,3,1 +60,1,3,140,185,0,2,155,0,3,2,0,3,2 +60,1,4,145,282,0,2,142,1,2.8,2,2,7,2 +54,1,4,120,188,0,0,113,0,1.4,2,1,7,2 +44,1,2,130,219,0,2,188,0,0,1,0,3,1 +44,1,4,112,290,0,2,153,0,0,1,1,3,2 +51,1,3,110,175,0,0,123,0,0.6,1,0,3,1 +59,1,3,150,212,1,0,157,0,1.6,1,0,3,1 +71,0,2,160,302,0,0,162,0,0.4,1,2,3,1 +61,1,3,150,243,1,0,137,1,1,2,0,3,1 +55,1,4,132,353,0,0,132,1,1.2,2,1,7,2 +64,1,3,140,335,0,0,158,0,0,1,0,3,2 +43,1,4,150,247,0,0,171,0,1.5,1,0,3,1 +58,0,3,120,340,0,0,172,0,0,1,0,3,1 +60,1,4,130,206,0,2,132,1,2.4,2,2,7,2 +58,1,2,120,284,0,2,160,0,1.8,2,0,3,2 +49,1,2,130,266,0,0,171,0,0.6,1,0,3,1 +48,1,2,110,229,0,0,168,0,1,3,0,7,2 +52,1,3,172,199,1,0,162,0,0.5,1,0,7,1 +44,1,2,120,263,0,0,173,0,0,1,0,7,1 
+56,0,2,140,294,0,2,153,0,1.3,2,0,3,1 +57,1,4,140,192,0,0,148,0,0.4,2,0,6,1 +67,1,4,160,286,0,2,108,1,1.5,2,3,3,2 diff --git "a/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/src/__pycache__/utils.cpython-36.pyc" "b/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/src/__pycache__/utils.cpython-36.pyc" new file mode 100644 index 00000000..68673e9e Binary files /dev/null and "b/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/src/__pycache__/utils.cpython-36.pyc" differ diff --git "a/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/src/main.py" "b/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/src/main.py" new file mode 100644 index 00000000..eab13855 --- /dev/null +++ "b/2 \350\200\201\345\271\264\344\272\272\347\232\204\350\277\234\347\250\213\346\231\272\350\203\275\350\257\212\347\226\227/\350\200\201\344\272\272\345\277\203\350\204\217\347\227\205\350\257\212\346\226\255-\345\274\240\351\271\217\346\266\233/src/main.py" @@ -0,0 +1,74 @@ +from sklearn.model_selection import train_test_split +import 
"""Train, evaluate and deploy an XGBoost heart-disease classifier on SageMaker.

Pipeline: read the raw CSV, split it, write the splits in the layout the
SageMaker built-in XGBoost container expects, upload them to S3, train,
score the held-out split with batch transform, then deploy an endpoint.
"""
from sklearn.model_selection import train_test_split
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.predictor import csv_serializer
from utils import read_data
from sklearn.metrics import accuracy_score
import os
import pandas as pd
import sys
from urllib.parse import urlparse
sys.path.append("./")

data_file = "../data/data_raw.csv"
data_columns = ["ATTR" + str(i) for i in range(1, 14)]
data_columns.append("Label")

X, Y = read_data(data_file=data_file, data_columns=data_columns)

# The raw file encodes the label as 1/2, but the `binary:logistic` objective
# requires labels in {0, 1}.
# NOTE(review): in the UCI Statlog (Heart) encoding 1 = absence and
# 2 = presence, so after this shift 1 would mean "has heart disease" --
# confirm this file follows that encoding.
Y = Y - 1

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)

data_dir = "../data/"
train_path = os.path.join(data_dir, 'train.csv')
test_path = os.path.join(data_dir, 'test.csv')
test_features_path = os.path.join(data_dir, 'test_features.csv')

# SageMaker's built-in XGBoost reads CSV with NO header row and the label in
# the FIRST column, so the label is concatenated first and the header dropped.
pd.concat([y_train, X_train], axis=1).to_csv(train_path, header=False, index=False)
pd.concat([y_test, X_test], axis=1).to_csv(test_path, header=False, index=False)
# Batch transform must only see feature columns; a labelled file would be
# parsed as having one feature too many.
X_test.to_csv(test_features_path, header=False, index=False)

# Upload the prepared data sets to S3.
session = sagemaker.Session()
prefix = 'sentiment-web-app'

test_location = session.upload_data(test_path, key_prefix=prefix)
test_features_location = session.upload_data(test_features_path, key_prefix=prefix)
train_location = session.upload_data(train_path, key_prefix=prefix)

# Train the model with SageMaker.
role = get_execution_role()

container = get_image_uri(session.boto_region_name, 'xgboost')

xgb = sagemaker.estimator.Estimator(container,
                                    role,
                                    train_instance_count=1,
                                    train_instance_type='ml.m4.xlarge',
                                    output_path='s3://{}/{}/output'.format(session.default_bucket(), prefix),
                                    sagemaker_session=session)

# Hyperparameters for XGBoost.
xgb.set_hyperparameters(max_depth=5,
                        eta=0.2,
                        gamma=4,
                        min_child_weight=6,
                        subsample=0.8,
                        silent=0,
                        objective='binary:logistic',
                        early_stopping_rounds=10,
                        num_round=500)

# Point the training job at the data in S3.
s3_input_train = sagemaker.s3_input(s3_data=train_location, content_type='csv')
s3_input_validation = sagemaker.s3_input(s3_data=test_location, content_type='csv')

# The built-in XGBoost algorithm only accepts the channel names `train` and
# `validation`; early stopping is evaluated on the `validation` channel.
# NOTE(review): the held-out split doubles as the early-stopping set here (as
# in the original script), so the accuracy reported below is optimistic.
xgb.fit({'train': s3_input_train, 'validation': s3_input_validation})

# Score the held-out features with a batch transform job.
xgb_transformer = xgb.transformer(instance_count=1, instance_type='ml.m4.xlarge')

xgb_transformer.transform(test_features_location, content_type='text/csv', split_type='Line')

xgb_transformer.wait()

# The transform result is written to S3 as `<input file name>.out`; it has to
# be downloaded before it can be read from the local data directory.
predictions_path = os.path.join(data_dir, 'test_features.csv.out')
output_uri = urlparse('{}/{}'.format(xgb_transformer.output_path, 'test_features.csv.out'))
session.boto_session.resource('s3').Bucket(output_uri.netloc).download_file(
    output_uri.path.lstrip('/'), predictions_path)

predictions = pd.read_csv(predictions_path, header=None)
# Each output line is a probability; round it to the 0/1 class label.
predictions = [int(round(score)) for score in predictions.squeeze().values]

accuracy = accuracy_score(y_test, predictions)
print("Test accuracy: {:.4f}".format(accuracy))

# Deploy the trained model behind a real-time endpoint.
xgb_predictor = xgb.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')
import pandas as pd
import numpy as np


def read_data(data_file, data_columns=None, header=None):
    """Load a CSV data set and split it into features and label.

    The last column is treated as the label and every preceding column as a
    feature. Missing feature values are replaced with 0.

    :param data_file: path of the CSV file to read
    :param data_columns: optional column names to assign, one per column of
        the file; when omitted the names pandas produced are kept
    :param header: forwarded to ``pandas.read_csv``. The default ``None``
        means the file has no header row, so the first record is kept as
        data; pass ``0`` for a file whose first line is a header.
    :return: tuple ``(feat, label)`` -- feature DataFrame and label Series
    :raises ValueError: if ``data_columns`` does not have exactly one name
        per column in the file
    """
    # Without header=None pandas uses the first record as column names, and
    # the rename below would then silently discard that record.
    data = pd.read_csv(data_file, header=header)

    if data_columns is not None:
        if len(data_columns) != data.shape[1]:
            raise ValueError(
                "data_columns has {} names but {} has {} columns".format(
                    len(data_columns), data_file, data.shape[1]))
        data.columns = data_columns

    label = data.iloc[:, -1]
    # Fill missing feature values with 0.
    feat = data.iloc[:, :-1].fillna(0)

    return feat, label