Skip to content

Commit da12238

Browse files
authored
Add files via upload
0 parents  commit da12238

File tree

2 files changed

+37
-0
lines changed

2 files changed

+37
-0
lines changed

Universities.csv

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Univ,SAT,Top10,Accept,SFRatio,Expenses,GradRate
Brown,1310,89,22,13,22704,94
CalTech,1415,100,25,6,63575,81
CMU,1260,62,59,9,25026,72
Columbia,1310,76,24,12,31510,88
Cornell,1280,83,33,13,21864,90
Dartmouth,1340,89,23,10,32162,95
Duke,1315,90,30,12,31585,95
Georgetown,1255,74,24,12,20126,92
Harvard,1400,91,14,11,39525,97
JohnsHopkins,1305,75,44,7,58691,87
MIT,1380,94,30,10,34870,91
Northwestern,1260,85,39,11,28052,89
NotreDame,1255,81,42,13,15122,94
PennState,1081,38,54,18,10185,80
Princeton,1375,91,14,8,30220,95
Purdue,1005,28,90,19,9066,69
Stanford,1360,90,20,12,36450,93
TexasA&M,1075,49,67,25,8704,67
UCBerkeley,1240,95,40,17,15140,78
UChicago,1290,75,50,13,38380,87
UMichigan,1180,65,68,16,15470,85
UPenn,1285,80,36,11,27553,90
UVA,1225,77,44,14,13349,92
UWisconsin,1085,40,69,15,11857,71
Yale,1375,95,19,11,43514,96

pca_my.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Fri Oct 16 11:51:09 2020
4+
5+
@author: HP
6+
"""
7+
import numpy as np
8+
import pandas as pd
9+
import matplotlib.pyplot as plt
10+
11+
# Load the universities data set into a DataFrame.
# NOTE(review): this is an absolute path on one specific machine; adjust it
# (or switch to a path relative to this script) when running elsewhere.
univ = pd.read_csv('C:/Users/HP/Desktop/datasets/Universities.csv')

# Number of missing values per column. Printed explicitly because a bare
# expression is only echoed in an interactive console, not in a script run.
print(univ.isna().sum())

from sklearn.preprocessing import scale

# Standardise every column except the first one (skipped by iloc[:, 1:]) so
# that each feature contributes on a comparable scale to the PCA.
norm_data = scale(univ.iloc[:, 1:])
print(norm_data)

#############PCA############
from sklearn.decomposition import PCA

# PCA() without n_components keeps all components.
pca = PCA()
pca_values = pca.fit_transform(norm_data)
print(pca_values.shape)

# Fraction of the total variance explained by each principal component.
var = pca.explained_variance_ratio_
print(var)

# Cumulative explained variance, expressed in percent.
cum_var = np.cumsum(np.round(var, decimals=4) * 100)
print(cum_var)

# Cumulative variance plot for the PCA components. Each plot gets its own
# figure; otherwise all three are drawn on top of each other on one set of
# axes with incompatible scales.
plt.figure()
plt.plot(cum_var, 'r')
plt.xlabel('Component index')
plt.ylabel('Cumulative explained variance (%)')

# Scatter plot of the first principal component against the second.
x = pca_values[:, 0]
y = pca_values[:, 1]
plt.figure()
plt.plot(x, y, 'ro')
plt.xlabel('PCA1')
plt.ylabel('PCA2')
# PCA1 and PCA2 show no correlation anywhere in the plot above.

# First principal component score of every observation against its row
# index. The index range is derived from the data instead of hard-coding
# the current row count (25), so the plot keeps working if rows change.
plt.figure()
plt.plot(np.arange(len(x)), x, "ro")
plt.xlabel('Row index')
plt.ylabel('PCA1')

# Needed for the figures to appear when the file is run as a plain script.
plt.show()

0 commit comments

Comments
 (0)