# Normal Equations.R
library(tidyverse)
# repeat from Quick LA Demo.R -----------------
# load the Advertising data (local path) and keep the predictors and response
Advertising <- read_csv("C:/Users/ellen/OneDrive/Documents/GitHub/EllenwTerry/Foundations/Advertising.csv")
Advertising <- select(Advertising, TV, Radio, Sales)
mFit <- lm(Sales ~ TV + Radio, data = Advertising)  # benchmark fit with lm()
mFit$coefficients
Advertising$yhat <- predict(mFit, Advertising)       # lm() predictions, for comparison below
p <- ggplot(Advertising, aes(x = TV, y = Sales)) +
  geom_point(aes(x = TV, y = yhat))
p
#
vY <- as.matrix(dplyr::select(Advertising, Sales)) # set up y values in matrix
mX <- as.matrix(cbind(1, dplyr::select(Advertising, TV, Radio))) # set up x values in matrix
vBeta <- solve(t(mX) %*% mX, t(mX) %*% vY) # solve the normal equations: (X'X) beta = X'y
vBeta
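# Quick sanity check: the normal-equation solution should match lm()'s coefficients
# (all.equal() tolerates small floating-point differences)
all.equal(as.numeric(vBeta), as.numeric(mFit$coefficients))
# Optional sketch: a numerically more stable alternative to forming t(X) %*% X
# explicitly is QR-based least squares, available in base R as qr.solve()
qr.solve(mX, vY)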
str(Advertising)
# Predictions using normal equations
vBeta2 <- as.numeric(vBeta)
Advertising$neY <- t(vBeta2 %*% t(mX)) # 3 columns on left * 3 rows on right (after transpose)
# the transpose differs from the Python version because lm() stores the coefficients
# as a vector rather than a horizontal array
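# Equivalent, simpler form (a sketch): X %*% beta gives the fitted values directly,
# without the double transpose; it should match neY above
neY_alt <- as.numeric(mX %*% vBeta)
all.equal(neY_alt, as.numeric(Advertising$neY))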
# compare predictions using NE vs lm - should be the same
p <- p +
  geom_point(data = Advertising, aes(x = TV, y = neY), color = "red")
p
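# Numerical check that the NE and lm() predictions agree
# (any difference should be floating-point rounding only)
max(abs(Advertising$neY - Advertising$yhat))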
# ----------- Categorical Data ------------------ #
# load the automobile data (local path); keep one categorical predictor (make),
# one numeric predictor (horsepower), and the response (price)
Autos <- read.csv(file = "C:/Users/ellen/Documents/UH/Fall 2020/Github Staging/EllenwTerry/Foundations/Automobile Price Prediction.csv")
Autos <- select(Autos, make, horsepower, price)
Autos <- filter(Autos, make %in% c("audi", "bmw", "honda")) # restrict to three makes to keep the dummy coding small
model <- lm(price ~ ., data = Autos)
model$coefficients
vY <- as.matrix(select(Autos, price))
mX <- model.matrix(price ~ make + horsepower, Autos) # expands make into dummy (indicator) columns
vBeta <- solve(t(mX) %*% mX, t(mX) %*% vY) # solve using normal equations
as.numeric(model$coefficients)
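# Side-by-side comparison of the two solutions (column names are illustrative);
# they should agree up to floating-point rounding
cbind(NE = as.numeric(vBeta), lm = as.numeric(model$coefficients))
# The first rows of the model matrix show how model.matrix() dummy-codes make
# against the reference level (the first level alphabetically, here "audi")
head(mX)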