-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathQuick LA Demo.R
103 lines (75 loc) · 2.51 KB
/
Quick LA Demo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
library(tidyverse)
# Deliberately no setwd() here: the only file this script reads is given by an
# absolute path, so no particular working directory is needed, and a
# machine-specific setwd() stops the script with an error on any computer
# where that folder does not exist.
# Fix the random seed so the sample_n() draw further down is reproducible.
# (R reads the literal 0118 as the number 118.)
set.seed(0118)
# ---- Element-wise multiplication and the matrix inverse ----
# matrix() fills column by column, so the rows of mX are (1, 2) and (8, 6).
mX <- matrix(c(1, 8, 2, 6), nrow = 2)
B <- c(1, 3)
# `*` multiplies element by element; the vector B is recycled down each column
B * mX
# or B could be a matrix of the same shape
B <- matrix(c(1, 3, 2, 2), nrow = 2)
# or go back to the plain vector
B <- c(1, 3)
B * mX
# and the same element-wise product against the transpose
B * t(mX)
# solve() with a single argument returns the inverse matrix
solve(mX)
# proof: mX times its inverse is the identity, so multiplying by mX once more
# gives mX back
mX %*% solve(mX) %*% mX
# ---- Solving a system of equations with solve() ----
# With two arguments, solve(a, b) returns the x that satisfies a %*% x = b.
# Example: the elimination step from the ls problem
a <- matrix(c(8, 20, 20, 60), nrow = 2)
b <- matrix(c(56, 154), ncol = 1)
solve(a, b)
# You should still do the elimination by hand; use this (and lm) only to
# check your answer
# ---- Element-wise product vs. matrix product ----
# Uses mX (one observation per row) and B = c(1, 3) created earlier in the script.
B * t(mX)
# is not
B %*% t(mX)
# but think of mX as data and B as coefficients, so
B %*% t(mX)
# think of this as a linear equation where col 1, 2, etc. are dimensions
# so the first y_hat would be: (1*1) + (3*2) = 7
# and the second y_hat would be (1*8) + (3*6) = 26
# SOOOO, that's why we use this ALL the time - it gives us a regression equation
# Now, what if we have more observations:
# (mX so far is matrix(c(1, 8, 2, 6), nrow = 2, ncol = 2); append a third row)
mX <- rbind(mX, c(5, 3))
B %*% t(mX)
# and the third row y_hat would be (1*5) + (3*3) = 14
# see?
# Also
B %*% t(mX)
# is not
# (the next call fails on purpose; try() reports the "non-conformable
# arguments" error and lets the rest of the script keep running)
try(t(B) %*% mX)
# and it won't work for a couple of reasons:
# 1. it's row * column AND the number of columns on the LEFT (2) must equal the number of rows on the RIGHT (3)
# picky picky picky
# 2. and btw, t(B) changes nothing here: %*% already treats the vector B as a 1 x 2 row
# so
t(B) %*% t(mX)
# works, but the t(B) is redundant - it gives the same result as B %*% t(mX)
# ---- Same idea with more complex data: the Advertising set ----
# Read the csv and keep only the two predictors and the response
Advertising <- read_csv(
  "C:/Users/ellen/OneDrive/Documents/GitHub/EllenwTerry/Foundations/Advertising.csv"
) %>%
  select(TV, Radio, Sales)
# Regress Sales on TV and Radio, then show the estimated coefficients
mFit <- lm(Sales ~ TV + Radio, data = Advertising)
mFit$coefficients
# Store the fitted value for every row next to the data
Advertising$yhat <- predict(mFit, newdata = Advertising)
# Draw 4 random rows to check by hand
sample <- sample_n(Advertising, size = 4)
sample
# ---- Reproduce predict() by hand with matrix algebra ----
# Uses mFit and sample created earlier in the script.
vBeta <- as.numeric(mFit$coefficients)
str(mFit$coefficients) # this is a named numeric vector (not a list)
str(vBeta) # this is a plain numeric vector - as.numeric() dropped the names
# set up x values in matrix; the leading column of 1s multiplies the intercept
mX <- as.matrix(cbind(1, select(sample, TV, Radio)))
mX
# (the next call fails on purpose; try() reports the "non-conformable
# arguments" error and lets the rest of the script keep running)
try(vBeta %*% mX)
# this doesn't work because mX is 4x3 and vBeta is 1x3 (3 columns on left <> 4 rows on right)
# the number of columns on the left must equal the number of rows on the right... EXACTLY in that order, so
vBeta %*% t(mX) # works, but let's transpose it so we can see it better
round(t(vBeta %*% t(mX)), 1) # gets us there
# and compare
round(sample, 1)
# element-wise version: each row holds the three separate terms, not their sum
t(vBeta * t(mX))
# keep in mind that we can multiply the elements, but that won't solve the equation
# we want to use %*% because we want:
round((vBeta[1] * t(mX)[1, 1]) + (vBeta[2] * t(mX)[2, 1]) + (vBeta[3] * t(mX)[3, 1]), 1)
# tie back to slide