forked from COOKIESROK/Movies
-
Notifications
You must be signed in to change notification settings - Fork 0
/
.Rhistory
149 lines (149 loc) · 5.78 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# === description ===========================================================================
## brief description what the script file is about
#
## if you have clear chunks of code, use collapsible dividers (# ==== or # ----)
## and number the sections. Provide brief description for each section here
## # ***** or #_____ does not make sections
# ******************************************************************************
## use descriptive names
# 1) read raw data load data from clean data folder and ...
# 2) outliers test for outliers and entryr errors
# 3) .....
# === 1) read raw data =========================================================
# reading data
movie_data <- read.csv("movies.csv")
rating_data <- read.csv("ratings.csv")
# inspecting data
head(movie_data)
head(rating_data)
# === 2) outliers ==============================================================
# === 3) ..... =================================================================
#___ end _______________________________________________________________________
# === 1) read raw data =========================================================
# reading data
movie_data <- read.csv("movies.csv")
# === 1) read raw data =========================================================
# reading data
movie_data <- read.csv("movies.csv")
rating_data <- read.csv("ratings.csv")
# inspecting data
head(movie_data)
head(rating_data)
# === 2) outliers ==============================================================
movie_genre <- as.data.frame(movie_data$genres, stringsAsFactors=FALSE)
library(data.table)
movie_genre2 <- as.data.frame(tstrsplit(movie_genre[,1], '[|]',
type.convert=TRUE),
stringsAsFactors=FALSE) #DataFlair
colnames(movie_genre2) <- c(1:10)
colnames(movie_genre2) <- c(1:10)
movie_genre <- as.data.frame(movie_data$genres, stringsAsFactors=FALSE)
library(data.table)
movie_genre2 <- as.data.frame(tstrsplit(movie_genre[,1], '[|]',
type.convert=TRUE),
stringsAsFactors=FALSE) #DataFlair
colnames(movie_genre2) <- c(1:10)
movie_genre <- as.data.frame(movie_data$genres, stringsAsFactors=FALSE)
library(data.table)
movie_genre2 <- as.data.frame(tstrsplit(movie_genre[,1], '[|]',
type.convert=TRUE),
stringsAsFactors=FALSE) #DataFlair
colnames(movie_genre2) <- c(1:10)
for (index in 1:nrow(movie_genre2)) {
for (col in 1:ncol(movie_genre2)) {
gen_col = which(genre_mat1[1,] == movie_genre2[index,col]) #Author DataFlair
genre_mat1[index+1,gen_col] <- 1
}
}
genre_mat2 <- as.data.frame(genre_mat1[-1,], stringsAsFactors=FALSE) #remove first row, which was the genre list
for (col in 1:ncol(genre_mat2)) {
genre_mat2[,col] <- as.integer(genre_mat2[,col]) #convert from characters to integers
}
str(genre_mat2)
genre_mat2 <- as.data.frame(genre_mat1[-1,], stringsAsFactors=FALSE) #remove first row, which was the genre list
for (col in 1:ncol(genre_mat2)) {
genre_mat2[,col] <- as.integer(genre_mat2[,col]) #convert from characters to integers
}
genre_mat2
genre_mat2 <- as.data.frame(genre_mat1[-1,], stringsAsFactors=FALSE) #remove first row, which was the genre list
for (col in 1:ncol(genre_mat2)) {
genre_mat2[,col] <- as.integer(genre_mat2[,col]) #convert from characters to integers
}
movie_genre <- as.data.frame(movie_data$genres, stringsAsFactors=FALSE)
library(data.table)
movie_genre2 <- as.data.frame(tstrsplit(movie_genre[,1], '[|]',
type.convert=TRUE),
stringsAsFactors=FALSE)
colnames(movie_genre2) <- c(1:10)
movie_genre <- as.data.frame(movie_data$genres, stringsAsFactors=FALSE)
library(data.table)
movie_genre2 <- as.data.frame(tstrsplit(movie_genre[,1], '[|]',
type.convert=TRUE),
stringsAsFactors=FALSE) #DataFlair
colnames(movie_genre2) <- c(1:10)
for (index in 1:nrow(movie_genre2)) {
for (col in 1:ncol(movie_genre2)) {
gen_col = which(genre_mat1[1,] == movie_genre2[index,col]) #Author DataFlair
genre_mat1[index+1,gen_col] <- 1
}
}
genre_mat2 <- as.data.frame(genre_mat1[-1,], stringsAsFactors=FALSE) #remove first row, which was the genre list
for (col in 1:ncol(genre_mat2)) {
genre_mat2[,col] <- as.integer(genre_mat2[,col]) #convert from characters to integers
}
str(genre_mat2)
movie_genre
head(movie_genre2)
head(movie_genre)
colnames(movie_genre2) <- c(1:10)
head(movie_genre)
list_genre <- c("Action", "Adventure", "Animation", "Children",
"Comedy", "Crime","Documentary", "Drama", "Fantasy",
"Film-Noir", "Horror", "Musical", "Mystery","Romance",
"Sci-Fi", "Thriller", "War", "Western")
genre_mat1 <- matrix(0,10330,18)
genre_mat1[1,] <- list_genre
colnames(genre_mat1) <- list_genre
genre_mat1
for (index in 1:nrow(movie_genre2)) {
for (col in 1:ncol(movie_genre2)) {
#Creates a new name for each cell, and then assigns a value of 1 if it matches
#E.X. if a movie has action, it gets a 1 under action, but can be searched for
#action as well.
gen_col = which(genre_mat1[1,] == movie_genre2[index,col])
genre_mat1[index+1,gen_col] <- 1
}
}
gen
head(genre_mat2)
# removing first row, which was the genre list
genre_mat2 <- as.data.frame(genre_mat1[-1,], stringsAsFactors=FALSE)
head(genre_mat2)
str(genre_mat2)
for (col in 1:ncol(genre_mat2)) {
genre_mat2[,col] <- as.integer(genre_mat2[,col])
}
str(genre_mat2)
# === 3) ..... =================================================================
SearchMatrix <- cbind(movie_data[,1:2], genre_mat2[])
head(SearchMatrix)
ratingMatrix <- dcast(rating_data, userId~movieId, value.var = "rating", na.rm=FALSE)
ratingMatrix <- as.matrix(ratingMatrix[,-1]) #remove userIds
#Convert rating matrix into a recommenderlab sparse matrix
ratingMatrix <- as(ratingMatrix, "realRatingMatrix")
ratingMatrix
head(movie_data)
head(rating_data)
head(ratingMatrix)
?dcast
head(ratingMatrix)
# Converting rating matrix into a recommenderlab sparse matrix
ratingMatrix <- as(ratingMatrix, "realRatingMatrix")
library(recommenderlab)
install.packages("irlba")
library(recommenderlab)
library(irlba)
library('irlba')
library("irlba")
library("irlba")
library(recommenderlab)