# app.py
import html

import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
# Configure the page before any other Streamlit call is issued.
st.set_page_config(
    page_title="Anime Recommender", layout="wide", initial_sidebar_state="expanded"
)

# Load the anime catalogue and the per-user ratings.
anime_df = pd.read_csv("data/df_anime_sample.csv")
users_ratings_df = pd.read_csv("data/df_users_with_ratings_sample.csv")

# Combine genres and synopsis into a single text document per anime.
# Missing values are replaced with "" first: NaN + str stays NaN, and
# TfidfVectorizer refuses NaN documents.
# NOTE(review): "sypnopsis" is the column name this script reads; keep it in
# sync with the CSV header.
anime_df["content"] = (
    anime_df["Genres"].fillna("") + " " + anime_df["sypnopsis"].fillna("")
)

# Convert each document into a TF-IDF vector (English stop words removed).
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(anime_df["content"])

# TfidfVectorizer L2-normalises rows by default, so the plain dot product
# computed by linear_kernel is the cosine similarity between documents.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Map each anime title to its row index. Series.drop_duplicates() compares the
# *values* (row indices, which are always unique), so it never removed repeated
# titles; de-duplicate on the title index instead and keep the first occurrence
# so that a lookup by title always returns a single scalar.
anime_indices = pd.Series(anime_df.index, index=anime_df["Name"])
anime_indices = anime_indices[~anime_indices.index.duplicated(keep="first")]
# Function to get recommendations based on content
def get_recommendations(anime_name, cosine_sim=cosine_sim, top_n=5):
    """Return the anime whose content is most similar to ``anime_name``.

    Args:
        anime_name: Title to look up in the module-level ``anime_indices``.
        cosine_sim: Pairwise similarity matrix whose rows/columns follow the
            row order of ``anime_df``. Defaults to the module-level matrix.
        top_n: Maximum number of recommendations to return (default 5).

    Returns:
        A DataFrame with the columns "Name", "Japanese name", "Genres",
        "Score" and "poster_url", ordered from most to least similar. The
        queried anime itself is never part of the result.

    Raises:
        KeyError: If ``anime_name`` is not present in ``anime_indices``.
    """
    # Row of the anime that matches the name.
    # NOTE(review): the value comes from anime_df.index and is used as a
    # position below; this assumes the default RangeIndex from read_csv.
    idx = anime_indices[anime_name]
    if isinstance(idx, pd.Series):
        # A title that occurs more than once yields several rows; use the first.
        idx = idx.iloc[0]

    # Pair every *other* anime with its similarity to the query. Excluding the
    # query explicitly (instead of dropping whatever sorts first) stays correct
    # when another row ties with it, e.g. identical or empty content.
    candidates = (
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    )

    # sorted() is stable, so equally similar anime keep their catalogue order.
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    best_positions = [position for position, _ in ranked[: max(top_n, 0)]]

    return anime_df[["Name", "Japanese name", "Genres", "Score", "poster_url"]].iloc[
        best_positions
    ]
# Streamlit UI


def _recommendation_card(row):
    """Build the HTML card displayed for one recommended anime.

    Args:
        row: Row of the recommendations frame; must provide "poster_url",
            "Name", "Japanese name", "Score" and "Genres".

    Returns:
        An HTML snippet. Every value taken from the data is HTML-escaped
        because the snippet is rendered with ``unsafe_allow_html=True``.
    """

    def text(column):
        return html.escape(str(row[column]))

    # Skip the <img> tag when no poster is known instead of emitting src="nan".
    poster_tag = ""
    if pd.notna(row["poster_url"]):
        poster_url = text("poster_url")
        poster_tag = f'<img src="{poster_url}" width="150">'

    # Joined line by line so the markup does not depend on source indentation.
    return "\n".join(
        [
            '<div style="display: flex; align-items: center;">',
            '<div style="flex: 1;">',
            poster_tag,
            "</div>",
            '<div style="flex: 3;">',
            f'<p><b style="color:#E50914">English Name</b>: <b>{text("Name")}</b></p>',
            f'<p><b style="color:#E50914">Japanese Name</b>: <b>{text("Japanese name")}</b></p>',
            f'<p><b style="color:#DB4437">Rating</b>: <b>{text("Score")}</b></p>',
            f'<p><b style="color:#DB4437">Genres</b>: <b>{text("Genres")}</b></p>',
            "</div>",
            "</div>",
            "<hr>",
        ]
    )


def _show_recommendations(user_id):
    """Render recommendations seeded by the user's highest-rated anime.

    Args:
        user_id: Value selected in the user-ID select box.
    """
    # Fetch the anime rated by the user, enriched with catalogue details.
    user_animes = users_ratings_df[users_ratings_df["user_id"] == int(user_id)]
    user_animes = user_animes.merge(anime_df, left_on="anime_id", right_on="MAL_ID")
    if user_animes.empty:
        st.write(f"No ratings found for User {user_id}.")
        return

    # The user's most highly rated anime seeds the content-based lookup.
    top_anime = user_animes.sort_values(by="rating", ascending=False).iloc[0]["Name"]
    recommendations = get_recommendations(top_anime)

    # Report the number actually returned rather than a hard-coded 5.
    st.write(
        f'Based on the top rated anime "{top_anime}", '
        f"here are {len(recommendations)} recommendations for you:"
    )
    for _, row in recommendations.iterrows():
        st.markdown(_recommendation_card(row), unsafe_allow_html=True)


st.markdown(
    '<h1 style="color: red;">AnimeStream - Anime Recommender⛩️</h1>',
    unsafe_allow_html=True,
)
st.write("##")
st.markdown(
    """
<p>Welcome to <b style="color:#E50914;">AnimeStream</b>, your personalized anime recommendation system
tailored to your watch history!</p>
""",
    unsafe_allow_html=True,
)
st.write("##")

unique_user_id = users_ratings_df["user_id"].unique()
my_expander = st.expander("Tap to Select a User ID 👤💻")
# NOTE(review): the select box label is empty; consider giving it a label.
selected_user_id = my_expander.selectbox("", unique_user_id)

if my_expander.button("Recommend"):
    if selected_user_id is None:
        # selectbox yields None when there are no options to choose from.
        st.write("No user IDs are available.")
    else:
        _show_recommendations(selected_user_id)