-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
78 lines (67 loc) · 3.32 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import pickle
import requests
from dotenv import load_dotenv
import streamlit as st
# Load environment variables from .env file
load_dotenv()
# Access the TMDB API key from the environment variables
TMDB_API_KEY = os.getenv("TMDB_API_KEY")
def fetch_poster(movie_id):
url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={TMDB_API_KEY}&language=en-US"
try:
response = requests.get(url)
response.raise_for_status() # Raises an HTTPError if the response was unsuccessful
data = response.json()
poster_path = data.get('poster_path') # Use .get() to avoid KeyError if 'poster_path' is missing
if poster_path:
full_path = f"https://image.tmdb.org/t/p/w500/{poster_path}"
return full_path
else:
st.warning(f"No poster found for movie ID: {movie_id}")
return None
except requests.exceptions.HTTPError as errh:
st.error(f"HTTP Error: {errh}")
except requests.exceptions.ConnectionError as errc:
st.error(f"Error Connecting: {errc}")
except requests.exceptions.Timeout as errt:
st.error(f"Timeout Error: {errt}")
except requests.exceptions.RequestException as err:
st.error(f"Something went wrong: {err}")
return None
def recommend(movie,movies,similarity):
index = movies[movies['title'] == movie].index[0]
distances = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda x: x[1])
recommended_movie_names = []
recommended_movie_posters = []
for i in distances[1:11]: # Modify the loop to iterate over 10 distances
# fetch the movie poster
movie_id = movies.iloc[i[0]].id
recommended_movie_posters.append(fetch_poster(movie_id))
recommended_movie_names.append(movies.iloc[i[0]].title)
return recommended_movie_names, recommended_movie_posters
def improved_recommendations(title,movies,similarity):
index = movies[movies['title'] == title].index
if len(index) == 0:
st.warning(f"No movie found with the title '{title}'.")
return None
idx = index[0]
sim_scores = list(enumerate(similarity[idx]))
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
sim_scores = sim_scores[1:26]
movie_indices = [i[0] for i in sim_scores]
movies_qualified = movies.iloc[movie_indices][['id','title', 'vote_count', 'vote_average', 'year']]
vote_counts = movies_qualified[movies_qualified['vote_count'].notnull()]['vote_count'].astype('int')
vote_averages = movies_qualified[movies_qualified['vote_average'].notnull()]['vote_average'].astype('int')
C = vote_averages.mean()
m = vote_counts.quantile(0.60)
def weighted_rating(x, C=C, m=m):
v = x['vote_count']
R = x['vote_average']
return (v / (v + m) * R) + (m / (m + v) * C)
qualified_movies = movies_qualified[(movies_qualified['vote_count'] >= m) & (movies_qualified['vote_count'].notnull()) & (movies_qualified['vote_average'].notnull())]
qualified_movies['vote_count'] = qualified_movies['vote_count'].astype('int')
qualified_movies['vote_average'] = qualified_movies['vote_average'].astype('int')
qualified_movies['wr'] = qualified_movies.apply(lambda x: weighted_rating(x), axis=1)
qualified_movies = qualified_movies.sort_values('wr', ascending=False).head(10)
return qualified_movies