Movie-Me-Now/app.py
2025-04-02 13:49:04 -05:00

182 lines
7.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import os
import random

import numpy as np
from flask import Flask, request, render_template, redirect, url_for, session
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Flask application object; the route handlers in this file register on it.
app = Flask(__name__)
# Flask signs the session cookie with this key. Read it from the SECRET_KEY
# environment variable so a real secret never has to live in source control;
# the placeholder fallback keeps local development working unchanged.
app.secret_key = os.environ.get('SECRET_KEY', 'your_secret_key_here')
# Load movies from top_movies.json with UTF-8 encoding.
with open('top_movies.json', 'r', encoding='utf-8') as f:
    movies = json.load(f)

# Assign a unique ID and preprocess features for each movie.
for i, movie in enumerate(movies):
    # The id doubles as the movie's position in `movies`.
    movie['id'] = i

    # Combine genres and tags into one space-separated feature string.
    # `or []` also covers a key that is present but null in the JSON.
    genres = movie.get('genres') or []
    tags = movie.get('tags') or []
    movie['features'] = ' '.join(genres) + ' ' + ' '.join(tags)

    # Numeric copies of year and runtime; 0 means "missing or unparseable".
    # Only conversion failures are caught, so unrelated errors still surface.
    try:
        movie['year_num'] = int(movie.get('year', '0'))
    except (TypeError, ValueError, OverflowError):
        movie['year_num'] = 0
    try:
        # runtime might be a number already or a string; falsy values map to 0.
        movie['runtime_num'] = float(movie.get('runtime')) if movie.get('runtime') else 0
    except (TypeError, ValueError, OverflowError):
        movie['runtime_num'] = 0
# Fit a TF-IDF model (English stop words removed) on every movie's feature text.
movie_features = [entry['features'] for entry in movies]
vectorizer = TfidfVectorizer(stop_words='english')
movie_vectors = vectorizer.fit_transform(movie_features)

# Dataset-wide bounds of the numeric features; values <= 0 mean "unknown"
# and are left out.
years = [entry['year_num'] for entry in movies if entry['year_num'] > 0]
runtimes = [entry['runtime_num'] for entry in movies if entry['runtime_num'] > 0]

if years:
    min_year, max_year = min(years), max(years)
else:
    min_year, max_year = 0, 1

if runtimes:
    min_runtime, max_runtime = min(runtimes), max(runtimes)
else:
    min_runtime, max_runtime = 0, 1

# These spans are used as divisors when scoring, so a zero span (all values
# equal) falls back to 1.
year_range = (max_year - min_year) or 1
runtime_range = (max_runtime - min_runtime) or 1
def get_diverse_movies(num=10):
    """
    Pick up to `num` movies that have not been shown yet, trying to cover different genres.

    The first not-yet-asked movie found for each desired genre is taken, in
    genre order; any remaining slots are filled with a random sample of the
    other not-yet-asked movies. Returns a list of movie dicts, which is empty
    when every movie has already been asked.
    """
    # Sets give O(1) membership tests; ids are unique per movie.
    asked = set(session.get('asked_movies', []))
    available = [m for m in movies if m['id'] not in asked]
    if not available:
        return []

    # List of desired genres to cover.
    desired_genres = ["Action", "Adventure", "Comedy", "Drama", "Horror",
                      "Romance", "Sci-Fi", "Thriller", "Animation", "Documentary"]

    selected = []
    selected_ids = set()

    # Try to pick one movie per desired genre.
    for genre in desired_genres:
        for m in available:
            # `or []` tolerates a movie whose genres value is null.
            if m['id'] not in selected_ids and genre in (m.get('genres') or []):
                selected.append(m)
                selected_ids.add(m['id'])
                break
        if len(selected) >= num:
            break

    # If we still need more movies, fill the remainder randomly.
    if len(selected) < num:
        remaining = [m for m in available if m['id'] not in selected_ids]
        random.shuffle(remaining)
        selected.extend(remaining[:num - len(selected)])

    return selected[:num]
def enough_info():
    """
    Report whether the user has given enough opinions to recommend from.

    Only explicit 'like' / 'dislike' answers stored in the session count,
    and at least 3 of them are required.
    """
    decisive = 0
    for verdict in session.get('rated_movies', {}).values():
        if verdict == 'like' or verdict == 'dislike':
            decisive += 1
    return decisive >= 3
@app.route('/')
def home():
    """Create the session containers if absent, then redirect to the questionnaire."""
    session_defaults = (
        ('rated_movies', {}),   # {movie_id: rating}
        ('asked_movies', []),   # list of movie ids already asked
    )
    for key, empty in session_defaults:
        session.setdefault(key, empty)
    return redirect(url_for('questionnaire'))
@app.route('/questionnaire', methods=['GET', 'POST'])
def questionnaire():
    """
    Show a round of movies to rate (GET) or record the submitted ratings (POST).

    POST: stores each submitted rating in the session, marks the movies as
    asked, then redirects to the recommendations once enough ratings exist
    or no movies are left, otherwise to the next questionnaire round.
    GET: renders up to 10 not-yet-asked movies, or redirects to the
    recommendations when none remain.
    """
    if request.method == 'POST':
        # setdefault: a client may POST here without having visited '/' first.
        rated = session.setdefault('rated_movies', {})
        asked = session.setdefault('asked_movies', [])

        # Process ratings from the current round.
        for raw_id in request.form.getlist("movie_id"):
            # Form data is untrusted: ignore ids that are not valid movie indices.
            try:
                movie_id = int(raw_id)
            except ValueError:
                continue
            if not 0 <= movie_id < len(movies):
                continue
            # Keys are stored as canonical id strings; the form field name
            # still uses the value exactly as submitted.
            rated[str(movie_id)] = request.form.get(f"rating_{raw_id}")
            if movie_id not in asked:
                asked.append(movie_id)

        # In-place changes to nested containers are not detected by Flask's
        # session, so flag the session explicitly or the ratings may be lost.
        session.modified = True

        asked_ids = set(asked)
        remaining = [m for m in movies if m['id'] not in asked_ids]
        if enough_info() or not remaining:
            return redirect(url_for('recommend'))
        return redirect(url_for('questionnaire'))

    selected_movies = get_diverse_movies(num=10)
    if not selected_movies:
        return redirect(url_for('recommend'))
    return render_template('questionnaire.html', movies=selected_movies)
def _rated_ids(rated, wanted):
    """Return the valid movie indices whose stored rating equals `wanted`."""
    ids = []
    for key, rating in rated.items():
        if rating != wanted:
            continue
        try:
            idx = int(key)
        except (TypeError, ValueError):
            continue  # Ignore keys that are not movie ids.
        # Out-of-range ids would crash or silently mis-index the matrix.
        if 0 <= idx < len(movies):
            ids.append(idx)
    return ids


def _profile_similarities(movie_ids):
    """Return every movie's cosine similarity to the mean TF-IDF vector of `movie_ids`."""
    n_movies = movie_vectors.shape[0]
    if not movie_ids:
        return np.zeros(n_movies)
    profile = np.asarray(movie_vectors[movie_ids].mean(axis=0))
    if np.linalg.norm(profile) == 0:
        return np.zeros(n_movies)
    # One call scores all movies against the profile at once.
    return cosine_similarity(movie_vectors, profile).ravel()


def _closeness(value, target, span):
    """Return 1 - |value - target| / span, or 0 when either side is unknown."""
    if target is None or value <= 0:
        return 0
    return 1 - abs(value - target) / span


def advanced_recommendations():
    """
    Build an advanced recommendation score for movies not rated by the user.

    Combines:
    1. Text similarity (from TF-IDF features on genres/tags): similarity to
       the liked profile minus similarity to the disliked profile.
    2. Year similarity: movies with similar release years to liked movies.
    3. Runtime similarity: movies with similar runtime to liked movies.
    The final score is a weighted sum of these signals.

    Returns a list of (movie, score) tuples sorted by score, highest first.
    Movies whose stored rating is anything other than "not seen" are skipped.
    """
    rated = session.get('rated_movies', {})
    liked_ids = _rated_ids(rated, 'like')
    disliked_ids = _rated_ids(rated, 'dislike')

    # TEXT SIMILARITY for all movies, computed once instead of per movie.
    text_scores = _profile_similarities(liked_ids) - _profile_similarities(disliked_ids)

    # Compute numeric averages for liked movies (for year and runtime).
    liked_years = [movies[i]['year_num'] for i in liked_ids if movies[i]['year_num'] > 0]
    liked_runtimes = [movies[i]['runtime_num'] for i in liked_ids if movies[i]['runtime_num'] > 0]
    avg_year = np.mean(liked_years) if liked_years else None
    avg_runtime = np.mean(liked_runtimes) if liked_runtimes else None

    # Weights for each component - adjust these to tune the algorithm.
    w_text = 0.70
    w_year = 0.15
    w_runtime = 0.15

    recommendations = []
    # Row i of movie_vectors (and so text_scores[i]) belongs to movies[i].
    for movie, text_score in zip(movies, text_scores):
        if rated.get(str(movie['id']), "not seen") != "not seen":
            continue  # Skip movies already rated.

        # NUMERIC SIMILARITY: 1 means identical to the liked average.
        year_score = _closeness(movie['year_num'], avg_year, year_range)
        runtime_score = _closeness(movie['runtime_num'], avg_runtime, runtime_range)

        # Final combined score.
        final_score = w_text * text_score + w_year * year_score + w_runtime * runtime_score
        recommendations.append((movie, final_score))

    # Sort recommendations by final score in descending order.
    recommendations.sort(key=lambda x: x[1], reverse=True)
    return recommendations
@app.route('/recommend')
def recommend():
    """Render the recommendations page from the scored, sorted movie list."""
    return render_template(
        'recommendations.html',
        recommendations=advanced_recommendations(),
    )
# Start Flask's built-in server, with debug mode on, only when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    app.run(debug=True)