Starting on Prediction Algorithm
This commit is contained in:
commit
547b161138
181
app.py
Normal file
181
app.py
Normal file
@ -0,0 +1,181 @@
|
|||||||
|
import json
import os
import random

import numpy as np
from flask import Flask, request, render_template, redirect, url_for, session
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
|
||||||
|
|
||||||
|
app = Flask(__name__)
# Flask signs the session cookie with this key, so anyone who knows it can
# forge session contents. Take the key from the SECRET_KEY environment variable
# when it is set; the literal is only a development fallback.
app.secret_key = os.environ.get('SECRET_KEY') or 'your_secret_key_here'  # Replace with a secure key in production
|
||||||
|
|
||||||
|
def _to_number(value, cast):
    """Convert *value* with *cast*; return 0 when it is missing or malformed."""
    if not value:
        return 0
    try:
        return cast(value)
    except (TypeError, ValueError, OverflowError):
        return 0


# Load movies from top_movies.json with UTF-8 encoding
with open('top_movies.json', 'r', encoding='utf-8') as f:
    movies = json.load(f)

# Assign a unique ID and preprocess features for each movie
for i, movie in enumerate(movies):
    # The id is the movie's position in the list, which is also its row in the
    # TF-IDF matrix built from this list.
    movie['id'] = i
    # Combine genres and tags into a feature string (could add description etc.).
    # `or []` keeps a null genres/tags value in the JSON from breaking the join.
    genre_text = ' '.join(movie.get('genres') or [])
    tag_text = ' '.join(movie.get('tags') or [])
    movie['features'] = genre_text + ' ' + tag_text
    # Numeric copies of year and runtime; 0 stands for "unknown".
    movie['year_num'] = _to_number(movie.get('year'), int)
    # runtime might be a number already or a string; if string, try to convert.
    movie['runtime_num'] = _to_number(movie.get('runtime'), float)
|
||||||
|
|
||||||
|
# Fit a TF-IDF model over every movie's combined genre/tag string.
vectorizer = TfidfVectorizer(stop_words='english')
movie_features = [entry['features'] for entry in movies]
movie_vectors = vectorizer.fit_transform(movie_features)

# Dataset-wide bounds of the numeric features. Values of 0 mean "unknown"
# and are left out so they do not stretch the range.
years = [entry['year_num'] for entry in movies if entry['year_num'] > 0]
runtimes = [entry['runtime_num'] for entry in movies if entry['runtime_num'] > 0]

if years:
    min_year, max_year = min(years), max(years)
else:
    min_year, max_year = 0, 1

if runtimes:
    min_runtime, max_runtime = min(runtimes), max(runtimes)
else:
    min_runtime, max_runtime = 0, 1

# The ranges are used as divisors later on, so a zero-width range becomes 1.
if max_year != min_year:
    year_range = max_year - min_year
else:
    year_range = 1

if max_runtime != min_runtime:
    runtime_range = max_runtime - min_runtime
else:
    runtime_range = 1
|
||||||
|
|
||||||
|
def get_diverse_movies(num=10):
    """
    Pick up to `num` movies that have not been shown yet, trying to cover different genres.

    Movies whose id is in the session's ``asked_movies`` list are excluded.
    The first pass takes, for each genre in a fixed list, the first
    not-yet-selected movie (in dataset order) that carries that genre. Any
    slots still open afterwards are filled with randomly chosen unseen movies.

    Returns a list of movie dicts (possibly empty, never longer than `num`).
    """
    # Sets make the membership tests below O(1) instead of scanning lists and
    # comparing whole movie dicts.
    asked = set(session.get('asked_movies', []))
    available = [m for m in movies if m['id'] not in asked]
    if not available:
        return []

    # List of desired genres to cover
    desired_genres = ["Action", "Adventure", "Comedy", "Drama", "Horror",
                      "Romance", "Sci-Fi", "Thriller", "Animation", "Documentary"]

    selected = []
    selected_ids = set()

    # Try to pick one movie per desired genre.
    for genre in desired_genres:
        pick = next(
            (m for m in available
             if m['id'] not in selected_ids and genre in (m.get('genres') or [])),
            None,
        )
        if pick is not None:
            selected.append(pick)
            selected_ids.add(pick['id'])
        if len(selected) >= num:
            break

    # If we still need more movies, fill the remainder randomly.
    if len(selected) < num:
        remaining = [m for m in available if m['id'] not in selected_ids]
        random.shuffle(remaining)
        selected.extend(remaining[:num - len(selected)])

    return selected[:num]
|
||||||
|
|
||||||
|
def enough_info():
    """
    Report whether the session holds enough ratings to recommend from.

    Only 'like' and 'dislike' count; the threshold is three such ratings.
    """
    ratings = session.get('rated_movies', {}).values()
    decisive = [r for r in ratings if r == 'like' or r == 'dislike']
    return len(decisive) >= 3
|
||||||
|
|
||||||
|
@app.route('/')
def home():
    """Make sure the session has its rating containers, then go to the questionnaire."""
    session_defaults = (
        ('rated_movies', {}),  # {movie_id: rating}
        ('asked_movies', []),  # list of movie ids already asked
    )
    for key, empty_value in session_defaults:
        session.setdefault(key, empty_value)
    return redirect(url_for('questionnaire'))
|
||||||
|
|
||||||
|
@app.route('/questionnaire', methods=['GET', 'POST'])
def questionnaire():
    """Show one round of movies to rate (GET) or record a submitted round (POST).

    GET renders ``questionnaire.html`` with up to 10 movies from
    ``get_diverse_movies``; when none are left it redirects to ``recommend``.

    POST reads the repeated ``movie_id`` form fields and their matching
    ``rating_<id>`` fields, stores them in the session, and redirects to
    ``recommend`` once ``enough_info()`` is true or every movie has been
    asked. Otherwise it redirects back here for another round.
    """
    if request.method != 'POST':
        selected_movies = get_diverse_movies(num=10)
        if not selected_movies:
            return redirect(url_for('recommend'))
        return render_template('questionnaire.html', movies=selected_movies)

    # setdefault (rather than indexing) so a POST that arrives before '/' was
    # ever visited does not fail with KeyError.
    rated = session.setdefault('rated_movies', {})
    asked = session.setdefault('asked_movies', [])

    # Process ratings from the current round.
    for movie_id in request.form.getlist("movie_id"):
        # Form data is untrusted: ignore ids that are not integers or that do
        # not correspond to a loaded movie.
        try:
            numeric_id = int(movie_id)
        except ValueError:
            continue
        if not 0 <= numeric_id < len(movies):
            continue

        rating = request.form.get(f"rating_{movie_id}")
        if rating not in ('like', 'dislike', 'not seen'):
            # Missing or unexpected value: treat the movie as not rated.
            rating = 'not seen'

        # Session keys are strings; normalise so lookups by str(id) match.
        rated[str(numeric_id)] = rating
        if numeric_id not in asked:
            asked.append(numeric_id)

    # In-place changes to the nested dict/list are not detected by Flask's
    # session object, so it must be marked modified explicitly or the
    # ratings are never written back to the cookie.
    session.modified = True

    asked_ids = set(asked)
    all_asked = all(m['id'] in asked_ids for m in movies)
    if enough_info() or all_asked:
        return redirect(url_for('recommend'))
    return redirect(url_for('questionnaire'))
|
||||||
|
|
||||||
|
def _ids_with_rating(rated, wanted):
    """Return the valid integer movie ids in *rated* whose rating equals *wanted*."""
    ids = []
    for raw_id, rating in rated.items():
        if rating != wanted:
            continue
        try:
            movie_id = int(raw_id)
        except (TypeError, ValueError):
            continue
        # Ids left over from a different dataset may no longer exist.
        if 0 <= movie_id < len(movies):
            ids.append(movie_id)
    return ids


def _profile_similarity(profile_ids):
    """Return every movie's cosine similarity to the mean TF-IDF vector of *profile_ids*."""
    if not profile_ids:
        return np.zeros(len(movies))
    profile = np.asarray(movie_vectors[profile_ids].mean(axis=0)).reshape(1, -1)
    if np.linalg.norm(profile) == 0:
        # An all-zero profile has no direction; score it as "no similarity".
        return np.zeros(len(movies))
    return cosine_similarity(movie_vectors, profile).ravel()


def advanced_recommendations():
    """
    Build an advanced recommendation score for movies not rated by the user.

    Combines:
    1. Text similarity (from TF-IDF features on genres/tags): similarity to
       the liked profile minus similarity to the disliked profile.
    2. Year similarity: movies with similar release years to liked movies.
    3. Runtime similarity: movies with similar runtime to liked movies.
    The final score is a weighted sum of these signals.

    Returns a list of ``(movie, score)`` tuples sorted by score, highest
    first. Movies with any rating other than "not seen" are omitted.
    """
    rated = session.get('rated_movies', {})
    liked_ids = _ids_with_rating(rated, 'like')
    disliked_ids = _ids_with_rating(rated, 'dislike')

    # Text similarity of every movie to each profile, computed once for the
    # whole matrix instead of once per movie.
    like_sims = _profile_similarity(liked_ids)
    dislike_sims = _profile_similarity(disliked_ids)

    # Compute numeric averages for liked movies (for year and runtime).
    # Zero means "unknown" and is left out of the average.
    liked_years = [movies[i]['year_num'] for i in liked_ids if movies[i]['year_num'] > 0]
    liked_runtimes = [movies[i]['runtime_num'] for i in liked_ids if movies[i]['runtime_num'] > 0]
    avg_year = np.mean(liked_years) if liked_years else None
    avg_runtime = np.mean(liked_runtimes) if liked_runtimes else None

    # Weights for each component – adjust these to tune the algorithm.
    w_text = 0.70
    w_year = 0.15
    w_runtime = 0.15

    recommendations = []
    for i, movie in enumerate(movies):
        if rated.get(str(movie['id']), "not seen") != "not seen":
            continue  # Skip movies already rated.

        # TEXT SIMILARITY: difference between similarity to liked and disliked profiles.
        text_score = like_sims[i] - dislike_sims[i]

        # NUMERIC SIMILARITY for Year (1 means identical to the liked average).
        year_score = 0
        if avg_year is not None and movie['year_num'] > 0:
            year_score = 1 - abs(movie['year_num'] - avg_year) / year_range

        # NUMERIC SIMILARITY for Runtime.
        runtime_score = 0
        if avg_runtime is not None and movie['runtime_num'] > 0:
            runtime_score = 1 - abs(movie['runtime_num'] - avg_runtime) / runtime_range

        # Final combined score.
        final_score = w_text * text_score + w_year * year_score + w_runtime * runtime_score
        recommendations.append((movie, final_score))

    # Sort recommendations by final score in descending order.
    recommendations.sort(key=lambda x: x[1], reverse=True)
    return recommendations
|
||||||
|
|
||||||
|
@app.route('/recommend')
def recommend():
    """Render the scored recommendation list for the current session."""
    return render_template(
        'recommendations.html',
        recommendations=advanced_recommendations(),
    )
|
||||||
|
|
||||||
|
# Start the app with Flask's built-in server only when this file is executed
# directly, not when it is imported.
# NOTE(review): debug=True is meant for local development; confirm it is never
# enabled on a publicly reachable host.
if __name__ == '__main__':
    app.run(debug=True)
|
77
templates/index.html
Normal file
77
templates/index.html
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Movie Slideshow</title>
|
||||||
|
<style>
|
||||||
|
/* Basic styling for slideshow */
|
||||||
|
#movie-container {
|
||||||
|
text-align: center;
|
||||||
|
margin-top: 30px;
|
||||||
|
}
|
||||||
|
#movie-poster {
|
||||||
|
width: 200px;
|
||||||
|
margin: 20px;
|
||||||
|
}
|
||||||
|
.rating-buttons button {
|
||||||
|
margin: 10px;
|
||||||
|
padding: 10px 20px;
|
||||||
|
font-size: 16px;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1 style="text-align: center;">Rate Movies</h1>
|
||||||
|
<form id="ratingForm" method="POST" action="/recommend">
|
||||||
|
<!-- Hidden inputs for movie ratings; one per movie -->
|
||||||
|
{% for movie in movies %}
|
||||||
|
<input type="hidden" name="{{ movie.title }}" id="rating-{{ loop.index0 }}" value="not seen">
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
<div id="movie-container">
|
||||||
|
<img id="movie-poster" src="" alt="Movie Poster">
|
||||||
|
<h2 id="movie-title"></h2>
|
||||||
|
<p id="movie-description"></p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="rating-buttons" style="text-align: center;">
|
||||||
|
<button type="button" onclick="recordRating('like')">Like</button>
|
||||||
|
<button type="button" onclick="recordRating('dislike')">Dislike</button>
|
||||||
|
<button type="button" onclick="recordRating('not seen')">Not Seen</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const movies = {{ movies | tojson }};
|
||||||
|
let currentIndex = 0;
|
||||||
|
const posterEl = document.getElementById("movie-poster");
|
||||||
|
const titleEl = document.getElementById("movie-title");
|
||||||
|
const descriptionEl = document.getElementById("movie-description");
|
||||||
|
|
||||||
|
// Render the movie at `index`; once the index runs past the list, submit the form.
function showMovie(index) {
    if (index < movies.length) {
        const current = movies[index];
        posterEl.src = current.poster;
        posterEl.alt = current.title;
        titleEl.textContent = `${current.title} (${current.year})`;
        descriptionEl.textContent = current.description;
        return;
    }
    // All movies rated; submit the form
    document.getElementById("ratingForm").submit();
}
|
||||||
|
|
||||||
|
// Record the rating for the current movie and show the next one
function recordRating(rating) {
    // Ignore clicks that arrive after the last movie was rated (the form is
    // already being submitted and there is no hidden input for this index).
    if (currentIndex >= movies.length) {
        return;
    }
    // Update the hidden input for the current movie with the chosen rating
    document.getElementById("rating-" + currentIndex).value = rating;
    currentIndex++;
    showMovie(currentIndex);
}
|
||||||
|
|
||||||
|
// Initialize the slideshow with the first movie
|
||||||
|
showMovie(currentIndex);
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
83
templates/questionnaire.html
Normal file
83
templates/questionnaire.html
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Movie Questionnaire</title>
|
||||||
|
<style>
|
||||||
|
/* Styling for the slideshow */
|
||||||
|
#movie-container {
|
||||||
|
text-align: center;
|
||||||
|
margin-top: 30px;
|
||||||
|
}
|
||||||
|
#movie-poster {
|
||||||
|
width: 200px;
|
||||||
|
margin: 20px;
|
||||||
|
}
|
||||||
|
.rating-buttons button {
|
||||||
|
margin: 10px;
|
||||||
|
padding: 10px 20px;
|
||||||
|
font-size: 16px;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1 style="text-align: center;">Rate Movies</h1>
|
||||||
|
<form id="questionForm" method="POST">
|
||||||
|
<!-- Container where hidden inputs will be added for the round -->
|
||||||
|
<div id="hiddenFields"></div>
|
||||||
|
<div id="movie-container">
|
||||||
|
<img id="movie-poster" src="" alt="Movie Poster">
|
||||||
|
<h2 id="movie-title"></h2>
|
||||||
|
<p id="movie-description"></p>
|
||||||
|
</div>
|
||||||
|
<div class="rating-buttons" style="text-align: center;">
|
||||||
|
<button type="button" onclick="recordRating('like')">Like</button>
|
||||||
|
<button type="button" onclick="recordRating('dislike')">Dislike</button>
|
||||||
|
<button type="button" onclick="recordRating('not seen')">Not Seen</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// Movies for the current round are passed from the server.
|
||||||
|
const movies = {{ movies | tojson }};
|
||||||
|
let currentIndex = 0;
|
||||||
|
let movieRatings = {}; // To store ratings for this batch
|
||||||
|
|
||||||
|
// Add one pair of hidden inputs per movie in this round ("movie_id" and
// "rating_<id>") and submit the form. Movies without a recorded rating are
// sent as "not seen".
function submitRatings() {
    const container = document.getElementById("hiddenFields");
    movies.forEach(movie => {
        // Hidden input for movie id
        const movieIdInput = document.createElement("input");
        movieIdInput.type = "hidden";
        movieIdInput.name = "movie_id";
        movieIdInput.value = movie.id;
        container.appendChild(movieIdInput);
        // Hidden input for its rating
        const ratingInput = document.createElement("input");
        ratingInput.type = "hidden";
        ratingInput.name = "rating_" + movie.id;
        ratingInput.value = movieRatings[movie.id] || "not seen";
        container.appendChild(ratingInput);
    });
    document.getElementById("questionForm").submit();
}

// Display the movie at `index`; once past the last one, submit the round.
function showMovie(index) {
    if (index >= movies.length) {
        // All movies rated in this round.
        submitRatings();
        return;
    }
    // Use the parameter rather than the global counter so the function shows
    // exactly the movie it was asked for.
    const movie = movies[index];
    const posterEl = document.getElementById("movie-poster");
    // A movie without a poster has poster === null; assigning null to src
    // would request the URL "null".
    posterEl.src = movie.poster || "";
    posterEl.alt = movie.title;
    document.getElementById("movie-title").textContent = movie.title + " (" + movie.year + ")";
    document.getElementById("movie-description").textContent = movie.description;
}
|
||||||
|
|
||||||
|
// Store the rating for the movie currently shown and advance to the next one.
function recordRating(rating) {
    // Ignore clicks that arrive after the last movie was rated; the form is
    // already being submitted and movies[currentIndex] would be undefined.
    if (currentIndex >= movies.length) {
        return;
    }
    movieRatings[movies[currentIndex].id] = rating;
    currentIndex++;
    showMovie(currentIndex);
}
|
||||||
|
|
||||||
|
showMovie(currentIndex);
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
21
templates/recommendations.html
Normal file
21
templates/recommendations.html
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Movie Recommendations</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Your Movie Recommendations</h1>
|
||||||
|
{% for movie, score in recommendations %}
|
||||||
|
<div style="margin-bottom: 20px;">
|
||||||
|
<img src="{{ movie.poster }}" alt="{{ movie.title }}" width="70" style="vertical-align: middle;" />
|
||||||
|
<strong>{{ movie.title }} ({{ movie.year }})</strong>
|
||||||
|
<p>{{ movie.description }}</p>
|
||||||
|
<a href="{{ movie.url }}" target="_blank">More Info</a>
|
||||||
|
<p>Recommendation Score: {{ score | round(3) }}</p>
|
||||||
|
</div>
|
||||||
|
<hr>
|
||||||
|
{% endfor %}
|
||||||
|
<a href="/">Back to Questionnaire</a>
|
||||||
|
</body>
|
||||||
|
</html>
|
142
test.py
Normal file
142
test.py
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from tqdm import tqdm # progress bar library
|
||||||
|
|
||||||
|
import os

# TMDb API key. The TMDB_API_KEY environment variable takes precedence so the
# credential does not have to live in source control.
# NOTE(review): the literal fallback is a key committed to the repository; it
# should be rotated and the fallback removed once callers set the variable.
api_key = os.environ.get("TMDB_API_KEY") or "96f3424d6fe55c2982e6e094416607f5"

# Output file where results are saved incrementally
output_filename = "top_movies.json"
|
||||||
|
|
||||||
|
def write_movies(movies, filename=output_filename):
    """Write the movies list to *filename* as indented UTF-8 JSON.

    Best-effort: any failure is printed and swallowed so a long scrape is not
    aborted by one bad save.
    """
    try:
        # Serialize before opening the file: opening with "w" truncates it, so
        # encoding first means a serialization error leaves the previously
        # saved file intact.
        payload = json.dumps(movies, indent=4, ensure_ascii=False)
        with open(filename, "w", encoding="utf-8") as f:
            f.write(payload)
    except Exception as e:
        print(f"Error saving data to JSON file: {e}")
|
||||||
|
|
||||||
|
def get_movie_details_tmdb(movie_id, timeout=10):
    """
    Fetch additional details for a movie using the TMDb API.

    Returns a dict with ``runtime`` (minutes, as reported by the API) and
    ``genres`` (list of genre names). The dict is empty when the request
    fails or returns a non-200 status; failures are printed, not raised.

    *timeout* is the number of seconds to wait for the server before giving up.
    """
    details = {}
    details_url = f"https://api.themoviedb.org/3/movie/{movie_id}"
    params = {
        "api_key": api_key,
        "language": "en-US"
    }
    try:
        # Without a timeout a stalled connection would block the scrape forever.
        response = requests.get(details_url, params=params, timeout=timeout)
        if response.status_code == 200:
            data = response.json()
            details["runtime"] = data.get("runtime")  # runtime in minutes
            details["genres"] = [g["name"] for g in data.get("genres", [])]
        else:
            print(f"Failed to get details for movie {movie_id}: status code {response.status_code}")
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Network errors, undecodable JSON, or a payload with an unexpected shape.
        print(f"Exception while fetching details for movie {movie_id}: {e}")
    return details
|
||||||
|
|
||||||
|
def get_movie_keywords(movie_id, timeout=10):
    """
    Fetch movie keywords (tags) using the TMDb API.

    Returns a list of keyword names. The list is empty when the request
    fails or returns a non-200 status; failures are printed, not raised.

    *timeout* is the number of seconds to wait for the server before giving up.
    """
    keywords = []
    keywords_url = f"https://api.themoviedb.org/3/movie/{movie_id}/keywords"
    params = {
        "api_key": api_key
    }
    try:
        # Without a timeout a stalled connection would block the scrape forever.
        response = requests.get(keywords_url, params=params, timeout=timeout)
        if response.status_code == 200:
            data = response.json()
            keywords = [kw["name"] for kw in data.get("keywords", [])]
        else:
            print(f"Failed to get keywords for movie {movie_id}: status code {response.status_code}")
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Network errors, undecodable JSON, or a payload with an unexpected shape.
        print(f"Exception while fetching keywords for movie {movie_id}: {e}")
    return keywords
|
||||||
|
|
||||||
|
def _fetch_top_rated_page(page, timeout=10):
    """Return the decoded JSON for one page of TMDb's top-rated list, or None on failure."""
    params = {
        "api_key": api_key,
        "language": "en-US",
        "page": page
    }
    try:
        response = requests.get(
            "https://api.themoviedb.org/3/movie/top_rated",
            params=params,
            timeout=timeout,
        )
        if response.status_code != 200:
            return None
        return response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not valid JSON.
        return None


def _build_movie_record(movie, ranking):
    """Build the saved record for one list entry, fetching its details and keywords."""
    movie_id = movie.get("id")
    release_date = movie.get("release_date", "")
    poster_path = movie.get("poster_path")

    # Get additional details: runtime and genres.
    details = get_movie_details_tmdb(movie_id)

    return {
        "ranking": ranking,
        "title": movie.get("title"),
        "year": release_date.split("-")[0] if release_date else None,
        "runtime": details.get("runtime"),
        "content_rating": None,  # Not available via TMDb by default.
        "metascore": None,  # Not applicable.
        "imdb_rating": movie.get("vote_average"),  # Using TMDb's vote average.
        "vote_count": movie.get("vote_count"),
        "description": movie.get("overview"),
        "poster": f"https://image.tmdb.org/t/p/w500{poster_path}" if poster_path else None,
        "url": f"https://www.themoviedb.org/movie/{movie_id}",
        "genres": details.get("genres", []),
        "tags": get_movie_keywords(movie_id),  # Keywords (tags).
    }


def get_top_movies():
    """
    Uses the TMDb API to retrieve top rated movies, then iterates through all pages.
    For each movie, additional details and keywords are fetched.
    After processing each page, the current movies list is saved to a JSON file.

    Returns the list of movie records, or an empty list when the first page
    cannot be retrieved. Pages that fail later are reported and skipped.
    """
    # Initial request to determine total pages.
    first_page = _fetch_top_rated_page(1)
    if first_page is None:
        print("Failed to retrieve top rated movies")
        return []
    total_pages = first_page.get("total_pages", 1)
    # Rankings are derived from the size of the first page. Using the length of
    # the page being processed would give colliding ranks on a short page.
    page_size = len(first_page.get("results", []))

    movies = []
    # Loop through all pages.
    for page in tqdm(range(1, total_pages + 1), desc="Scraping top rated movies"):
        # Page 1 was already downloaded above; reuse it instead of refetching.
        data = first_page if page == 1 else _fetch_top_rated_page(page)
        if data is None:
            print(f"Failed to retrieve page {page}")
            continue
        for idx, movie in enumerate(data.get("results", [])):
            # Ranking is computed by overall order.
            ranking = (page - 1) * page_size + idx + 1
            movies.append(_build_movie_record(movie, ranking))
            # Pause a bit between detail requests to be courteous.
            time.sleep(0.2)
        # After processing each page, write the current movies list to the JSON file.
        write_movies(movies)
        # Pause between pages.
        time.sleep(0.5)
    return movies
|
||||||
|
|
||||||
|
# Run the scrape when executed as a script and report the outcome.
if __name__ == "__main__":
    top_movies = get_top_movies()
    if top_movies:
        print(f"\nData saved to {output_filename}")
    else:
        # An empty result means no movie records were collected, so do not
        # claim that data was saved.
        print("\nNo movies were retrieved.")
|
4318
top_movies.json
Normal file
4318
top_movies.json
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user