-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added the app for letterboxd scraper and dashboard
- Loading branch information
1 parent
02ceb9d
commit 6a41554
Showing
4 changed files
with
257 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import streamlit as st | ||
# Configure the page before any other Streamlit element is rendered.
st.set_page_config(
    page_title="Letterboxd Film Tracker",
    page_icon=":chart_with_upwards_trend:",
    layout="wide",
)

# CSS for the page: dark background (#1A232C) with white text.
# Palette: #1A232C #FF8100 #FFFFFF #3EBDF4 #00E153
# Every rule is explicitly closed so the stylesheet is well-formed and does
# not rely on the browser recovering from an unterminated block.
page_css = """
<style>
[data-testid="stHeader"]{
    background-color: #1A232C;
    color: #FFFFFF;
}
[data-testid="stMainBlockContainer"]{
    background-color: #1A232C;
    color: #FFFFFF;
}
[data-testid="stMain"]{
    background-color: #1A232C;
}
</style>
"""

# unsafe_allow_html is required for the <style> tag to be injected as HTML.
st.markdown(page_css, unsafe_allow_html=True)

# Padding for the page. Not referenced anywhere else in this script as shown.
padding_top = 2

# Page title
st.title('Letterboxd Film Tracker')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
import streamlit as st | ||
import pandas as pd | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import connection_mongo | ||
import film as scraper | ||
|
||
# Establish MongoDB connection
# NOTE(review): the "-" arguments look like redacted placeholders for the
# real connection credentials — confirm before running.
client = connection_mongo.connect_to_mongo("-", "-")
db = client.get_database("Letterboxd")

# Set page title
st.set_page_config(page_title="Letterboxd Film Tracker", page_icon=":chart_with_upwards_trend:")

# Main title
st.title('Letterboxd Film Tracker')

# Main content
st.header('Movie Stats')

# Text input (default empty); the confirmed movie lives in session state so
# it survives Streamlit reruns.
if 'movie' not in st.session_state:
    st.session_state['movie'] = ''

user_input = st.text_input('Enter the movie you want:', st.session_state['movie'])

# Button to change the selected movie and store it in session state
if st.button('Click to change'):
    st.session_state['movie'] = user_input
    st.write(f'Selected film: {st.session_state["movie"]}')
    # The collection is deliberately NOT looked up here: the text box may be
    # empty, and PyMongo rejects empty collection names. The section below
    # resolves the collection only once a non-empty movie is selected.
|
||
# Only proceed if a movie has been selected
if st.session_state['movie']:
    # Reviews for a movie are stored in a collection named after the movie.
    collection = db[st.session_state['movie']]
    film = scraper.Film()
    film.set_film_name(st.session_state['movie'])
    film_poster = film.scrape_film_poster(film.filmMainSoup, film.filmName)

    # Centered poster image
    st.markdown(
        f'<div style="text-align:center;"><img src="{film_poster}" alt="Movie Image" width="300"></div>',
        unsafe_allow_html=True
    )

    # Text input for number of reviews. The value is free text, so it is
    # parsed defensively instead of letting int() crash the page.
    raw_review_count = st.text_input("Number of recent reviews to show", "10")
    try:
        n = int(raw_review_count)
    except ValueError:
        n = 0

    if n <= 0:
        # A limit of 0 means "no limit" to MongoDB, so non-positive values
        # are rejected rather than passed through to the query.
        st.error("Please enter a positive whole number of reviews to show.")
    elif st.checkbox(f'Show last {n} reviews'):
        # Most recently inserted documents first ($natural descending).
        recent_reviews = (
            collection.find({'rating': {'$exists': True}})
            .sort([('$natural', -1)])
            .limit(n)
        )
        # The query only guarantees that 'rating' exists; the other fields
        # are read with .get() so one incomplete document cannot break the table.
        result = [
            [doc.get("username"), doc["rating"], doc.get("review_text"), doc.get("date")]
            for doc in recent_reviews
        ]
        st.dataframe(result)

    # Get dates from the database (rated reviews only, skipping blank dates)
    dates = collection.distinct('date', {'rating': {'$exists': True}})
    dates = [x for x in dates if x != ""]

    # Manage the selected date using session state
    if 'selected_date' not in st.session_state:
        st.session_state['selected_date'] = None

    if dates:
        st.session_state['selected_date'] = st.selectbox('Select a date', options=dates, index=0)

        # Count how many reviews gave each rating on the selected date
        rating_counts = list(collection.aggregate([
            # Filter documents based on the given date and check if rating exists
            {'$match': {'date': st.session_state['selected_date'], 'rating': {'$exists': True}}},
            # Group by rating and count the occurrences of each rating
            {'$group': {'_id': '$rating', 'count': {'$sum': 1}}}
        ]))

        # Convert the result to a dictionary (rating -> count), ordered by rating
        ratings_dict = {item['_id']: item['count'] for item in rating_counts}
        ratings_dict = dict(sorted(ratings_dict.items()))

        if ratings_dict:  # Proceed if there are ratings to show
            ratings = list(ratings_dict.keys())
            counts = list(ratings_dict.values())

            # Create the horizontal bar plot
            st.subheader(f'Plot for {collection.name} on {st.session_state["selected_date"]}')
            st.write("The movie may have few scraped reviews, so the plot may not be accurate.")
            fig, ax = plt.subplots()  # Create a figure and axis
            ax.barh(ratings, counts)  # Create horizontal bar chart

            # Add labels and title
            ax.set_xlabel('Count')
            ax.set_ylabel('Rating')
            ax.set_title('Ratings Distribution')

            # Display the plot in Streamlit
            st.pyplot(fig)
            # The script reruns on every interaction; close the figure so
            # pyplot does not keep one open figure per rerun.
            plt.close(fig)
        else:
            st.write("No ratings data available for the selected date.")
    else:
        st.write("No dates available for the selected movie.")
else:
    st.write("Please enter a movie and click 'Click to change' to see the data.")
|
||
# Footer: a horizontal rule followed by the author names in two columns
st.markdown("---")

left_column, right_column = st.columns(2)

# First column of authors
for author in ("Canto Arcona Alexis", "Castro Echeverria Samantha"):
    left_column.write(author)

# Second column of authors
for author in ("Cumi Llanez Christopher", "Fernandez Cruz Juan", "Ramayo Cardoso Juliana"):
    right_column.write(author)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import streamlit as st | ||
import connection_mongo | ||
import film as scraper | ||
|
||
# Page heading
st.title('Letterboxd Scraper')

# Open the MongoDB connection and select the database used by the app
client = connection_mongo.connect_to_mongo("-", "-")
db = client.get_database("Letterboxd")

# Seed session state on the first run only: selected movie, scraping status,
# requested scrape amount, and the scraped film details.
session_defaults = {
    'movie': '',
    'scrape_status': 'No movie scraped yet.',
    'scrape_amount': '',
    'film_data': {},
}
for state_key, initial_value in session_defaults.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# Movie name typed by the user, pre-filled with the previously stored value
user_input = st.text_input('Enter the movie you want to scrape:', st.session_state['movie'])
# Persist the current text on every rerun
st.session_state['movie'] = user_input
|
||
|
||
|
||
# Button to scrape the movie
if st.button('Click to scrape'):
    if user_input:
        film = scraper.Film()
        film.set_film_name(st.session_state['movie'])

        # Scrape movie poster
        film_poster = film.scrape_film_poster(film.filmMainSoup, film.filmName)

        # Save film data to session state so it survives reruns
        st.session_state['film_data'] = {
            'name': film.filmName,
            'year': film.filmReleaseYear,
            'directors': film.filmDirectors["Directors"],
            'rating': film.filmAverageRating,
            'poster': film_poster,
            # The text typed when this film was scraped. Reviews are stored
            # under this name, so editing the text box afterwards cannot
            # redirect them into another movie's collection.
            'query': user_input,
        }

        # Update scraping status
        st.session_state['scrape_status'] = f'Successfully scraped {film.filmName}'
    else:
        st.warning("Please enter a movie name before scraping.")

film_data = st.session_state['film_data']

# Display movie information if it exists
if film_data:
    st.write(f"{film_data['name']} | {film_data['year']}")
    st.write(f'Directed by: {film_data["directors"]}')
    st.write(f'Rating: {film_data["rating"]:.1f} / 10')
    st.write(f'https://letterboxd.com/film/{film_data["name"]}/')

    # Display the scraped image
    image_url = film_data['poster']
    st.markdown(f'<br><div style="text-align:center;"><img src="{image_url}" alt="Movie Image" width="300"></div><br>', unsafe_allow_html=True)

# Text input for scrape amount
st.write("If the movie is not popular recently, it is recommended to scrape more reviews at once, as if the scraper is run again, it will probably scrape the same reviews.")
scrape_amount_input = st.text_input("Number of recent reviews to scrape:", st.session_state['scrape_amount'])
st.session_state['scrape_amount'] = scrape_amount_input  # Save scrape amount to session state

# Reviews are requested in batches of 12.
# NOTE(review): presumably 12 is the number of reviews per Letterboxd page — confirm.
REVIEWS_PER_BATCH = 12

# Button to scrape reviews
if st.button('Scrape Reviews'):
    if not film_data:
        st.warning("Please scrape a movie first before scraping reviews.")
    else:
        # Only the number parsing is guarded, so a ValueError raised by the
        # scraper or the database is not misreported as a bad number.
        try:
            requested_reviews = int(st.session_state['scrape_amount'])
        except ValueError:
            requested_reviews = 0

        if requested_reviews <= 0:
            st.error("Please enter a valid number for scraping reviews.")
        else:
            # Round up to a whole number of batches (ceiling division).
            scrape_amount = -(-requested_reviews // REVIEWS_PER_BATCH)
            st.write(f"Scraping {scrape_amount * REVIEWS_PER_BATCH} recent reviews.")

            # Scrape reviews
            reviews = scraper.Film.FilmReview(film_data['name'])
            reviews.get_film_reviews(scrape_amount)
            data = reviews.filmReviews

            # Store under the name used when the film was scraped; fall back
            # to the current text for film data saved without that key.
            collection_name = film_data.get('query') or st.session_state['movie']
            collection = db[collection_name]

            # Insert only reviews that are not already stored
            for review in data:
                if not collection.find_one({"review_id": review['review_id']}):
                    collection.insert_one(review)
                    print(f"Review with ID {review['review_id']} inserted into the database.")
                else:
                    print(f"Review with ID {review['review_id']} already exists in the database.")

            # Update scraping status and show it, so the user gets feedback
            st.session_state['scrape_status'] = f'Successfully scraped {len(data)} reviews for {film_data["name"]}.'
            st.success(st.session_state['scrape_status'])

# If scraping was done, display the last scraped movie
if 'scrape_status' in st.session_state and st.session_state['scrape_status'] != 'No movie scraped yet.':
    # Prefer the name recorded at scrape time over the current text box content.
    last_scraped = st.session_state['film_data'].get('query') or st.session_state['movie']
    st.write(f"Last scraped movie: {last_scraped}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters