-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhome.py
238 lines (195 loc) · 11 KB
/
home.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import streamlit as st
import pandas as pd
import numpy as np
import re
st.set_page_config(layout="wide")
steam_path = "./data/total_steam_titles.xlsx"
roblox_path = "./data/total_roblox_experiences.xlsx"
steam_charts_path = "./data/total_steam_charts.xlsx"
total_steam_charts = pd.read_excel(steam_charts_path, sheet_name=0)
steam_data = pd.read_excel(steam_path, sheet_name=0)
steam_sheet_names = pd.ExcelFile(steam_path).sheet_names
steam_data.rename(columns={'Indie Budget Dev': 'Indie Budget Dev (Found Online)'}, inplace=True)
curr_roblox_data = pd.read_excel(roblox_path, sheet_name=0, skiprows=1)
prev_roblox_data = pd.read_excel(roblox_path, sheet_name=1, skiprows=1)
roblox_sheet_names = pd.ExcelFile(roblox_path).sheet_names
# Streamlit Titel
st.title("Weekly Roblox and Steam Trends")
curr_tuesday = pd.to_datetime(steam_sheet_names[0], yearfirst=True)
# orubt*
# print(curr_tuesday)
last_tuesday = curr_tuesday - pd.DateOffset(weeks=1)
one_year_ago = curr_tuesday - pd.DateOffset(years=1)
# print("Curr_Tuesday")
# print(curr_tuesday)
# print("Last_Tuesday")
# print(last_tuesday)
st.info(body = f"🎮 Steam Data scraped on {steam_sheet_names[0]} from Global Top 100 Sellers. \n 🏠 Roblox Data scraped on {roblox_sheet_names[0]} from Romonitor Top 50 Experiences. \n ")
# st.warning(body="💡 Note: The next iteration will add estimated revenue/copies sold.")
st.warning(body="💡 Note: Estimated Revneue From Game Sales is derrived from unqiue game owners * game prices")
def filter_greater_than(value):
try:
if int(value):
# change this value for climbers in Game Weekly Change
return int(value) if int(value) >= 15 else np.nan
except:
return np.nan
# def regex_number(string):
# numbers = re.findall(r'\d+', string)
# return numbers[0] if numbers else np.nan
def regex_number(value):
# Ensure value is a string and non-null
if isinstance(value, str):
numbers = re.findall(r'\d+', value)
return numbers[0] if numbers else np.nan # Return the first match if found, else NaN
return np.nan # Return NaN if the value was not a string
# Ensure 'Game Price USD' column is consistently numeric, converting 'Free to Play' to 0 or NaN
steam_data['Game Price USD'] = pd.to_numeric(steam_data['Game Price USD'], errors='coerce').fillna(0)
steam_data["Game Ranking"] = list(steam_data.index+1)
steam_data['Game Release Date'] = pd.to_datetime(steam_data['Game Release Date'])
reordered_columns =['Game Ranking',
'Game',
'Indie',
'Indie Budget Dev (Found Online)',
'Game Developer',
'Game Publisher',
'Game Price USD',
'Game Release Date',
'Game Genre',
'Game Recent Reviews',
'Game Total Reviews',
'Game Description',
'Game Weekly Change',
'Number of Appearances in Weekly Top 100',
'Game Steam Link',
'Estimated Revenue From Game Sales']
steam_drop_columns = ['Game Genre',
'Game Recent Reviews',
'Game Total Reviews',
'Game Weekly Change',
'Number of Appearances in Weekly Top 100']
# data processing steam
steam_data = steam_data[reordered_columns]
new_released = steam_data[(steam_data['Game Release Date'] <= curr_tuesday) & (steam_data['Game Release Date'] >= last_tuesday)].set_index(["Game Ranking"])
print(new_released["Game Release Date"])
print(new_released)
print("Curr_Tuesday")
print(curr_tuesday)
print("Last_Tuesday")
print(last_tuesday)
new_entrants = steam_data[(steam_data["Number of Appearances in Weekly Top 100"] == 1) & (steam_data["Game Weekly Change"] == "NEW")].set_index(["Game Ranking"])
steam_data['Climber Filtered'] = steam_data['Game Weekly Change'].apply(filter_greater_than)
climbers = steam_data.dropna(subset=['Climber Filtered']).set_index(["Game Ranking"])
new_released["Game Release Date"] = new_released["Game Release Date"].dt.date
new_entrants["Game Release Date"] = new_entrants["Game Release Date"].dt.date
climbers["Game Release Date"] = climbers["Game Release Date"].dt.date
new_released = new_released.drop(columns=steam_drop_columns)
new_entrants = new_entrants.drop(columns=steam_drop_columns)
climbers = climbers.drop(columns=steam_drop_columns)
########################################## TOP 10 STEAM GAMES BY 24H PEAK PLAYERS ##########################################
total_steam_charts = (
total_steam_charts.head(10) # Select top 10 rows
.drop(total_steam_charts.columns[:2], axis=1) # Drop first two columns
.sort_values(by='Peak Players', ascending=False) # Sort by 'Peak Players' descending
.reset_index(drop=True) # Reset index
)
total_steam_charts.index += 1 # Make index 1-based
# Reorder columns: move the third column to the first position
columns = total_steam_charts.columns.to_list()
columns = [columns[2], columns[0], columns[1]] + columns[3:]
# Reorder columns: move 'Game Name' to the first position
columns = ['Game Name'] + [col for col in columns if col != 'Game Name']
top_10_steam_games_by_24H_peak = total_steam_charts[columns]
###################################################################################
if not total_steam_charts.empty:
st.subheader("Top 10 Steam Games by 24H Peak Players")
st.table(top_10_steam_games_by_24H_peak.style.format({"Peak Players": "{:,.0f}", "Current Players": "{:,.0f}", "Hours Played": "{:,.0f}"}))
else:
st.info("No Steam Charts available.")
if not new_released.empty:
st.subheader("Titles in Global Top 100 Sellers Released in the Past Week")
st.dataframe(new_released.style.format({"Game Price USD": "{:,.2f}", "Estimated Revenue From Game Sales": "{:.1f}M"}))
else:
st.info(f"There were no new titles on the top 100 released within the past week")
if not new_entrants.empty:
st.subheader("First Time Entrant Titles in Global Top 100 Sellers")
st.dataframe(new_entrants.style.format({"Game Price USD": "{:,.2f}", "Estimated Revenue From Game Sales": "{:.1f}M"}))
else:
st.info(f"There were no new entrants on the top 100 charts within the last week")
if not climbers.empty:
st.subheader("Titles Climbed >15 Ranks Since Last Week on Global Top 100 Sellers")
st.dataframe(climbers.style.format({"Game Price USD": "{:,.2f}", "Climber Filtered": "{:.0f}", "Estimated Revenue From Game Sales": "{:.1f}M"}))
else:
st.info(f"There were no titles that climbed >15 ranks on top 100 charts for the week of {curr_tuesday} and {last_tuesday}")
# roblox data cleaning
curr_roblox_data["Rating"] = curr_roblox_data["Rating"]*100
# # Read previous week's data
# try:
# prev_roblox_data = pd.read_excel(roblox_path, sheet_name=1, skiprows=1)
# except:
# prev_roblox_data = pd.DataFrame()
# def normalize_name(name):
# # Remove content within brackets
# name = re.sub(r'\[.*?\]', '', name)
# # Remove spaces
# name = name.replace(' ', '')
# name = re.sub(r'[^a-zA-Z]', '', name)
# # Convert to lower case
# name = name.lower()
# return name
# # Apply the normalization function to the experience names
# prev_roblox_data['Normalized Name'] = prev_roblox_data['Experience Name'].apply(normalize_name)
# curr_roblox_data['Normalized Name'] = curr_roblox_data['Experience Name'].apply(normalize_name)
curr_roblox_data['ID from URL'] = curr_roblox_data["Romonitor Exp ID"].apply(regex_number)
prev_roblox_data['ID from URL'] = prev_roblox_data["Romonitor Exp ID"].apply(regex_number)
# Identify new entries into top 50 from previous week
old_experiences = set(prev_roblox_data['ID from URL'])
new_experiences = set(curr_roblox_data['ID from URL'])
roblox_new_entries = new_experiences - old_experiences
roblox_new_entries_df = curr_roblox_data[curr_roblox_data['ID from URL'].isin(roblox_new_entries)].sort_index().reset_index(drop=True)
# Ensure 'Release Date' is in datetime format
curr_roblox_data['Release Date'] = pd.to_datetime(curr_roblox_data['Release Date'], errors='coerce')
roblox_new_entries_df["Release Date"] = pd.to_datetime(roblox_new_entries_df["Release Date"], errors='coerce')
# Identify new releases in the past year that are in the current week's Top 50
roblox_new_releases = curr_roblox_data[curr_roblox_data['Release Date'] >= one_year_ago].sort_index().reset_index(drop=True)
# Convert 'Romonitor Ranking' columns to numeric, coercing errors to NaN
merged_roblox_data = curr_roblox_data.merge(prev_roblox_data, on='ID from URL', suffixes=('_curr', '_prev'))
merged_roblox_data['Romonitor Ranking_curr'] = pd.to_numeric(merged_roblox_data['Romonitor Ranking_curr'].str.replace('#', ''), errors='coerce')
merged_roblox_data['Romonitor Ranking_prev'] = pd.to_numeric(merged_roblox_data['Romonitor Ranking_prev'].str.replace('#', ''), errors='coerce')
# Identify experiences that jumped >15 spots in Romonitor Ranking
rank_jumps = merged_roblox_data[merged_roblox_data['Romonitor Ranking_prev'] - merged_roblox_data['Romonitor Ranking_curr'] > 15]
# Drop these columns before creating snapshot
roblox_drop_columns = ['Favourites', 'Likes', 'Dislikes','Romonitor Exp ID','ID from URL']
curr_roblox_data["Release Date"] = curr_roblox_data["Release Date"].dt.date
roblox_new_entries_df["Release Date"] = roblox_new_entries_df["Release Date"].dt.date
roblox_new_releases["Release Date"] = roblox_new_releases["Release Date"].dt.date
# curr_roblox_data.drop(columns=['ID from URL'], inplace=True)
# steam streamlit dataframes
if not curr_roblox_data.empty:
st.subheader("Top 10 Roblox Experiences")
st.dataframe(curr_roblox_data.head(10).drop(columns=roblox_drop_columns).style.format({"Rating": "{:,.0f}"}))
else:
st.info(f"No Roblox Experiences... ask Alson.")
if not roblox_new_entries_df.empty:
st.subheader("New Roblox Experience Entrants to Top 50")
st.dataframe(roblox_new_entries_df.drop(columns=roblox_drop_columns).style.format({"Rating": "{:,.0f}"}))
else:
st.info(f"No New Roblox Experience Entrants into the Top 50 from Last Week")
if not climbers.empty:
st.subheader("Roblox Experiences Released in Past Year in Current Week's Top 50")
st.dataframe(roblox_new_releases.drop(columns=roblox_drop_columns).style.format({"Rating": "{:,.0f}"}))
else:
st.info(f"No Roblox Experiences in the Current Week's Top 50 were Released in the Past Year")
# if not rank_jumps.empty:
# st.subheader("Roblox Experiences Jumped >15 Spots in Romonitor Ranking")
# st.dataframe(rank_jumps[['Experience Name_curr', 'Romonitor Ranking_curr', 'Romonitor Ranking_prev']])
# else:
# st.info("No Roblox Experiences Jumped >15 Spots in Romonitor Ranking")
# New section to share lists
# if not climbers.empty:
# st.subheader("List of Steam Titles Climbed >15 Ranks:")
# st.dataframe(climbers[['Game', 'Climber Filtered']])
# else:
# st.info("No Steam Titles Climbed >15 Ranks.")
st.warning(body="If any steam game has data fields missing, it is either the steam deck or it's NSFW...", icon="⚠️")
st.write("made by your local :red[makers fund] intern")