-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
240 lines (185 loc) · 8.96 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# Importing the necessary libraries
import numpy as np
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns
import calendar
import datetime as dt
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from IPython.display import HTML
# ---------------------------------------------------------------------------
# Page header
# ---------------------------------------------------------------------------
st.title("👨🏻🏭Unemployment Analysis")
st.write("*Developed for 🌎 with ❤️🔥 by Sohaib👨🏻💻🇵🇰*")

# ---------------------------------------------------------------------------
# Data loading and preparation
# ---------------------------------------------------------------------------
# The CSV path is relative to the directory the app is launched from.
df = pd.read_csv('Unemployment_Rate_upto_11_2020.csv')

# Replace the raw headers with clean names. The assignment is positional, so
# it relies on the CSV having exactly these nine columns in this order.
df.columns = [
    'States',
    'Date',
    'Frequency',
    'Estimated Unemployment Rate',
    'Estimated Employed',
    'Estimated Labour Participation Rate',
    'Region',
    'longitude',
    'latitude',
]

# Parse 'Date' as day-first dates.
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)

# 'Frequency' and 'Region' are stored as categoricals.
df['Frequency'] = df['Frequency'].astype('category')
df['Region'] = df['Region'].astype('category')

# Month number taken straight from the parsed date (no temporary 'Month'
# column needed), followed by its abbreviated month name.
df['MonthNumber'] = df['Date'].dt.month.astype('int64')
df['MonthName'] = df['MonthNumber'].apply(lambda month: calendar.month_abbr[month])

# Blank title used as a vertical spacer before the first section.
st.title(" ")
# Descriptive statistics for every column that DataFrame.describe() covers.
st.subheader("NUMBER SUMMARY")
st.write(df.describe())

# The three measurement columns that the rest of this section summarises.
summary_columns = [
    'Estimated Unemployment Rate',
    'Estimated Employed',
    'Estimated Labour Participation Rate',
]

# Same statistics restricted to the measurement columns, one row per column,
# rounded to 2 decimal places.
st.subheader("NUMBER SUMMARY OF INFORMATORY VARIABLES")
st.write(df[summary_columns].describe().T.round(2))

# Mean of each measurement per region. 'Region' is categorical, so
# observed=True is passed explicitly: it restricts the result to regions that
# actually occur and avoids depending on the deprecated implicit default.
regionStats = (
    df.groupby('Region', observed=True)[summary_columns]
    .mean()
    .reset_index()
)

# Rounding the values to 2 decimal places for display.
st.subheader("STATISTICS GROUPED BY REGION")
st.write(regionStats.round(2))
# Pair-wise correlation between all numerical columns.
heatMap = df[
    [
        'Estimated Unemployment Rate',
        'Estimated Employed',
        'Estimated Labour Participation Rate',
        'longitude',
        'latitude',
        'MonthNumber',
    ]
].corr()

# Draw on an explicit Figure/Axes pair instead of matplotlib's implicit global
# figure, so the exact figure that was drawn is the one handed to Streamlit.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(23, 8))
sns.heatmap(
    heatMap,
    annot=True,
    cmap='twilight_shifted',
    fmt='.3f',
    linewidths=1,
    ax=heatmap_ax,
)
heatmap_ax.set_title('heatMap')

st.subheader("HEATMAP")
st.pyplot(heatmap_fig)
# Close the figure once rendered so pyplot does not keep it registered after
# this script run has finished with it.
plt.close(heatmap_fig)
## EDA - Exploratory Data Analysis
st.header("EXPLORATORY DATA ANALYSIS")

# Box-plot of the unemployment rate, one box (and colour) per state.
box_options = dict(
    x='States',
    y='Estimated Unemployment Rate',
    color='States',
    title='unemploymentRate',
    template='plotly',
)
fig = px.box(df, **box_options)
st.subheader("BOX-PLOT [UNEMPLOYMENT RATE IN EACH STATE]")
st.plotly_chart(fig)

# Scatter matrix relating the three measurement columns, coloured by region.
matrix_dimensions = [
    'Estimated Unemployment Rate',
    'Estimated Employed',
    'Estimated Labour Participation Rate',
]
fig = px.scatter_matrix(df, dimensions=matrix_dimensions, color='Region')
st.subheader("SCATTER MATRIX PLOT")
st.plotly_chart(fig)
# Bar-plot of the average unemployment rate in each state.
# Group by state, take the mean rate, and order the states from the lowest to
# the highest average so the bars rise left to right.
newDF = (
    df[['Estimated Unemployment Rate', 'States']]
    .groupby('States')
    .mean()
    .reset_index()
    .sort_values('Estimated Unemployment Rate')
)

fig = px.bar(
    newDF,
    x='States',
    y='Estimated Unemployment Rate',
    color='States',
    # The plotted column is the unemployment rate, so the title says so.
    title='State-wise Average Unemployment Rate',
)
st.subheader("BAR-PLOT")
st.plotly_chart(fig)
# Bar-plot of the unemployment rate per region, coloured by state, with one
# animation frame per month name.
monthly_bar_options = {
    'x': 'Region',
    'y': 'Estimated Unemployment Rate',
    'animation_frame': 'MonthName',
    'color': 'States',
    'title': 'Region-wise Unemployment Rate',
    'height': 800,
}
fig = px.bar(df, **monthly_bar_options)

# Set the frame duration to 1500 on the first button of the first update menu
# (presumably the animation's play button - confirm against the plotly docs).
first_button = fig.layout.updatemenus[0].buttons[0]
first_button.args[1]["frame"]["duration"] = 1500

st.subheader("BAR-PLOT [MONTH-WISE]")
st.plotly_chart(fig)
# Average unemployment rate for every (region, state) pair that occurs in the
# data. 'Region' is categorical, so observed=True is required: without it the
# result is expanded to every region x state combination and filled with NaN
# for pairs that never occur.
unempDF = (
    df.groupby(['Region', 'States'], observed=True)['Estimated Unemployment Rate']
    .mean()
    .reset_index()
)

# Showing the first rows of the new dataframe.
st.subheader("DATAFRAME - STATEWISE & REGIONWISE")
st.write(unempDF.head(4))

# Sunburst (hierarchical) chart: regions on the inner ring, their states on the
# outer ring, sized by the average unemployment rate.
fig = px.sunburst(
    unempDF,
    path=['Region', 'States'],
    values='Estimated Unemployment Rate',
    title='unemployment rate in each region and state',
    height=650,
)
st.subheader("SUNBURST CHART")
st.plotly_chart(fig)
## Impact of Lockdown on States Estimated Employed
st.header("IMPACT OF LOCKDOWN")

# Animated geospatial scatter: one marker per row, sized by unemployment rate,
# coloured by region, one frame per month name.
# NOTE(review): the column named 'longitude' is passed as `lat` and 'latitude'
# as `lon`, exactly as the positional call did before - presumably the CSV's
# two coordinate headers are swapped; verify against the data.
fig = px.scatter_geo(
    df,
    lat='longitude',
    lon='latitude',
    color="Region",
    hover_name="States",
    size="Estimated Unemployment Rate",
    animation_frame="MonthName",
    scope='asia',
    title='Lockdown Impact throughout India',
)

# Set the frame duration to 1200 on the first button of the first update menu.
first_button = fig.layout.updatemenus[0].buttons[0]
first_button.args[1]["frame"]["duration"] = 1200

# Restrict the visible latitude/longitude window and paint the ocean.
geo_settings = {
    'lataxis_range': [5, 35],
    'lonaxis_range': [65, 100],
    'oceancolor': "#6dd5ed",
    'showocean': True,
}
fig.update_geos(**geo_settings)

st.subheader("SCATTER GEOSPATIAL PLOT")
st.plotly_chart(fig)
# Column names used throughout this section.
rate_col = 'Estimated Unemployment Rate'
after_col = 'unemploymentRate A/ lockdown'
before_col = 'unemploymentRate B/ lockdown'
change_col = '% change in unemployment'

# Months 4-7 (inclusive) are treated as "after lockdown" and months 1-4
# (inclusive) as "before lockdown".
# NOTE(review): month 4 is counted in both windows - confirm this overlap is
# intended.
df47 = df[df['MonthNumber'].between(4, 7)]
df14 = df[df['MonthNumber'].between(1, 4)]

# Mean unemployment rate per state inside each window.
df47g = df47.groupby('States')[rate_col].mean().reset_index()
df14g = df14.groupby('States')[rate_col].mean().reset_index()

# Combine the two windows by state name rather than by row position, so a
# state missing from one window cannot shift another state's value onto it.
# A left join keeps every state that has an "after" value.
df47g = df47g.rename(columns={rate_col: after_col}).merge(
    df14g.rename(columns={rate_col: before_col}),
    on='States',
    how='left',
)

# Displaying the top results.
st.subheader("DATAFRAME BEFORE & AFTER LOCKDOWN")
st.write(df47g.head())

# Percentage change relative to the "before" rate. The subtraction must be
# parenthesised: without it the division binds first and the expression
# collapses to "after - 1".
df47g[change_col] = (
    (df47g[after_col] - df47g[before_col]) / df47g[before_col] * 100
).round(2)

# Order the states from the smallest to the largest change.
df47g = df47g.sort_values(change_col)

# Bar-chart of the percentage change per state.
fig = px.bar(
    df47g,
    x='States',
    y=change_col,
    color=change_col,
    title='% change in Unemployment A/ Lockdown',
)
st.subheader("BAR-CHART [%CHANGE IN UNEMPLOYMENT]")
st.plotly_chart(fig)
# Defining a function to sort the values based on impact
# From the above 'box-plot', the values are ranging between 0 and 40
def sort_impact(x):
    """Classify a change in unemployment into an emoji severity label.

    Bands (upper bounds inclusive):
        x <= 10  -> '🥲'        (impacted)
        x <= 20  -> '🥲😥'      (hard impacted)
        x <= 30  -> '🥲😥😖'    (harder impacted)
        x >  30  -> '🥲😥😖🤯'  (hardest impacted)

    Values above 40 are placed in the hardest band instead of being returned
    as a raw number, so the result is always a label for any comparable value.
    A value that compares false against every bound (e.g. NaN) is returned
    unchanged so that missing data stays missing.
    """
    bands = (
        (10, '🥲'),
        (20, '🥲😥'),
        (30, '🥲😥😖'),
    )
    for upper_bound, label in bands:
        if x <= upper_bound:
            return label
    if x > 30:
        return '🥲😥😖🤯'
    # Only reached when every comparison above was false (NaN).
    return x
# Label every state with an impact status derived from its
# '% change in unemployment' value.
change_values = df47g['% change in unemployment']
df47g['impactStatus'] = change_values.apply(sort_impact)

# Horizontal bar-graph of the change per state, coloured by impact status.
impact_bar_options = {
    'y': 'States',
    'x': '% change in unemployment',
    'color': 'impactStatus',
    'title': 'Lockdown Impact on Employment in India',
}
fig = px.bar(df47g, **impact_bar_options)
st.subheader("BAR-GRAPH [CLASSIFYING THE IMPACT FOR DIFFERENT STATES]")
st.plotly_chart(fig)