-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchart_bli.py
94 lines (77 loc) · 4.81 KB
/
chart_bli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from radar_chart import radar
# Read csv with related columns
cols = ['Country', 'Indicator', 'Inequality', 'Unit', 'Value']
df = pd.read_csv('BLI_05052017143301063.csv', usecols=cols)
# Dataframe consist of a lot of information
df[df['Country'] == 'Japan']
# Use total values only to make it simpler
df = df[df['Inequality'] == 'Total']
# Reduce the number of statistics to make it simpler,
# uses a similar formula and categorization from OECD presentation
# Normalize values
df['Value'] = df.groupby('Indicator')['Value'].apply(
lambda x: (x - x.min()) / (x.max() - x.min()))
# since properties like homicide rate are better the lower, will process
# those first
neg_vals = ['Dwellings without basic facilities',
'Housing expenditure',
'Long-term unemployment rate'
'Air pollution',
'Homicide rate',
'Employees working very long hours',
'Labour market insecurity',
'Stakeholder engagement for developing regulations']
df.loc[df['Indicator'].isin(neg_vals), 'Value'] = 1 - \
df.loc[df['Indicator'].isin(neg_vals), 'Value']
# means of related values to create sub-groups
household = df.groupby('Country').apply(lambda x: x.loc[x['Indicator'].isin(['Dwellings without basic facilities',
'Housing expenditure',
'Rooms per person']), 'Value'].mean())
satisfaction = df.groupby('Country').apply(lambda x: x.loc[x['Indicator'].isin(['Life satisfaction']),
'Value'].mean())
employement = df.groupby('Country').apply(lambda x: x.loc[x['Indicator'].isin(['Employment rate',
'Long-term unemployment rate',
'Employees working very long hours',
'Time devoted to leisure and personal care',
'Labour market insecurity']), 'Value'].mean())
wealth = df.groupby('Country').apply(lambda x: x.loc[x['Indicator'].isin(['Household net adjusted disposable income',
'Household net financial wealth',
'Personal earnings']), 'Value'].mean())
education = df.groupby('Country').apply(lambda x: x.loc[x['Indicator'].isin(['Educational attainment',
'Student skills',
'Years in education']), 'Value'].mean())
health = df.groupby('Country').apply(lambda x: x.loc[x['Indicator'].isin(['Life expectancy',
'Self-reported health']), 'Value'].mean())
government = df.groupby('Country').apply(lambda x: x.loc[x['Indicator'].isin(['Quality of support network',
'Air pollution',
'Water quality',
'Voter turnout',
'Stakeholder engagement for developing regulations']), 'Value'].mean())
safety = df.groupby('Country').apply(lambda x: x.loc[x['Indicator'].isin(['Homicide rate',
'Feeling safe walking alone at night']), 'Value'].mean())
df = pd.concat([household, satisfaction, employement, wealth,
education, government, safety], axis=1)
df.columns = ['Household', 'Satisfaction', 'Employement',
'Wealth', 'Education', 'Government', 'Safety']
def plot_country(ax, theta, v, c, l):
ax.plot(theta, v, color='k', label='')
ax.fill(theta, v, color=c, alpha=.5, label=l)
labels = df.columns
theta = radar(len(labels))
fig, ax = plt.subplots(subplot_kw={'projection': 'radar'})
ax.set_varlabels(labels)
ax.get_yaxis().set_ticks([])
ax.set_ylim(0, 1)
plot_country(ax, theta, df.loc['Japan'],
np.random.rand(3, ), 'Japan')
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.9, box.height * 0.9])
ax.legend(bbox_to_anchor=(1.1, 1.1))
plt.show()
# can compare with another country, or OECD total
plot_country(ax, theta, df.loc['OECD - Total'],
np.random.rand(3, ), 'OECD - Total')
ax.legend(bbox_to_anchor=(1.1, 1.1))