-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcode.py
118 lines (74 loc) · 2.52 KB
/
code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# --------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the dataset. `path` is not defined in this file; it must be supplied
# by whatever environment runs the script.
df = pd.read_csv(path)
# Lower-case the state names. The vectorised `.str` accessor leaves missing
# values as NaN, whereas calling `x.lower()` on each element raises
# AttributeError as soon as one state is missing.
df['state'] = df['state'].str.lower()
# Per-row total of the three monthly amounts.
df["total"] = df["Jan"] + df["Feb"] + df["Mar"]
# Column-wise sums of the monthly amounts and of the totals.
sum_row = df[["Jan", "Feb", "Mar", "total"]].sum()
# Append the sums as one extra row. `DataFrame.append` was removed in
# pandas 2.0, so the Series is turned into a one-row frame and concatenated;
# columns that `sum_row` does not contain are NaN in the new row.
df_final = pd.concat([df, sum_row.to_frame().T], ignore_index=True)
# --------------
from io import StringIO

import requests
# Initialize the URL of the page that holds the abbreviation table.
url = 'https://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations'
# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() turns an HTTP error page into an explicit failure
# instead of letting it be parsed as if it were the expected document.
response = requests.get(url, timeout=30)
response.raise_for_status()
# Parse the first HTML table of the page. The markup is wrapped in a
# file-like object because recent pandas versions deprecate passing literal
# HTML directly to read_html.
df1 = pd.read_html(StringIO(response.text))[0]
# Skip the first 11 rows, promote the next row to column labels and drop it
# from the data.
# NOTE(review): the offset 11 is tied to the page layout at the time of
# writing - confirm it still matches the live table.
df1 = df1.iloc[11:, :]
# .copy() makes df1 an independent frame so the column assignment below does
# not write into a slice of the parsed table (SettingWithCopyWarning).
df1 = df1.rename(columns=df1.iloc[0, :]).iloc[1:, :].copy()
# Strip the spaces from the state names; `.str.replace` tolerates missing
# cells, where the per-element lambda raised AttributeError.
df1['United States of America'] = (
    df1['United States of America']
    .str.replace(" ", "", regex=False)
    .astype(object)
)
# --------------
# Normalise both lookup columns to plain strings; the state names are also
# lower-cased so they can be compared with df_final['state'].
df1['US'] = df1['US'].astype(str)
df1['United States of America'] = df1['United States of America'].astype(str).str.lower()
# Build the lookup table: lower-cased state name -> abbreviation.
mapping = dict(zip(df1['United States of America'], df1['US']))
# Place the abbreviation column at position 6; states that are absent from
# the lookup table get NaN.
df_final.insert(6, 'abbr', df_final['state'].map(mapping))
print(df_final.head(15))
# --------------
# Fill in the abbreviations for the two states that are looked up under the
# spellings 'mississipi' and 'tenessee'.
# NOTE(review): presumably these spellings come from the input data and
# therefore find no match in the abbreviation mapping - confirm.

# Row subsets with the abbreviation filled in. Only the 'abbr' column is
# touched: a blanket replace of NaN would also overwrite missing values in
# the numeric columns with the abbreviation string.
df_mississipi = df_final[df_final['state'] == 'mississipi'].fillna({'abbr': 'MS'})
df_tenessee = df_final[df_final['state'] == 'tenessee'].fillna({'abbr': 'TN'})
# Write the abbreviations back into df_final. Rows are selected by state
# name rather than by hard-coded positions (6 and 10), so the fix still hits
# the right rows if the row order of the input changes.
df_final.loc[df_final['state'] == 'mississipi', 'abbr'] = 'MS'
df_final.loc[df_final['state'] == 'tenessee', 'abbr'] = 'TN'
# --------------
# Total amount per state abbreviation. groupby drops rows whose key is NaN
# by default, so rows without an abbreviation do not form a group.
df_sub = df_final[["abbr", "Jan", "Feb", "Mar", "total"]].groupby("abbr").sum()
print(df_sub.shape)
# Render every amount as whole dollars with thousands separators, e.g.
# 1234.5 -> "$1,234" (format spec ",.0f"). Series.map is applied column by
# column because DataFrame.applymap is deprecated since pandas 2.1, while
# its replacement DataFrame.map does not exist in older versions.
formatted_df = df_sub.apply(lambda col: col.map("${:,.0f}".format))
# --------------
# Grand total of every amount column across all abbreviations.
sum_row = df_sub[["Jan", "Feb", "Mar", "total"]].sum()
# One-row frame holding the totals; the transposed unnamed Series yields the
# index label 0, which is renamed to "Total" further down.
df_sub_sum = pd.DataFrame(data=sum_row).T
# Apply the same "$1,234" formatting as the per-state table. Series.map per
# column replaces DataFrame.applymap, which is deprecated since pandas 2.1.
df_sub_sum = df_sub_sum.apply(lambda col: col.map("${:,.0f}".format))
print(formatted_df)
# Stack the totals row under the per-state rows. DataFrame.append was
# removed in pandas 2.0; pd.concat is the supported equivalent.
final_table = pd.concat([formatted_df, df_sub_sum])
print(final_table)
# Give the totals row a readable label.
final_table = final_table.rename(index={0: "Total"})
print(final_table)
# --------------
# Rebuild the per-abbreviation total from the three monthly columns.
df_sub['total'] = df_sub['Jan'].add(df_sub['Feb']).add(df_sub['Mar'])
# Draw the totals as a pie chart, one wedge per abbreviation.
df_sub['total'].plot.pie()