-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
147 lines (116 loc) · 4.24 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import seaborn as sns
import os
import streamlit as st
# EDA pkgs
import pandas as pd
# Visualization pkgs
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("Agg")
# Banner markup shown under the page title. The lines are deliberately kept
# flush-left: Markdown treats text indented by four spaces as a code block.
_BANNER_HTML = """
<div style="background-color:tomato;">
<p>Haha</p>
</div>
"""


def _list_dataset_files(folder_path):
    """Return the sorted names of the visible regular files in *folder_path*.

    Sub-directories and dot-files are skipped because they cannot be loaded
    as datasets. An empty list is returned when the folder does not exist or
    cannot be read, so the caller can report the problem instead of crashing.
    """
    try:
        entries = os.listdir(folder_path)
    except OSError:
        return []
    return sorted(
        name
        for name in entries
        if not name.startswith(".")
        and os.path.isfile(os.path.join(folder_path, name))
    )


def _file_selector(folder_path="./datasets"):
    """Let the user pick a dataset file and return its path.

    Returns ``None`` (after showing a warning) when *folder_path* contains no
    selectable files.
    """
    filenames = _list_dataset_files(folder_path)
    if not filenames:
        st.warning("No dataset files found in {}".format(folder_path))
        return None
    selected_filename = st.selectbox("Select a file", filenames)
    return os.path.join(folder_path, selected_filename)


def _load_dataset(path):
    """Read *path* with ``pd.read_csv``; return ``None`` if it cannot be read.

    pandas' parser errors and ``UnicodeDecodeError`` are ``ValueError``
    subclasses, so ``(OSError, ValueError)`` covers unreadable, empty and
    malformed files. The failure is reported in the page via ``st.error``.
    """
    try:
        return pd.read_csv(path)
    except (OSError, ValueError) as err:
        st.error("Could not read {} as CSV: {}".format(path, err))
        return None


def _show_figure(fig):
    """Render a matplotlib figure in the page, then close it.

    Passing the figure explicitly avoids the deprecated global-pyplot form
    ``st.pyplot()``; closing it keeps figures from piling up, since the
    script is re-executed on every widget interaction.
    """
    st.pyplot(fig)
    plt.close(fig)


def _render_overview(df):
    """Draw the table-inspection widgets (rows, columns, shape, dtypes, ...)."""
    # Show Dataset
    if st.checkbox("Show Dataset"):
        # The original passed 5 positionally, which made it the *minimum*;
        # here 5 is the default and any positive row count is allowed.
        number = st.number_input(
            "Number of Rows to View", min_value=1, value=5, step=1)
        st.dataframe(df.head(int(number)))

    # Show Columns
    if st.button("Column Names"):
        st.write(df.columns)

    # Show Shape (the radio only ever yields one of its two options)
    if st.checkbox("Shape of Dataset"):
        data_dim = st.radio("Show Dimension By", ("Rows", "Columns"))
        if data_dim == "Rows":
            st.text("Number of Rows")
            st.write(df.shape[0])
        else:
            st.text("Number of Columns")
            st.write(df.shape[1])

    # Select Columns
    if st.checkbox("Select Columns To Show"):
        selected_columns = st.multiselect("Select", df.columns.tolist())
        st.dataframe(df[selected_columns])

    # Value counts of the last column, which is used as the target/class
    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, -1].value_counts())

    # Show Datatypes
    if st.button("Data Types"):
        st.write(df.dtypes)

    # Show Summary
    if st.checkbox("Summary"):
        st.write(df.describe().T)


def _render_visualizations(df):
    """Draw the fixed plots: correlation heatmap and pie chart."""
    st.subheader("Data Visualization")

    # Correlation heatmap (seaborn)
    if st.checkbox("Correlation Plot[Seaborn]"):
        # Correlation is only defined for numeric columns; recent pandas
        # raises if text columns are passed to corr().
        numeric_df = df.select_dtypes(include="number")
        if numeric_df.empty:
            st.warning("No numeric data available for a correlation plot")
        else:
            fig, ax = plt.subplots()
            sns.heatmap(numeric_df.corr(), annot=True, ax=ax)
            _show_figure(fig)

    # Pie chart of the last column's value counts
    if st.checkbox("Pie Plot"):
        if st.button("Generate Pie Plot"):
            st.success("Generating A Pie Plot")
            fig, ax = plt.subplots()
            df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%", ax=ax)
            _show_figure(fig)


def _render_custom_plot(df):
    """Draw the user-configured plot of the chosen kind and columns."""
    st.subheader("Customizable Plot")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox("Select Type of Plot", [
        "area", "bar", "line", "hist", "box", "kde"])
    selected_columns_names = st.multiselect(
        "Select columns To Plot", all_columns_names)

    if not st.button("Generate Plot"):
        return
    if not selected_columns_names:
        # Plotting an empty column selection would raise inside pandas.
        st.warning("Select at least one column to plot")
        return

    st.success("Generating Customizable Plot of {} for {}".format(
        type_of_plot, selected_columns_names))
    custom_data = df[selected_columns_names]

    # Plot by Streamlit [for area, bar and line]
    streamlit_charts = {
        "area": st.area_chart,
        "bar": st.bar_chart,
        "line": st.line_chart,
    }
    if type_of_plot in streamlit_charts:
        streamlit_charts[type_of_plot](custom_data)
        return

    # Custom Plot [by matplotlib, via pandas, for the other kinds]
    fig, ax = plt.subplots()
    try:
        custom_data.plot(kind=type_of_plot, ax=ax)
    except (TypeError, ValueError) as err:
        # e.g. pandas raises TypeError when no selected column is numeric
        st.error("Could not draw a {} plot: {}".format(type_of_plot, err))
    else:
        st.pyplot(fig)
    finally:
        plt.close(fig)


def _render_sidebar():
    """Draw the static "about" information in the sidebar."""
    st.sidebar.header("About App")
    st.sidebar.info("A Simple EDA App for Exploring Common ML Dataset")
    st.sidebar.header("Get Datasets")
    # NOTE(review): the two adjacent literals concatenate to a Markdown link
    # with an empty target -- the repository URL still needs to be filled in.
    st.sidebar.markdown("[Common ML Dataset Repo]("")")
    st.sidebar.header("About")
    st.sidebar.info("Asmee Dhungana")
    st.sidebar.text("Built with Streamlit")
    st.sidebar.text("Tutorial: Jesse JCharis")


def main():
    """Common ML Dataset Explorer.

    Render the Streamlit page: a title banner, a selector for a file under
    ``./datasets``, widgets to inspect the loaded table, a few plots, and an
    informational sidebar. If no dataset can be selected or read, a message
    is shown and the dataset-dependent sections are skipped.
    """
    st.title("Common ML Dataset Explorer")
    st.subheader("Simple Data Science Explorer with Streamlit")
    # unsafe_allow_html is important to show the HTML in the app
    st.markdown(_BANNER_HTML, unsafe_allow_html=True)

    # The sidebar does not depend on the dataset, so draw it before any
    # early return below; it lives in its own container, so call order does
    # not change where it appears.
    _render_sidebar()

    # Load a CSV file from the computer
    filename = _file_selector()
    if filename is None:
        return
    st.info("You selected {}".format(filename))

    # Read Data
    df = _load_dataset(filename)
    if df is None:
        return

    _render_overview(df)
    _render_visualizations(df)
    _render_custom_plot(df)

    if st.button("Thanks"):
        st.balloons()
# Script entry point: build the page only when this file is executed directly
# (for example by `streamlit run`), not when it is imported as a module.
if __name__ == "__main__":
    main()