-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfull_churn.py
111 lines (87 loc) · 4.29 KB
/
full_churn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# -*- coding: utf-8 -*-
"""full churn
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1D7J4e8xKPk5MfkzTmeTGA5KO-ibgLdDV
"""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import preprocessing as pr
import XGTrain as xg
import visualizations as v
import base64
# (section title, PNG file name) for each fixed figure embedded in the report,
# listed in the order the sections appear.  The title doubles as the image's
# alt text.
_REPORT_FIGURES = (
    ('Scatter Plot', 'scatter_plot.png'),
    ('Buy and Sell Orders', 'buy_sell_orders.png'),
    ('Churn Rate by Risk Rate', 'churn_rate_by_risk_rate.png'),
    ('Churn Visualization', 'churn_visualization.png'),
    ('Churn Rate by Order Time Month', 'churn_rate_by_order_time_month.png'),
    ('RFM Segments', 'rfm_segments.png'),
)


def _png_section(title, png_path):
    """Return an ``<h2>`` heading followed by the PNG inlined as a data URI.

    Args:
        title: Section heading; also used as the image's ``alt`` text.  It is
            converted with ``str()`` and HTML-escaped, so DataFrame column
            labels containing ``<``, ``&`` or quotes cannot break the markup.
        png_path: Path of the PNG file to embed.

    Returns:
        The HTML fragment as a string.

    Raises:
        FileNotFoundError: If *png_path* does not exist.
    """
    import html  # local import: not part of the file's top-level imports

    with open(png_path, 'rb') as img_file:
        encoded_img = base64.b64encode(img_file.read()).decode('utf-8')
    safe_title = html.escape(str(title), quote=True)
    return (
        f'<h2>{safe_title}</h2>'
        f'<img src="data:image/png;base64,{encoded_img}" alt="{safe_title}">'
    )


def _write_report(data, report_path='report.html'):
    """Assemble the HTML report and write it to *report_path*.

    The whole document is built in memory before the output file is opened,
    so a missing figure raises without truncating an existing report.

    Args:
        data: Object exposing ``.columns``; every column except
            ``'Client ID'`` gets a section embedding ``<column>_comparison.png``.
        report_path: Destination file, written as UTF-8.

    Raises:
        FileNotFoundError: If any expected PNG is missing.
    """
    parts = [
        # The charset declaration matches the UTF-8 encoding used on write.
        '<html><head><meta charset="utf-8">'
        '<title>Data Analysis Report on Churn Data</title></head><body>',
        '<h1>Data Analysis Report on Churn Data</h1>',
    ]
    parts.extend(
        _png_section(title, png_name) for title, png_name in _REPORT_FIGURES
    )
    parts.append(
        '<h1>Distribution Comparison of Misclassified vs Correctly Classified Data</h1>'
    )
    parts.extend(
        _png_section(col, f'{col}_comparison.png')
        for col in data.columns
        if col != 'Client ID'
    )
    parts.append('</body></html>')

    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))


def main():
    """Run the churn pipeline end to end and produce ``report.html``.

    Steps: load the two CSV inputs, preprocess them, call the plotting
    helpers, train/evaluate via ``xg.full_xgboost``, persist the results to
    ``xgboost_results.pkl``, plot the misclassified/correct comparison, and
    write an HTML report with every figure embedded as base64.

    Raises:
        FileNotFoundError: If an input CSV or an expected PNG is missing.
    """
    # Load data (paths are hard-coded to the Colab /content directory).
    clients_df = pd.read_csv('/content/clients_data_competition.csv')
    orders_df = pd.read_csv('/content/orders_data_competition.csv')

    # Preprocess data.
    # NOTE(review): the meaning of the positional arguments 4 and 1 is defined
    # by preprocessing.preprocess_data and is not visible from this file.
    processed_data = pr.preprocess_data(orders_df, clients_df, 4, 1)

    # Visualizations.
    # NOTE(review): these helpers are presumably what save the PNG files that
    # the report embeds below - confirm against the visualizations module.
    v.plot_scatter_and_standardize(processed_data)
    v.plot_buy_sell_orders(processed_data)
    v.plot_churn_rate_by_risk_rate(processed_data)
    v.plot_churn_visualization(processed_data)
    v.plot_churn_by_month(processed_data)
    v.generate_rfm_bar_plot(processed_data)

    # Modeling and performance.
    results = xg.full_xgboost(processed_data)

    # Persist the results, then read them back from disk.  The pickle loaded
    # here is the one written immediately above by this run; never point this
    # at a pickle from an untrusted source.
    with open('xgboost_results.pkl', 'wb') as f:
        pickle.dump(results, f)
    with open('xgboost_results.pkl', 'rb') as f:
        loaded_results = pickle.load(f)

    # "Misclassified" here means predicted 0 while the true label is 1;
    # "correctly classified" means predicted 0 with true label 0.  Rows that
    # were predicted 1 fall in neither mask.
    y_pred = loaded_results['y_pred']
    y_test = loaded_results['y_test']
    misclassified = (y_pred == 0) & (y_test == 1)
    correctly_classified = (y_pred == 0) & (y_test == 0)
    data = loaded_results['X_test']

    # Plot misclassified vs correctly classified.
    v.plot_misclassified_vs_correctly_classified(data, misclassified, correctly_classified)

    # Save visualizations to an HTML report.
    _write_report(data)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()