-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathutils_new.py
106 lines (80 loc) · 3.24 KB
/
utils_new.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Guansong Pang
Source code for the PReNet algorithm in KDD'23.
"""
import pandas as pd
import numpy as np
from sklearn.metrics import auc,roc_curve, precision_recall_curve, average_precision_score, roc_auc_score
from sklearn import preprocessing
import matplotlib.pyplot as plt
from joblib import Memory
from sklearn.datasets import load_svmlight_file
mem = Memory("./data/svm_data")
@mem.cache
def get_data_from_svmlight_file(path):
data = load_svmlight_file(path)
return data[0], data[1]
def dataLoading(path):
# loading data
df = pd.read_csv(path)
labels = df['class']
x_df = df.drop(['class'], axis=1)
x = x_df.values
print("Data shape: (%d, %d)" % x.shape)
return x, labels;
def dataLoading_noheader(path):
# loading data
df = pd.read_csv(path, header=None)
x = df.values
# print("Data shape: (%d, %d)" % x.shape)
return x;
def aucPerformance(mse, labels):
roc_auc = roc_auc_score(labels, mse)
# print(roc_auc)
ap = average_precision_score(labels, mse)
print("AUC-ROC: %.4f, AUC-PR: %.4f" % (roc_auc, ap))
# plt.title('Receiver Operating Characteristic')
# plt.plot(fpr, tpr, label='AUC = %0.4f'% roc_auc)
# plt.legend(loc='lower right')
# plt.plot([0,1],[0,1],'r--')
# plt.xlim([-0.001, 1])
# plt.ylim([0, 1.001])
# plt.ylabel('True Positive Rate')
# plt.xlabel('False Positive Rate')
# plt.show();
return roc_auc, ap;
def writeOutlierScores(scores, labels, name):
csv_file = open('./outlierscores/' + name + '.csv', 'w')
#"w" indicates that you're writing strings to the file
columnTitleRow = 'class,score\n'
csv_file.write(columnTitleRow)
for idx in range(0, len(scores)):
row = str(labels[idx]) + "," + str(scores[idx][0]) + "\n"
csv_file.write(row)
def writeRepresentation(data, labels, dim, name):
path = ('./dataset/embedding/' + name + '_rp_' + str(dim) + '.csv')
#"w" indicates that you're writing strings to the file
attr_names = [0] * (dim + 1)
for i in range(0, dim):
attr_names[i]= 'attr' + str(i)
attr_names[dim] = 'class'
labels = labels.reshape((len(labels), 1))
print(labels.shape, data.shape)
data = np.concatenate((data, labels), axis = 1)
df = pd.DataFrame(data)
df.to_csv(path, header = attr_names, index=False)
def writeResults(name, n_samples, dim, n_samples_trn, n_outliers_trn, n_outliers, depth, rauc, ap, std_auc, std_ap, train_time, test_time, path = "./results/auc_performance_cl0.5.csv"):
csv_file = open(path, 'a')
row = name + "," + str(n_samples)+ "," + str(dim) + ',' + str(n_samples_trn) + ','+ str(n_outliers_trn) + ','+ str(n_outliers) + ',' + str(depth)+ "," + str(rauc) +"," + str(std_auc) + "," + str(ap) +"," + str(std_ap)+"," + str(train_time)+"," + str(test_time) + "\n"
csv_file.write(row)
def visualizeData(data, labels, name):
plt.figure(figsize=(5, 5))
plt.plot(data[labels == 1, 0], data[labels == 1, 1], 'ro')
plt.plot(data[labels != 1, 0], data[labels != 1, 1], 'bo')
plt.title('2-D ' + name)
plt.xlabel('x1')
plt.ylabel('x2')
plt.legend(['outliers', 'inliers'], loc='upper right')
plt.show()