-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
39 lines (33 loc) · 1.45 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# NOTE: the wildcard imports stay first so that the explicit imports below
# always win if utils/models happen to export a same-named symbol.
from utils import *
from models import *

import glob
import os

import nltk
import pandas as pd
# Fetch the NLTK stop-word corpus up front
# (presumably required by preprocess() -- confirm in utils).
nltk.download('stopwords')

# Single source of truth for the preprocessed-dataset cache. The check used to
# look for './IMDb_stemmed.csv' while the file was written and read back as
# 'IMDB_stemmed.csv', so on a case-sensitive filesystem the cache was never
# found and the expensive preprocessing ran on every start.
STEMMED_CSV = 'IMDB_stemmed.csv'

if not os.path.exists(STEMMED_CSV):
    df = pd.read_csv('IMDB Dataset.csv')
    print("Preprocessing dataset! This may take a while ...")
    df['review'] = df['review'].apply(preprocess)
    df['sentiment'] = labelEncoder(df['sentiment'])
    # index=False keeps the cached file's columns identical to the in-memory
    # frame; otherwise the reloaded frame gains an extra 'Unnamed: 0' column.
    df.to_csv(STEMMED_CSV, index=False)
else:
    df = pd.read_csv(STEMMED_CSV)

print("Splitting dataset")
X_train, y_train, X_val, y_val, X_test, y_test = createSplit(df, printsize=True)

# Directory holding the saved classifiers (*.sav) and vectorizers (*.pk).
path = './Models/'
os.makedirs(path, exist_ok=True)

# glob returns matches in arbitrary order, so both lists are sorted to make
# the positional classifier/vectorizer pairing below deterministic.
# NOTE(review): this assumes each classifier and its vectorizer sort into the
# same position (e.g. they share a file-name stem) -- confirm against the
# file names written by the get* helpers in models.
models = sorted(glob.glob(os.path.join(path, '*.sav')))
vects = sorted(glob.glob(os.path.join(path, '*.pk')))

# Only build the models when none are saved yet. The guard is optional:
# the calls pass overwrite=True, so they could also be run unconditionally
# to replace existing models.
if not models:
    getLinearSVM(X_train, y_train, X_val, y_val, path, progress=True, overwrite=True)
    getLogisticRegressor(X_train, y_train, X_val, y_val, path, progress=True, overwrite=True)
    getMNBClassifier(X_train, y_train, X_val, y_val, path, progress=True, overwrite=True)
    # Re-scan the directory to pick up whatever the calls above saved.
    models = sorted(glob.glob(os.path.join(path, '*.sav')))
    vects = sorted(glob.glob(os.path.join(path, '*.pk')))

print(models,'\n',vects)
print("\nModel-wise Prediction Report\n")

# A count mismatch means the positional pairing cannot be trusted; say so
# instead of failing with an IndexError or silently mis-pairing.
if len(models) != len(vects):
    print("Warning: found", len(models), "model file(s) but", len(vects),
          "vectorizer file(s); reporting only the pairs that line up.")

for clf_path, vect_path in zip(models, vects):
    model = {'clf': clf_path, 'vect': vect_path}
    printReport(model=model, X_test=X_test, y_test=y_test, roc=True)