Upload code

nmeripo · Oct 9, 2017 · 6a1eb30 · 6a1eb30
1 parent a9352bd
commit 6a1eb30
Show file tree

Hide file tree

Showing 4 changed files with 459 additions and 0 deletions.
diff --git a/analyze.py b/analyze.py
@@ -0,0 +1,47 @@
+import pandas as pd
+import numpy as np
+import seaborn as sn
+import matplotlib.pyplot as plt
+import pickle
+import cv2
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import f1_score
+from keras.utils.np_utils import to_categorical
+from sklearn.metrics import roc_curve, auc
+
+
+# Evaluate saved category predictions against the test split: print the
+# micro/macro F1 scores and show raw and normalized confusion matrices.
+
+# Ground truth: one-hot category labels from category_test.csv.  The first
+# CSV column is dropped from the label list (preprocess.py writes the
+# "images" file-name column at position 0).
+test =  pd.read_csv("/home/venkat/ClothingAttributeDataset/preprocessed/category_test.csv")
+labels= list(test.columns)
+del labels[0]
+y_true =  np.asarray(test[labels])
+print y_true.shape
+
+# NOTE(review): num_classes is not used anywhere below.
+num_classes = len(labels)
+# Predictions are unpickled from a local file; the indexing below requires a
+# 2-D array (one row per sample).
+# NOTE(review): pickle.load executes arbitrary code -- only load trusted files.
+with open('/home/venkat/y_pred.pkl', 'rb') as f:
+        y_pred = pickle.load(f)
+
+# Mark each row's maximum with 1, then reduce both arrays to class indices.
+# argmax returns the first maximal column, so the intermediate 0/1 matrix
+# yields the same index as taking argmax of the raw scores directly.
+y_pred = (y_pred == y_pred.max(axis=1)[:,None]).astype(int)
+y_pred = y_pred.argmax(1)
+y_true = y_true.argmax(1)
+
+
+# Micro- and macro-averaged F1 scores
+print f1_score(y_true, y_pred, average='micro')
+print f1_score(y_true, y_pred, average='macro')
+
+
+# Plot confusion matrix and normalized confusion matrix
+cm = confusion_matrix(y_true, y_pred)
+
+# NOTE(review): labelling rows/columns with `labels` assumes every class
+# occurs in y_true or y_pred; otherwise cm is smaller than len(labels).
+df_cm = pd.DataFrame(cm, index = [i for i in labels],
+                  columns = [i for i in labels])
+plt.figure(figsize = (10,7))
+sn.heatmap(df_cm, annot=True)
+plt.show()
+
+# Broadcasting divides every column by its column total (sum over axis 0),
+# so each column of cm_norm sums to 1; a column whose total is 0 produces
+# NaN entries.
+# NOTE(review): with scikit-learn's layout (rows = true, columns = predicted)
+# this normalizes per predicted class; normalizing per true class would need
+# the row sums instead -- confirm which one is intended.
+# NOTE(review): the np.float alias was removed in NumPy 1.24.
+cm_norm = cm / cm.astype(np.float).sum(axis=0)
+df_cm_norm = pd.DataFrame(cm_norm, index = [i for i in labels],
+                  columns = [i for i in labels])
+plt.figure(figsize = (10,7))
+sn.heatmap(df_cm_norm, annot=True)
+plt.show()
diff --git a/data_augment.py b/data_augment.py
@@ -0,0 +1,140 @@
+import cv2
+import numpy as np
+import random
+import pandas as pd
+import math
+import glob
+import pickle
+
+# Per-channel weights, shape (1, 1, 3), broadcast against H x W x 3 images
+# by contrast_aug and saturation_aug to form a weighted channel sum.
+# NOTE(review): 0.114 / 0.587 / 0.299 look like the BT.601 luma weights in
+# B, G, R order (the channel order cv2.imread produces) -- confirm.
+coef = np.array([[[0.114, 0.587, 0.299]]])
+
+
+def random_crop(img, size):
+    """Cut a randomly placed ``size`` x ``size`` window out of ``img``.
+
+    Along each of the first two axes the offset is drawn uniformly from
+    ``[0, (extent - size) // 2)``; when that range is empty the offset is
+    fixed at 0.  Every entry of the third axis is kept.
+
+    :param img: array indexed as ``img[row, col, :]``.
+    :param size: side length of the square window.
+    :return: the slice ``img[top:top + size, left:left + size, :]``.
+    """
+    n_rows, n_cols = img.shape[0], img.shape[1]
+    row_span, col_span = (n_rows - size) // 2, (n_cols - size) // 2
+    # np.random.randint(0) is invalid, hence the explicit zero-offset case.
+    top = np.random.randint(row_span) if row_span != 0 else 0
+    left = np.random.randint(col_span) if col_span != 0 else 0
+    window = (slice(top, top + size), slice(left, left + size), slice(None))
+    return img[window]
+
+
+def center_crop(img, size):
+    """Return the central ``size`` x ``size`` window of ``img``.
+
+    The window starts ``size // 2`` entries before the midpoint
+    (``extent // 2``) of each of the first two axes and always spans exactly
+    ``size`` entries.  Deriving both edges from ``size // 2`` would return
+    only ``size - 1`` entries for an odd ``size``; even sizes are unaffected.
+
+    :param img: array indexed as ``img[row, col, :]``; every entry of the
+        third axis is kept.
+    :param size: side length of the square window.
+    :return: the slice ``img[top:top + size, left:left + size, :]``.
+
+    NOTE(review): if ``img`` is smaller than ``size`` along an axis the start
+    index becomes negative and NumPy counts it from the end, so the result
+    is not a centred crop -- resize such images before calling.
+    """
+    top = img.shape[0] // 2 - size // 2
+    left = img.shape[1] // 2 - size // 2
+    return img[top:top + size, left:left + size, :]
+
+
+def resize(img, size):
+    """Scale ``img`` to a ``size`` x ``size`` square.
+
+    :param img: image array accepted by ``cv2.resize``.
+    :param size: edge length, used for both output dimensions.
+    :return: the result of ``cv2.resize`` with ``cv2.INTER_CUBIC``
+        interpolation.
+    """
+    target_shape = (size, size)
+    return cv2.resize(img, target_shape, interpolation=cv2.INTER_CUBIC)
+
+
+def random_flip(img, size):
+    """Mirror ``img`` along one randomly chosen axis.
+
+    A single uniform sample from [0, 1) selects the axis: below 0.5 the
+    second axis is reversed (horizontal flip), otherwise the first axis is
+    reversed (vertical flip).  The image is therefore always flipped one way
+    or the other.
+
+    :param img: array-like image; the horizontal case needs at least two axes.
+    :param size: not used by the function body.
+    :return: a reversed view of ``np.asarray(img)``.
+    """
+    pixels = np.asarray(img)
+    if np.random.uniform() >= 0.5:
+        # vertical_flip: reverse the order of the first axis
+        return pixels[::-1, ...]
+    # horizontal_flip: reverse the order of the second axis
+    return pixels[:, ::-1, ...]
+
+
+def brightness_aug(img, brightness=0.2):
+    """Scale every value of ``img`` by one random brightness factor.
+
+    The factor is ``1 + u`` with ``u`` drawn uniformly from
+    ``[-brightness, brightness)``.
+
+    :param img: image array.  Floating-point arrays are scaled in place.
+        Any other dtype (e.g. integer pixel data) is first copied to
+        ``float64``, because an in-place multiply by a float cannot be cast
+        back into an integer array and would raise.
+    :param brightness: maximum relative change of the scale factor.
+    :return: the scaled floating-point array; this is ``img`` itself when
+        ``img`` was already a floating-point ndarray.
+    """
+    img = np.asarray(img)
+    if not np.issubdtype(img.dtype, np.floating):
+        # Work on a float copy instead of failing on the in-place multiply.
+        img = img.astype(np.float64)
+    alpha = 1.0 + np.random.uniform(-brightness, brightness)
+    img *= alpha
+    return img
+
+
+def contrast_aug(img, contrast=0.2):
+    """Blend ``img`` with a single mean-intensity value by a random amount.
+
+    With ``alpha = 1 + u`` (``u`` uniform in ``[-contrast, contrast)``) the
+    result is ``alpha * img + (1 - alpha) * m``, where
+    ``m = 3 * sum(img * coef) / (img * coef).size``.  For an H x W x 3 image
+    ``m`` is the average over all pixels of the ``coef``-weighted channel sum.
+
+    :param img: image array that broadcasts against the module-level
+        ``coef``.  Floating-point arrays are modified in place; any other
+        dtype (e.g. integer pixel data) is first copied to ``float64``,
+        because the in-place float arithmetic below would otherwise raise.
+    :param contrast: maximum relative change of the blend factor.
+    :return: the adjusted floating-point array; this is ``img`` itself when
+        ``img`` was already a floating-point ndarray.
+    """
+    img = np.asarray(img)
+    if not np.issubdtype(img.dtype, np.floating):
+        # Work on a float copy instead of failing on the in-place updates.
+        img = img.astype(np.float64)
+    alpha = 1.0 + np.random.uniform(-contrast, contrast)
+    gray = img * coef
+    # Collapse the weighted image to one scalar, pre-scaled by (1 - alpha).
+    gray = (3.0 * (1.0 - alpha) / gray.size) * np.sum(gray)
+    img *= alpha
+    img += gray
+    return img
+
+
+def saturation_aug(img, saturation=0.4):
+    """Blend each pixel with its own weighted channel sum by a random amount.
+
+    With ``alpha = 1 + u`` (``u`` uniform in ``[-saturation, saturation)``)
+    the result is ``alpha * img + (1 - alpha) * gray``, where ``gray`` is the
+    sum over axis 2 of ``img * coef`` (kept as a size-1 axis so that it
+    broadcasts back over the channels).
+
+    :param img: array with at least three axes that broadcasts against the
+        module-level ``coef``.  Floating-point arrays are modified in place;
+        any other dtype (e.g. integer pixel data) is first copied to
+        ``float64``, because the in-place float arithmetic below would
+        otherwise raise.
+    :param saturation: maximum relative change of the blend factor.
+    :return: the adjusted floating-point array; this is ``img`` itself when
+        ``img`` was already a floating-point ndarray.
+    """
+    img = np.asarray(img)
+    if not np.issubdtype(img.dtype, np.floating):
+        # Work on a float copy instead of failing on the in-place updates.
+        img = img.astype(np.float64)
+    alpha = 1.0 + np.random.uniform(-saturation, saturation)
+    gray = img * coef
+    gray = np.sum(gray, axis=2, keepdims=True)
+    gray *= (1.0 - alpha)
+    img *= alpha
+    img += gray
+    return img
+
+
+def color_jitter(img):
+    """Apply brightness, contrast and saturation jitter in random order.
+
+    :param img: image array.  Floating-point arrays are handed to the
+        augmentations as-is (they modify their argument in place); any other
+        dtype is first copied to ``float64`` so those in-place float
+        operations do not fail.
+    :return: a ``uint8`` array.  Values are clipped to ``[0, 255]`` before
+        the cast, because the augmentations can push values outside that
+        range and converting such floats to ``uint8`` does not saturate.
+    """
+    img = np.asarray(img)
+    if not np.issubdtype(img.dtype, np.floating):
+        img = img.astype(np.float64)
+    lst = [brightness_aug, contrast_aug, saturation_aug]
+    random.shuffle(lst)
+    for aug in lst:
+        img = aug(img)
+    # In-range values are unchanged by the clip, so they convert as before.
+    return np.clip(img, 0, 255).astype(np.uint8)
+
+
+def normalize(img):
+    """Convert ``img`` to float32 and subtract a fixed mean from channels 0-2.
+
+    The means are 103.939, 116.779 and 123.68 for channels 0, 1 and 2.
+    NOTE(review): these look like the ImageNet B, G, R means used by
+    VGG-style models -- confirm.
+    The array layout is left as it is: no channel transposition and no
+    batch axis are added.
+
+    :param img: array indexed as ``img[:, :, c]`` with at least three
+        channels.
+    :return: float32 array; when ``img`` is already float32 this is ``img``
+        itself, modified in place (``astype(..., copy=False)``).
+    """
+    channel_means = (103.939, 116.779, 123.68)
+    pixels = img.astype(np.float32, copy=False)
+    for channel, mean in enumerate(channel_means):
+        pixels[:, :, channel] -= mean
+    return pixels
+
+
+# Build the train/test arrays: every training image contributes four
+# variants (resized, flipped, center-cropped, color-jittered) that share one
+# label row; every test image is only resized.  The arrays are pickled into
+# the current working directory.
+IMAGES_FOLDER = "/home/venkat/ClothingAttributeDataset/images/"
+
+# preprocess train data
+train_df = pd.read_csv("/home/venkat/ClothingAttributeDataset/preprocessed/category_train.csv")
+train_imgs = list(train_df["images"])
+train_labels = train_df[['shirt', 'sweater', 't-shirt', 'outerwear', 'suit', 'tank_top', 'dress']].values
+
+X_train = []
+y_train = []
+
+for img_name, label_row in zip(train_imgs, train_labels):
+    img_path = IMAGES_FOLDER + img_name
+    img = cv2.imread(img_path)
+    if img is None:
+        # cv2.imread signals an unreadable file by returning None; fail with
+        # the offending path instead of an opaque error inside cv2.resize.
+        raise IOError("Could not read image: " + img_path)
+    img_resize = normalize(resize(img, 224))
+    img_rf = random_flip(img_resize, 224)
+    # NOTE(review): the crop is taken from the raw image, so it is only
+    # 224 x 224 when the image is at least that large on both axes.
+    img_crop = normalize(center_crop(img, 224))
+    img_cj = normalize(color_jitter(resize(img, 224).astype(np.float64)))
+    X_train += [img_resize, img_rf, img_crop, img_cj]
+    # One copy of the label row per variant appended above.
+    y_train += [list(label_row)] * 4
+
+X_train = np.asarray(X_train)
+y_train = np.asarray(y_train)
+
+# Context managers close the output files even if pickling fails.
+with open("X_train.pkl", "wb") as f:
+    pickle.dump(X_train, f, pickle.HIGHEST_PROTOCOL)
+with open("y_train.pkl", "wb") as f:
+    pickle.dump(y_train, f, pickle.HIGHEST_PROTOCOL)
+
+# preprocess test data
+test_df = pd.read_csv("/home/venkat/ClothingAttributeDataset/preprocessed/category_test.csv")
+test_imgs = list(test_df["images"])
+test_labels = test_df[['shirt', 'sweater', 't-shirt', 'outerwear', 'suit', 'tank_top', 'dress']].values
+
+X_test = []
+y_test = []
+
+for img_name, label_row in zip(test_imgs, test_labels):
+    img_path = IMAGES_FOLDER + img_name
+    img = cv2.imread(img_path)
+    if img is None:
+        raise IOError("Could not read image: " + img_path)
+    img_resize = normalize(resize(img, 224))
+    X_test += [img_resize]
+    y_test += [list(label_row)]
+
+X_test = np.asarray(X_test)
+y_test = np.asarray(y_test)
+
+with open("X_test.pkl", "wb") as f:
+    pickle.dump(X_test, f, pickle.HIGHEST_PROTOCOL)
+with open("y_test.pkl", "wb") as f:
+    pickle.dump(y_test, f, pickle.HIGHEST_PROTOCOL)
+
+print X_train.shape
+print X_test.shape
diff --git a/preprocess.py b/preprocess.py
@@ -0,0 +1,113 @@
+import pandas as pd
+import numpy as np
+import glob
+import scipy.io
+import shutil
+from tqdm import tqdm
+import os
+
+
+def merge_dicts(*dict_args):
+    """Combine any number of dictionaries into one new dictionary.
+
+    The arguments are applied from left to right, so for a key present in
+    several of them the value from the last one wins.  The inputs themselves
+    are not modified; with no arguments an empty dict is returned.
+    """
+    merged = dict()
+    for mapping in dict_args:
+        merged.update(mapping)
+    return merged
+
+
+# Build one-hot "category" labels from the dataset's .mat label files, split
+# them randomly into train and test sets, and write both as CSV files.
+ROOT = "/home/venkat/ClothingAttributeDataset/"
+LABELS = "/home/venkat/ClothingAttributeDataset/labels/"
+PREPROCESS = "/home/venkat/ClothingAttributeDataset/preprocessed/"
+
+if not os.path.exists(PREPROCESS):
+    os.makedirs(PREPROCESS)
+
+# Attribute name -> list of its value names.  Only data_multi['category'] is
+# read by the code below; the other tables (and the merged `data`) are not
+# used in this script.
+val = ["No", "Yes"]
+# NOTE(review): 'purple' appears twice in this literal; the later entry
+# silently replaces the earlier one (both map to `val`).
+data_colors = {'black': val, 'blue': val, 'brown': val, 'cyan': val, 'gray': val, 'green': val, 'purple': val,
+               'red': val, 'white': val, 'yellow': val, 'purple': val, 'many_colors': val}
+
+data_pattern = {'pattern_floral': val, 'pattern_graphics': val, 'pattern_plaid': val,
+                'pattern_solid': val, 'pattern_spot': val, 'pattern_stripe': val}
+
+data_binary = {'collar': val, 'gender': ["male", "female"], 'necktie': val,
+               'placket': val, 'skin_exposure': ["low", "high"], 'scarf': val}
+
+data_multi = {'sleevelength': ["no", "short", "long"], 'neckline': ["v-shape", "round", "other"],
+              'category': ["shirt", "sweater", "t-shirt", "outerwear", "suit", "tank_top", "dress"]}
+
+data = merge_dicts(data_colors, data_binary, data_pattern)
+
+# Stays an empty frame if no category label file is found below.
+category_df = pd.DataFrame()
+
+for filename in glob.iglob(LABELS + '*.mat'):
+    # File name without directory and extension, minus its last 3 characters
+    # (presumably a "_GT" suffix -- confirm against the label file names).
+    feature_name = filename.split("/")[-1].split(".")[0][:-3]
+
+    # Only the "category" attribute is processed; other files are skipped.
+    if feature_name == "category":
+        labels = data_multi[feature_name]
+        mat = scipy.io.loadmat(filename)['GT'].flatten()
+        # One indicator column per distinct value, then renamed to the label
+        # names.  Assumes the file holds exactly len(labels) distinct values
+        # whose sorted order matches `labels` -- TODO confirm.
+        category_df = pd.get_dummies(mat, prefix="category")
+        category_df.columns = labels
+        # Image file name derived from the row position: 1-based, zero-padded
+        # to six digits, ".jpg" extension.
+        category_df.insert(0, "images", category_df.index.map(lambda val: "{:06d}.jpg".format(val + 1)))
+        # Drop the rows whose label value is NaN.
+        category_df = category_df[~np.isnan(mat)]
+
+# train-test split randomly
+# Each row goes to train with probability 0.8.  No random seed is set in this
+# script, so the split differs from run to run.
+msk = np.random.rand(len(category_df)) < 0.8
+train = category_df[msk]
+test = category_df[~msk]
+
+# Data Percentage for each category
+# value_counts()[1] looks up the number of rows equal to 1; it raises
+# KeyError for a category that has no such rows.
+for key in data_multi['category']:
+    print key, round(100 * category_df[key].value_counts()[1]/ float(category_df.shape[0]), 2)
+
+train.to_csv(PREPROCESS + "category_train" + ".csv", index=False)
+test.to_csv(PREPROCESS + "category_test" + ".csv", index=False)
+
+
+# For Keras ImageDataGenerator.flow_from_directory (disabled: the code below is kept inside a string literal and never runs)
+"""
+train_label_map = {}
+
+for item in data_multi['category']:
+    train_label_map[item] = list(train.loc[train[item] == 1]["images"])
+
+test_label_map = {}
+for item in data_multi['category']:
+    test_label_map[item] = list(test.loc[test[item] == 1]["images"])
+
+label_cols = list(train.columns)
+del label_cols[0]
+y_train = train[label_cols].values
+y_test = test[label_cols].values
+
+copy_path_train = ROOT + "category_train/"
+copy_path_test = ROOT + "category_test/"
+
+if not os.path.exists(copy_path_train):
+    os.makedirs(copy_path_train)
+
+if not os.path.exists(copy_path_test):
+    os.makedirs(copy_path_test)
+
+for key in train_label_map.keys():
+    class_path = copy_path_train + key
+
+    if not os.path.exists(class_path):
+        os.makedirs(class_path)
+    img_paths = train_label_map[key]
+
+    for path in img_paths:
+        src_path = "/home/venkat/ClothingAttributeDataset/images/" + path
+        copy_path = class_path + "/" + path
+        shutil.copyfile(src_path, copy_path)
+
+for key in test_label_map.keys():
+    class_path = copy_path_test + key
+    
+    if not os.path.exists(class_path):
+        os.makedirs(class_path)
+    img_paths = test_label_map[key]
+    
+    for path in img_paths:
+        src_path = "/home/venkat/ClothingAttributeDataset/images/" + path
+        copy_path = class_path + "/" + path
+        shutil.copyfile(src_path, copy_path)
+"""