Skip to content


Upload code
Browse files Browse the repository at this point in the history
  • Loading branch information
nmeripo authored Oct 9, 2017
1 parent a9352bd commit 6a1eb30
Show file tree
Hide file tree
Showing 4 changed files with 459 additions and 0 deletions.
47 changes: 47 additions & 0 deletions
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
import pickle
import cv2
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from keras.utils.np_utils import to_categorical
from sklearn.metrics import roc_curve, auc

# Ground-truth labels: every column after the first one is treated as a
# one-hot class column (the first column is dropped below).
test = pd.read_csv("/home/venkat/ClothingAttributeDataset/preprocessed/category_test.csv")
labels = list(test.columns[1:])
y_true = np.asarray(test[labels])
# Single-argument print() behaves the same on Python 2 and Python 3.
print(y_true.shape)

num_classes = len(labels)

# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load prediction files produced by a trusted run.
with open('/home/venkat/y_pred.pkl', 'rb') as f:
    y_pred = np.asarray(pickle.load(f))

# Collapse per-class rows to class indices. argmax already returns the first
# maximum of each row, so the former one-hot round trip was redundant.
y_pred = y_pred.argmax(1)
y_true = y_true.argmax(1)

# Micro- and macro-averaged F1 scores
print(f1_score(y_true, y_pred, average='micro'))
print(f1_score(y_true, y_pred, average='macro'))

# Plot confusion matrix and normalized confusion matrix.
# Passing the full list of class indices keeps the matrix len(labels) x
# len(labels) even when a class never occurs in y_true or y_pred; otherwise
# the DataFrame construction below fails on a shape mismatch.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(labels))))

df_cm = pd.DataFrame(cm, index=list(labels), columns=list(labels))
plt.figure(figsize=(10, 7))
sn.heatmap(df_cm, annot=True)

# Each entry is divided by the sum of its column, so every column of cm_norm
# sums to 1. The builtin float replaces np.float, which newer NumPy releases
# no longer provide.
# NOTE(review): a column whose sum is 0 produces NaN entries here.
cm_norm = cm / cm.astype(float).sum(axis=0)
df_cm_norm = pd.DataFrame(cm_norm, index=list(labels), columns=list(labels))
plt.figure(figsize=(10, 7))
sn.heatmap(df_cm_norm, annot=True)
140 changes: 140 additions & 0 deletions
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import cv2
import numpy as np
import random
import pandas as pd
import math
import glob
import pickle

# Per-channel weights, shaped (1, 1, 3) so they broadcast against arrays
# whose last axis has length 3.
# NOTE(review): the 0.114 / 0.587 / 0.299 ordering looks like luma weights in
# blue-green-red channel order - confirm against how images are loaded.
coef = np.array([0.114, 0.587, 0.299]).reshape(1, 1, 3)

def random_crop(img, size):
    """Cut a ``size`` x ``size`` window out of ``img`` at a random offset.

    Along each of the first two axes the offset is drawn from
    ``[0, (extent - size) // 2)``; when that range is empty (value 0) the
    offset is 0. All entries of the third axis are kept.
    """
    extent0 = img.shape[0]
    extent1 = img.shape[1]
    slack0 = (extent0 - size) // 2
    slack1 = (extent1 - size) // 2
    # The axis-0 offset is drawn before the axis-1 offset so that a seeded
    # generator yields the same crops as before.
    start0 = np.random.randint(slack0) if slack0 != 0 else 0
    start1 = np.random.randint(slack1) if slack1 != 0 else 0
    stop0 = start0 + size
    stop1 = start1 + size
    return img[start0:stop0, start1:stop1, :]

def center_crop(img, size):
    """Return the central window of ``img`` spanning ``size // 2`` on each side.

    The window is centred on ``shape // 2`` along the first two axes and keeps
    the third axis whole. For an odd ``size`` the window is therefore
    ``size - 1`` wide.

    If ``img`` is smaller than ``size`` along an axis, the window is clipped
    to the image bounds. Previously the start index could go negative and
    wrap around, silently returning only a sliver from the far edge.
    """
    centerw, centerh = img.shape[0] // 2, img.shape[1] // 2
    half = size // 2
    # Clamp at 0: a negative slice start would index from the end instead.
    top = max(centerw - half, 0)
    left = max(centerh - half, 0)
    return img[top:centerw + half, left:centerh + half, :]

def resize(img, size):
    """Scale ``img`` to a ``size`` x ``size`` square using cv2.INTER_CUBIC."""
    target_shape = (size, size)
    return cv2.resize(img, target_shape, interpolation=cv2.INTER_CUBIC)

def random_flip(img, size):
    """Randomly mirror ``img`` along its first two axes.

    One uniform draw decides the outcome: below 0.5 the image is reversed
    along axis 1 (horizontal flip) and then along axis 0 (vertical flip);
    otherwise ``img`` is returned untouched. ``size`` is accepted but unused.

    NOTE(review): the original indentation was lost, so it is assumed here
    that both flips belong to the same ``if`` - confirm.
    """
    if not np.random.uniform() < 0.5:
        return img

    # horizontal_flip: reverse axis 1
    mirrored = np.asarray(img)[:, ::-1, ...]
    # vertical_flip: reverse axis 0
    return mirrored[::-1, ...]

def brightness_aug(img, brightness=0.2):
    """Scale ``img`` by a random factor from ``[1 - brightness, 1 + brightness)``.

    Returns a new array; the input is left untouched. The previous in-place
    ``*=`` modified the caller's array and raised on integer images, because
    a float product cannot be stored back into an integer array.
    """
    alpha = 1.0 + np.random.uniform(-brightness, brightness)
    return img * alpha

def contrast_aug(img, contrast=0.2):
    """Randomly change the contrast of ``img``.

    A factor ``alpha`` is drawn from ``[1 - contrast, 1 + contrast)``. The
    result is ``img * alpha`` plus a scalar offset derived from the sum of
    ``img * coef`` (``coef`` is the module-level channel weight array).

    Returns a new array; the input is left untouched. The previous in-place
    arithmetic modified the caller's array and raised on integer images.
    """
    alpha = 1.0 + np.random.uniform(-contrast, contrast)
    gray = img * coef
    # Scalar offset: sum(gray) * 3 / gray.size, scaled by (1 - alpha).
    offset = (3.0 * (1.0 - alpha) / gray.size) * np.sum(gray)
    out = img * alpha
    # In-place add on the fresh array keeps a float input's dtype unchanged.
    out += offset
    return out

def saturation_aug(img, saturation=0.4):
    """Randomly change the saturation of ``img``.

    A factor ``alpha`` is drawn from ``[1 - saturation, 1 + saturation)``.
    Each pixel becomes ``pixel * alpha + (1 - alpha) * g``, where ``g`` is
    that pixel's ``coef``-weighted sum over axis 2 (``coef`` is the
    module-level channel weight array).

    Returns a new array; the input is left untouched. The previous in-place
    arithmetic modified the caller's array and raised on integer images.
    """
    alpha = 1.0 + np.random.uniform(-saturation, saturation)
    # keepdims=True leaves a trailing axis of length 1 so the per-pixel value
    # broadcasts back over the channels.
    gray = np.sum(img * coef, axis=2, keepdims=True) * (1.0 - alpha)
    out = img * alpha
    # In-place add on the fresh array keeps a float input's dtype unchanged.
    out += gray
    return out

def color_jitter(img):
    """Apply brightness, contrast and saturation jitter and return uint8.

    The three augmentations run in that fixed order, each on the previous
    one's output. The result is clipped to ``[0, 255]`` before the uint8
    cast: the augmentations can push values outside that range, and casting
    them unclipped wraps around instead of saturating.
    """
    for aug in (brightness_aug, contrast_aug, saturation_aug):
        img = aug(img)
    return np.clip(img, 0, 255).astype(np.uint8)

def normalize(img):
    """Convert ``img`` to float32 and subtract a fixed mean from channels 0-2.

    The conversion uses ``copy=False``, so an input that is already float32
    is modified in place and returned as the same object. The axis layout is
    left as is and no batch axis is added.
    """
    mean_pixel = [103.939, 116.779, 123.68]
    pixels = img.astype(np.float32, copy=False)
    for channel, channel_mean in enumerate(mean_pixel):
        pixels[:, :, channel] -= channel_mean
    return pixels

IMAGES_FOLDER = "/home/venkat/ClothingAttributeDataset/images/"

# preprocess train data
train_df = pd.read_csv("/home/venkat/ClothingAttributeDataset/preprocessed/category_train.csv")
train_imgs = list(train_df["images"])
train_labels = train_df[['shirt', 'sweater', 't-shirt', 'outerwear', 'suit', 'tank_top', 'dress']].values

X_train = []
y_train = []

# Every source image contributes four samples (resized, randomly flipped,
# center-cropped, color-jittered), all sharing the image's label row.
for img_name, label_row in zip(train_imgs, train_labels):
    img_path = IMAGES_FOLDER + img_name
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be
        # read; fail here with the offending path rather than deep in cv2.
        raise IOError("could not read image: " + img_path)
    img_resize = normalize(resize(img, 224))
    img_rf = random_flip(img_resize, 224)
    img_crop = normalize(center_crop(img, 224))
    img_cj = normalize(color_jitter(resize(img, 224).astype(np.float64)))
    X_train += [img_resize, img_rf, img_crop, img_cj]
    y_train += [list(label_row) for _ in range(4)]

X_train = np.asarray(X_train)
y_train = np.asarray(y_train)

# "with" closes the files deterministically; the handles were previously
# left open after pickle.dump.
with open("X_train.pkl", "wb") as f:
    pickle.dump(X_train, f, pickle.HIGHEST_PROTOCOL)
with open("y_train.pkl", "wb") as f:
    pickle.dump(y_train, f, pickle.HIGHEST_PROTOCOL)

# preprocess test data
test_df = pd.read_csv("/home/venkat/ClothingAttributeDataset/preprocessed/category_test.csv")
test_imgs = list(test_df["images"])
test_labels = test_df[['shirt', 'sweater', 't-shirt', 'outerwear', 'suit', 'tank_top', 'dress']].values

X_test = []
y_test = []

# Test images are only resized and normalized; no augmentation is applied.
for img_name, label_row in zip(test_imgs, test_labels):
    img_path = IMAGES_FOLDER + img_name
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be
        # read; fail here with the offending path rather than deep in cv2.
        raise IOError("could not read image: " + img_path)
    X_test.append(normalize(resize(img, 224)))
    y_test.append(list(label_row))

X_test = np.asarray(X_test)
y_test = np.asarray(y_test)

# "with" closes the files deterministically; the handles were previously
# left open after pickle.dump.
with open("X_test.pkl", "wb") as f:
    pickle.dump(X_test, f, pickle.HIGHEST_PROTOCOL)
with open("y_test.pkl", "wb") as f:
    pickle.dump(y_test, f, pickle.HIGHEST_PROTOCOL)

# Single-argument print() behaves the same on Python 2 and Python 3.
print(X_train.shape)
print(X_test.shape)
113 changes: 113 additions & 0 deletions
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import pandas as pd
import numpy as np
import glob
import shutil
from tqdm import tqdm
import os

def merge_dicts(*dict_args):
    """Merge any number of dicts into a new dict.

    Dicts are applied left to right, so for a key present in several inputs
    the value from the last one wins. The inputs are not modified. Calling
    with no arguments returns an empty dict.
    """
    result = {}
    for dictionary in dict_args:
        # The loop body was missing, which left the function unparsable.
        result.update(dictionary)
    return result

ROOT = "/home/venkat/ClothingAttributeDataset/"
LABELS = "/home/venkat/ClothingAttributeDataset/labels/"
PREPROCESS = "/home/venkat/ClothingAttributeDataset/preprocessed/"

# The guard's body was missing. Creating the directory is the only action
# consistent with the CSV files written into PREPROCESS later on.
# NOTE(review): reconstructed - confirm against the original script.
if not os.path.exists(PREPROCESS):
    os.makedirs(PREPROCESS)

# Attribute name -> list of value names, grouped by attribute family.
val = ["No", "Yes"]
# 'purple' was listed twice in this literal; the duplicate key is dropped
# (the resulting dict is identical).
data_colors = {'black': val, 'blue': val, 'brown': val, 'cyan': val, 'gray': val, 'green': val, 'purple': val,
               'red': val, 'white': val, 'yellow': val, 'many_colors': val}

data_pattern = {'pattern_floral': val, 'pattern_graphics': val, 'pattern_plaid': val,
                'pattern_solid': val, 'pattern_spot': val, 'pattern_stripe': val}

data_binary = {'collar': val, 'gender': ["male", "female"], 'necktie': val,
               'placket': val, 'skin_exposure': ["low", "high"], 'scarf': val}

data_multi = {'sleevelength': ["no", "short", "long"], 'neckline': ["v-shape", "round", "other"],
              'category': ["shirt", "sweater", "t-shirt", "outerwear", "suit", "tank_top", "dress"]}

data = merge_dicts(data_colors, data_binary, data_pattern)

# Imported here because the file-level imports do not bring in a .mat reader.
from scipy.io import loadmat

category_df = pd.DataFrame()

for filename in glob.iglob(LABELS + '*.mat'):
    # Base name without its extension and without its last three characters.
    feature_name = os.path.basename(filename).split(".")[0][:-3]

    if feature_name == "category":
        labels = data_multi[feature_name]
        # NOTE(review): this line had lost its left-hand expression
        # ("mat =['GT'].flatten()"); reading the 'GT' variable of the .mat
        # file is the assumed original - confirm.
        mat = loadmat(filename)['GT'].flatten()
        # One indicator column per class value, stored as 0/1 integers so the
        # CSVs hold numbers regardless of the dtype get_dummies returns.
        category_df = pd.get_dummies(mat, prefix="category").astype(int)
        category_df.columns = labels
        # NOTE(review): this call had lost part of its third argument; image
        # names are assumed to be the 1-based row number, zero-padded to six
        # digits - confirm.
        image_names = ["{:06d}.jpg".format(row + 1) for row in range(len(category_df))]
        category_df.insert(0, "images", image_names)
        # Drop the rows whose label is NaN.
        category_df = category_df[~np.isnan(mat)]

if category_df.empty:
    # Fail with a clear message instead of a KeyError further down.
    raise IOError("no category label rows loaded from " + LABELS)

# train-test split randomly
# NOTE(review): the generator is not seeded, so the split differs per run.
msk = np.random.rand(len(category_df)) < 0.8
train = category_df[msk]
test = category_df[~msk]

# Data Percentage for each category
for key in data_multi['category']:
    # The sum of a 0/1 column is the number of rows in that class; unlike
    # value_counts()[1] it also works when a class has no rows.
    share = 100 * float(category_df[key].sum()) / float(category_df.shape[0])
    # Formatted into one string so Python 2 and 3 print the same text.
    print("{} {}".format(key, round(share, 2)))

train.to_csv(PREPROCESS + "category_train" + ".csv", index=False)
test.to_csv(PREPROCESS + "category_test" + ".csv", index=False)

# For Keras ImageGenerator - Flow from Directory
# Class name -> list of image file names whose column for that class is 1.
train_label_map = {item: list(train.loc[train[item] == 1]["images"])
                   for item in data_multi['category']}
test_label_map = {item: list(test.loc[test[item] == 1]["images"])
                  for item in data_multi['category']}

label_cols = list(train.columns)
del label_cols[0]
y_train = train[label_cols].values
y_test = test[label_cols].values

copy_path_train = ROOT + "category_train/"
copy_path_test = ROOT + "category_test/"


def _copy_images_by_class(label_map, dest_root):
    """Copy each listed image into ``dest_root + <class name>``.

    Missing directories are created first.

    NOTE(review): the bodies of the ``os.path.exists`` guards were missing;
    creating the directory is the assumed original behavior - confirm.
    """
    if not os.path.exists(dest_root):
        os.makedirs(dest_root)
    for class_name, img_names in label_map.items():
        class_path = dest_root + class_name
        if not os.path.exists(class_path):
            os.makedirs(class_path)
        for img_name in img_names:
            src_path = "/home/venkat/ClothingAttributeDataset/images/" + img_name
            shutil.copyfile(src_path, class_path + "/" + img_name)


_copy_images_by_class(train_label_map, copy_path_train)
_copy_images_by_class(test_label_map, copy_path_test)

0 comments on commit 6a1eb30

Please sign in to comment.