-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathglove.py
32 lines (28 loc) · 1.09 KB
/
glove.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import pickle as pickle
import numpy as np
from scipy.io import loadmat
import torch.nn as nn
import torch
# Build one 300-dimensional embedding row per selected RAP2 attribute and
# pickle the resulting NumPy array to dataset/glove.pkl.
#
# NOTE(review): despite the output file name, no pretrained GloVe vectors are
# loaded anywhere in this script. The rows come from a freshly constructed
# nn.Embedding, whose weights are randomly initialised, and no seed is set,
# so every run writes different values. Confirm this is intended.

# Number of entries read from the annotation name table in the .mat file.
NUM_ANNOTATED_ATTRIBUTES = 152
# Size of the embedding table; selected-attribute indices must stay below
# EMBEDDING_ROWS or the lookup raises an IndexError.
EMBEDDING_ROWS = 60
EMBEDDING_DIM = 300

data = loadmat('dataset/RAP_annotation/RAP_annotation.mat')

# NOTE: pickle.load can execute arbitrary code; only point these paths at
# trusted dataset files.
with open('dataset/rap2_dataset.pkl', 'rb') as f:
    dataset = pickle.load(f)
with open('dataset/rap2_partition.pkl', 'rb') as f:
    partition = pickle.load(f)

selected_attribute = dataset['selected_attribute']

# `select` holds the selected-attribute values of the last training sample.
# Only its length is used below (as the number of rows to emit), so a single
# sample is enough; the list stays empty when there are no training samples.
train_indices = partition['train'][0]
select = []
if len(train_indices) > 0:
    select = np.array(dataset['att'][train_indices[-1]])[selected_attribute].tolist()

# Presumably the attribute-name strings stored in the annotation struct --
# the exact .mat layout is not visible from this script.
annotation_names = data['RAP_annotation'][0][0][2]
all_names = [
    annotation_names[idx][0][0] for idx in range(NUM_ANNOTATED_ATTRIBUTES)
]
select_name = [all_names[attr_idx] for attr_idx in selected_attribute]

# Map each selected name to its position; a duplicated name keeps its last
# position, exactly as the dict comprehension dictates.
word_to_ix = {name: i for i, name in enumerate(select_name)}

embeds = nn.Embedding(EMBEDDING_ROWS, EMBEDDING_DIM)

# One batched lookup instead of growing a tensor with torch.cat per row.
lookup_tensor = torch.tensor(
    [word_to_ix[name] for name in select_name[:len(select)]],
    dtype=torch.long,
)
word2vec = embeds(lookup_tensor).detach().numpy()

with open('dataset/glove.pkl', 'wb+') as f:
    pickle.dump(word2vec, f)