-
Notifications
You must be signed in to change notification settings - Fork 40
/
sentence.py
73 lines (65 loc) · 2.92 KB
/
sentence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#----------------------------------------------------------------------------------
# Description: Sentence class to store sentences from the individual files in the
# document cluster.
#----------------------------------------------------------------------------------
from nltk.corpus import stopwords
class sentence(object):
    """Container for one sentence taken from a file in the document cluster.

    Stores the name of the source document, the sentence's words after
    stemming, the original (pre-stemming) words, and a word-frequency
    dictionary computed from the stemmed words.
    """

    def __init__(self, docName, preproWords, originalWords):
        """Initialize the sentence object.

        Parameters:
            docName: name of the document/file.
            preproWords: iterable of the words after the stemming process;
                each word is used as a dictionary key, so it must be
                hashable.
            originalWords: the actual words before stemming.

        Return:
            None
        """
        self.docName = docName
        self.preproWords = preproWords
        self.originalWords = originalWords
        # Computed last so every attribute exists before a method is called
        # on the instance. The table is built once, here; it is not
        # refreshed if preproWords is modified afterwards.
        self.wordFrequencies = self.sentenceWordFreq()

    def getDocName(self):
        """Return the name of the document given to the constructor."""
        return self.docName

    def getPreProWords(self):
        """Return the stemmed words of the sentence."""
        return self.preproWords

    def getOriginalWords(self):
        """Return the original words of the sentence before stemming."""
        return self.originalWords

    def getWordFreq(self):
        """Return the word-frequency dictionary built at construction.

        The stored dictionary itself is returned, not a copy.
        """
        return self.wordFrequencies

    def sentenceWordFreq(self):
        """Create a dictionary of word frequencies for the sentence object.

        Return:
            A new dict mapping each word in preproWords to the number of
            times it occurs in preproWords. Empty when preproWords is empty.
        """
        wordFreq = {}
        for word in self.preproWords:
            # dict.get supplies 0 for a word not seen yet, so no separate
            # membership test against the keys is needed.
            wordFreq[word] = wordFreq.get(word, 0) + 1
        return wordFreq