-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtextAnalysis.py
85 lines (67 loc) · 2.17 KB
/
textAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 14 14:29:58 2015
@author: dagrha & ngreeney
"""
import collections
import io
import re

import pandas as pd
import pylab as plt
from textblob import TextBlob
def readBook(book_text):
    '''
    Read a UTF-8 encoded text file and return its contents as unicode text.

    book_text : path of the file to read.

    Returns the whole file as a single text string (``unicode`` on
    Python 2, ``str`` on Python 3).

    Raises IOError/OSError if the file cannot be opened and
    UnicodeDecodeError if its contents are not valid UTF-8.
    '''
    # io.open decodes while reading and behaves the same on Python 2 and 3,
    # so the Python 2-only ``unicode`` builtin is no longer needed.
    with io.open(book_text, 'r', encoding='utf-8') as book:
        return book.read()
def sentiment(textblob, csv_path='shunned.csv'):
    '''
    Score every sentence of a blob and return the polarities as a DataFrame.

    analyzer used : pattern

    textblob : object exposing ``sentences``; each sentence must expose
               ``sentiment.polarity``.
    csv_path : file the table is also written to (defaults to the
               original hard-coded 'shunned.csv').

    Returns a DataFrame indexed by sentence number (index name 'number')
    with a single float column 'polarity'. An input without sentences
    gives an empty frame and a header-only CSV.
    '''
    polarities = [sentence.sentiment.polarity
                  for sentence in textblob.sentences]

    # Build the frame in memory instead of appending to the CSV one
    # sentence at a time and parsing it back: the file is written once,
    # and the trailing comma of the old header (which produced a junk
    # all-NaN column on read-back) is gone.
    frame = pd.DataFrame({'polarity': pd.Series(polarities, dtype=float)})
    frame.index.name = 'number'

    # The CSV is kept as an output artefact of the run.
    frame.to_csv(csv_path)
    return frame
def graph(pandaFrame, book_title='HP Lovecraft\'s The Shunned House'):
    '''
    Plot per-sentence polarity and its running total in two figures.

    pandaFrame : DataFrame with a 'polarity' column. A 'cum_sum' column
                 holding the cumulative sum of 'polarity' is added to
                 this frame in place.
    book_title : text appended to both plot titles; defaults to the
                 title that used to be hard coded here.

    Returns None. The figures are created but not shown; displaying
    them is left to the caller / interactive session.
    '''
    # Figure 1: polarity of each sentence.
    plt.figure()
    pandaFrame.polarity.plot(figsize=(12, 5), color='b',
                             title='Sentiment Polarity for\n' + book_title)
    plt.xlabel('Sentence number')
    plt.ylabel('Sentiment polarity')

    # Figure 2: running total of polarity over the book.
    pandaFrame['cum_sum'] = pandaFrame.polarity.cumsum()
    plt.figure()
    pandaFrame.cum_sum.plot(figsize=(12, 5), color='r',
                            title='Sentiment Polarity cumulative summation for\n'
                            + book_title)
    plt.xlabel('Sentence number')
    plt.ylabel('Sum of Sentiment')
def analyze(df, blob=None, text_path='lovecraft.txt'):
    '''
    Print strongly negative sentences and tabulate the most common words.

    df        : DataFrame with a 'polarity' column whose index values are
                positions into ``blob.sentences``.
    blob      : object exposing ``sentences``. When omitted, the
                module-level ``tb`` bound by the script entry point is
                used, which is what this function always relied on.
    text_path : UTF-8 text file whose words are counted (defaults to the
                original hard-coded 'lovecraft.txt').

    Every sentence with polarity below -0.5 is printed as
    "<number> <sentence>".

    Returns a DataFrame indexed by 'word' with a 'freq' column holding
    the (up to) 10 most frequent lower-cased words of ``text_path``.
    '''
    if blob is None:
        # Backward compatibility with callers that only pass ``df``.
        blob = tb
    sentences = blob.sentences

    for i in df[df.polarity < -0.5].index:
        # Single-argument print() behaves the same on Python 2 and 3.
        print('{0} {1}'.format(i, sentences[i]))

    # ``with`` closes the handle (the original leaked it); decoding as
    # UTF-8 matches how readBook loads the same book.
    with io.open(text_path, 'r', encoding='utf-8') as book:
        words = re.findall(r'\w+', book.read().lower(), re.UNICODE)

    common = collections.Counter(words).most_common(10)
    df_freq = pd.DataFrame(common, columns=['word', 'freq'])
    # The table used to be computed and thrown away; hand it to the caller.
    return df_freq.set_index('word')
if __name__ == '__main__':
    # Pipeline: load the book, score each sentence, plot, then report.
    raw_text = readBook('lovecraft.txt')
    # Keep the module-level name ``tb``: analyze() looks it up as a global.
    tb = TextBlob(raw_text)
    df = sentiment(tb)
    graph(df)
    analyze(df)