# TM_SentimentAnalysis.py
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 29 17:15:01 2020
@author: Abhinav
"""
#Project-1
from textblob import TextBlob

# Three sample reviews: one positive, one negative, one lukewarm.
Feedback1 = 'Starbucks Coffee is awesome.'
Feedback2 = 'Starbucks Coffee was bad.'
Feedback3 = 'Starbucks Coffee was Ok.'

# Wrap every review in a TextBlob first, then print the sentiment of each
# one in the same order the reviews are declared.
b1, b2, b3 = (TextBlob(text) for text in (Feedback1, Feedback2, Feedback3))
for blob in (b1, b2, b3):
    print(blob.sentiment)
#Project-2
#Import Library
import pandas as pd
#Load data
# The speech is free-form prose, not a table, so it is read as plain text.
# Parsing it with pd.read_csv would treat the first line as a column header,
# split every line on commas (and fail when lines have different comma
# counts), and DataFrame.to_string would then inject alignment padding and
# "NaN" cells into the text that gets scored.
SPEECH_PATH = 'F:/pyWork/Text_Mining/PresidentSpeechs/Obama.txt'
# utf-8 matches the encoding pd.read_csv used by default.
with open(SPEECH_PATH, encoding='utf-8') as speech_file:
    dataset = speech_file.read()
# Sentiment of the raw, uncleaned speech.
b1 = TextBlob(dataset)
print(b1.sentiment)
#-------------------Cleaning the data-----------------------------------
import re

# Every run of characters other than ASCII letters and digits (punctuation,
# whitespace, line breaks, ...) is collapsed into one single space.
non_alnum_run = re.compile("[^A-Za-z0-9]+")
dataset = non_alnum_run.sub(" ", dataset)
#----------------------Tokenization--------------------------------------------
import nltk
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize

# NOTE: word_tokenize needs NLTK's tokenizer data; if it is missing, run the
# download below once (left disabled).
#nltk.download()

# Split the cleaned text into word tokens and show them.
Tokens = word_tokenize(dataset)
print(Tokens)
#No. of tokens in the dataset
len(Tokens)  # only echoed when run in an interactive console
#Freq of occurrence of distinct elements (case-insensitive)
fdist = FreqDist(token.lower() for token in Tokens)
fdist  # only echoed when run in an interactive console
# Plot the 20 most frequent tokens.
fdist.plot(20)
#-------------------------Stemming----------------------------------------
# Demonstrates NLTK's Porter stemmer on a single sample word.
from nltk.stem import PorterStemmer
# Stemmer instance, kept under the module-level name `pst`.
pst=PorterStemmer()
# The returned stem is neither stored nor printed, so this line only shows a
# value when executed in an interactive console; the tokens extracted earlier
# are not stemmed here.
pst.stem("having")
#-------------Remove the Stop Words---------------------
import nltk.corpus
#Enlisting the stopwords present in English lang
stopwords = nltk.corpus.stopwords.words('english')
# A set makes each membership test O(1) instead of scanning the whole list.
stopword_set = set(stopwords)
#Getting rid of stopwords
# NLTK's English stop words are lower-case, so tokens are lower-cased for the
# lookup; otherwise capitalised stop words such as "The" or "We" would be
# kept. The token's original casing is preserved in the output.
FinalWords = [token for token in Tokens if token.lower() not in stopword_set]
for FinalWord in FinalWords:
    print(FinalWord)
#Calculating final Sentiment Score
# Score the whole stop-word-free text. Scoring the loop variable after the
# loop would only rate the very last token (and fail when there are no
# tokens at all).
b2 = TextBlob(' '.join(FinalWords))
print(b2.sentiment)