-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnlp_techniques.py
59 lines (48 loc) · 1.77 KB
/
nlp_techniques.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import spacy
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from textblob import TextBlob
from nltk.corpus import stopwords
# Load the "en_core_web_sm" spaCy pipeline once, at import time. The resulting
# `nlp` object is used by extract_named_entities() for NER (Named Entity
# Recognition).
nlp = spacy.load("en_core_web_sm")
# enrich_keywords() reads the NLTK English stop-word corpus. The two lines
# below are presumably kept as a reminder of the one-time download step:
# import nltk
# nltk.download('stopwords')
def refine_summary(article_text, sentence_count=3):
    """
    Build an extractive summary of the article text using LexRank (sumy).

    Args:
        article_text: Plain text of the article to summarize.
        sentence_count: Number of sentences requested from the summarizer;
            passed through to sumy unchanged. Defaults to 3.

    Returns:
        The sentences selected by the summarizer joined with single spaces,
        or an empty string when ``article_text`` is None, empty or
        whitespace-only.
    """
    # Nothing to summarize: skip building the tokenizer and summarizer and
    # avoid handing a missing value to the parser.
    if not article_text or not article_text.strip():
        return ""

    parser = PlaintextParser.from_string(article_text, Tokenizer("english"))
    summarizer = LexRankSummarizer()
    selected_sentences = summarizer(parser.document, sentence_count)
    return " ".join(str(sentence) for sentence in selected_sentences)
def analyze_sentiment(article):
    """
    Score the sentiment of an article's full text and of its summary.

    Args:
        article: Object exposing ``text`` and ``summary`` attributes; each is
            passed to ``get_sentiment_score``.

    Returns:
        A dict with the keys ``"main_text_sentiment"`` and
        ``"summary_sentiment"``, each holding the ``get_sentiment_score``
        result for the corresponding attribute.
    """
    # The main text is scored first, then the summary.
    return {
        "main_text_sentiment": get_sentiment_score(article.text),
        "summary_sentiment": get_sentiment_score(article.summary),
    }
def get_sentiment_score(text):
    """
    Compute TextBlob sentiment scores for a piece of text.

    Args:
        text: The text handed to ``TextBlob``.

    Returns:
        A dict with the keys ``"polarity"`` and ``"subjectivity"``, taken
        from the TextBlob sentiment result.
    """
    scores = TextBlob(text).sentiment
    result = {}
    result["polarity"] = scores.polarity
    result["subjectivity"] = scores.subjectivity
    return result
def enrich_keywords(article):
    """
    Filter an article's keywords, dropping English stop words.

    Args:
        article: Object exposing a ``keywords`` iterable of strings.

    Returns:
        A new list with the keywords whose lowercased form is not in the
        NLTK English stop-word list, in their original order and casing.
    """
    english_stop_words = set(stopwords.words('english'))
    kept_keywords = []
    for keyword in article.keywords:
        # Comparison is case-insensitive; the keyword itself is kept as-is.
        if keyword.lower() in english_stop_words:
            continue
        kept_keywords.append(keyword)
    return kept_keywords
def extract_named_entities(text):
    """
    Collect the distinct named-entity strings spaCy finds in the text.

    Args:
        text: The text to run through the module-level ``nlp`` pipeline.

    Returns:
        A set holding the ``text`` of every entity in the parsed document's
        ``ents``; duplicates collapse because the result is a set.
    """
    unique_entities = set()
    for entity in nlp(text).ents:
        unique_entities.add(entity.text)
    return unique_entities