name_error.py (forked from MarvelBoy047/Article_Summarizer)
import re

import streamlit as st
import requests
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from textblob import TextBlob
from googlesearch import search  # the googlesearch-python package, not the older `google` one
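
# The third-party dependencies this script assumes (PyPI package names are
# an assumption, apart from the documented TextBlob corpora download):
#
#     pip install streamlit requests beautifulsoup4 transformers torch \
#         textblob googlesearch-python
#     python -m textblob.download_corpora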

# Function to extract article content from Google
def extract_article_content(article_name):
    try:
        query = article_name + " article"
        # search() yields result URLs lazily; take only the top hit
        search_results = search(query, num_results=1)
        article_url = next(search_results)
        response = requests.get(article_url, timeout=10)  # avoid hanging on slow hosts
        html_content = response.text
        soup = BeautifulSoup(html_content, 'html.parser')
        # Concatenate the text of every <p> tag into one article body
        article_content = ""
        for paragraph in soup.find_all('p'):
            article_content += paragraph.get_text() + "\n"
        return article_content, article_url
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return None, None
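
# A quick sanity-check for the extractor outside the UI (hypothetical
# query; st.error only matters when this runs inside a Streamlit app):
#
#     content, url = extract_article_content("attention is all you need")
#     if content:
#         print(url)
#         print(content[:300])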

# Function to summarize the article content
def summarize_article(article_content):
    try:
        tokenizer = AutoTokenizer.from_pretrained("t5-small")
        model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
        # T5 uses relative position embeddings, so inputs beyond its
        # 512-token training length still run, though quality may degrade
        inputs = tokenizer.encode("summarize: " + article_content,
                                  return_tensors="pt", max_length=4096,
                                  truncation=True)
        summary_ids = model.generate(inputs, max_length=1000, min_length=300,
                                     length_penalty=2.0, num_beams=4,
                                     early_stopping=True)
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        return summary
    except Exception as e:
        st.error(f"An error occurred during summarization: {e}")
        return None
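
# Reloading t5-small on every click is the slowest step here. One possible
# refinement (a sketch, assuming a Streamlit version with st.cache_resource)
# is to load the model once and reuse it across reruns:
#
#     @st.cache_resource
#     def load_summarizer():
#         return (AutoTokenizer.from_pretrained("t5-small"),
#                 AutoModelForSeq2SeqLM.from_pretrained("t5-small"))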

# Function to extract keywords from the summary
def extract_keywords(summary):
    # noun_phrases is a WordList of lowercased noun-phrase strings
    blob = TextBlob(summary)
    return blob.noun_phrases
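
# Illustrative call (shape only; the actual phrases depend on the summary
# text and on TextBlob's default noun-phrase extractor):
#
#     extract_keywords("the reserve bank cut interest rates")
#     # -> a WordList such as ['reserve bank', 'interest rates']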

# Function to find related articles based on keywords
def find_related_articles(keywords):
    related_articles = []
    for keyword in keywords:
        query = f"{keyword} in India"
        # Limit each keyword to its top 5 search hits
        search_results = search(query, num_results=5)
        related_articles.extend(search_results)
    return related_articles
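
# Each result from search() is a plain URL string, so related_articles is a
# flat list of links. One web search per keyword can trip Google rate
# limits; a possible mitigation (assuming the sleep_interval parameter of
# googlesearch-python) is to pace the requests:
#
#     search_results = search(query, num_results=5, sleep_interval=2)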

# Function to highlight keywords in summary
def highlight_keywords(summary, keywords):
    # TextBlob lowercases noun phrases, so a plain str.replace would miss
    # differently-cased occurrences; match case-insensitively instead and
    # keep the casing found in the summary itself
    for keyword in keywords:
        pattern = re.compile(re.escape(keyword), re.IGNORECASE)
        summary = pattern.sub(lambda m: f"<span style='font-size: 20px'><b>{m.group(0)}</b></span>", summary)
    return summary
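
# For a keyword such as "interest rates", the matched fragment becomes:
#
#     <span style='font-size: 20px'><b>interest rates</b></span>
#
# which renders as bold, enlarged text because main() passes the summary to
# st.markdown with unsafe_allow_html=True.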

# Main function for UI
def main():
    st.set_page_config(page_title="Name Error - Article Extraction",
                       page_icon=":newspaper:", layout="wide")
    st.markdown("<h1 style='text-align: center; color: green;'>Name Error</h1>",
                unsafe_allow_html=True)
    st.header("Article Extraction")
    article_name = st.text_input("Enter the name of the article:", key="article_input")
    # Note: article_name always mirrors st.session_state.article_input (same
    # key), so the original rerun-on-change check could never fire; pressing
    # Enter already reruns the script, and the button gates summarization.
    if st.button("Summarize") and article_name:
        content, url = extract_article_content(article_name)
        if content:
            summary = summarize_article(content)
            if summary:
                keywords = extract_keywords(summary)
                related_articles = find_related_articles(keywords)
                highlighted_summary = highlight_keywords(summary, keywords)
                st.subheader("Summarization Points:")
                st.markdown(f"<p style='font-size: 16px;'>{highlighted_summary}</p>",
                            unsafe_allow_html=True)
                st.subheader("Related Articles:")
                for i, article in enumerate(related_articles[:5], start=1):
                    st.write(f"{i}. {article}")
                st.subheader("Original Article URL:")
                st.write(url)
        else:
            st.error("Failed to extract article content.")

if __name__ == "__main__":
    main()
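
# To launch the app (standard Streamlit invocation):
#
#     streamlit run name_error.py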