-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecommendations.py
47 lines (34 loc) · 1.42 KB
/
recommendations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import html
from functional import seq
from unidecode import unidecode
import MonkeyLearnProductSentiment
import markdown_to_plaintext
import search
from comment import Comment, CommentList
from comments import get_comments
def clean_comment(comment):
    """Normalize the ``"text"`` field of a comment mapping in place.

    The text goes through three steps, in this order:

    1. ``html.unescape`` decodes HTML character references.
    2. ``markdown_to_plaintext.unmark`` is applied (presumably stripping
       markdown syntax -- confirm against that module).
    3. ``unidecode`` transliterates the result to ASCII.

    Args:
        comment: A mapping with a ``"text"`` entry. It is mutated.

    Returns:
        The same ``comment`` object, with ``"text"`` replaced.
    """
    unescaped = html.unescape(comment["text"])
    plain_text = markdown_to_plaintext.unmark(unescaped)
    comment["text"] = unidecode(plain_text)
    return comment
def get_recommendations(query):
    """Build keyword recommendations for ``query`` from Reddit comments.

    Pipeline:

    1. ``search.return_links`` resolves the query to Reddit URLs (per the
       original author's note, via a Google search for
       "<query name> reddit").
    2. ``get_comments`` resolves those URLs to comments; per the original
       note, each comment is a dictionary of string text, number score and
       string url. Every comment is passed through ``clean_comment`` and
       then ``Comment.from_dict``.
    3. ``MonkeyLearnProductSentiment.recommendation_extractor_chunked``
       receives the resulting ``CommentList`` and the query, and yields
       ``(text, score)`` pairs.

    Args:
        query: The search phrase. A falsy value, or a string containing
            only whitespace, is treated as "no query".

    Returns:
        A dict with three keys:

        * ``"error_message"``: ``"No query"`` when the query is missing or
          blank, otherwise ``""``.
        * ``"success"``: ``False`` when the query is missing or blank,
          otherwise ``True``.
        * ``"recommendations"``: a list of
          ``{"keyword": text, "score": score}`` dicts (empty on failure).
    """
    # A whitespace-only string is truthy, so check it explicitly: searching
    # for a blank query would waste a search and a sentiment-analysis run.
    is_blank = isinstance(query, str) and not query.strip()
    if not query or is_blank:
        return {"error_message": "No query", "success": False, "recommendations": []}

    reddit_urls = search.return_links(query)

    # Resolve the URLs to comments and normalize each one before wrapping it.
    cleaned_comments = (
        seq(get_comments(reddit_urls))
        .map(clean_comment)
        .map(Comment.from_dict)
        .to_list()
    )
    comment_list = CommentList(cleaned_comments)

    results = MonkeyLearnProductSentiment.recommendation_extractor_chunked(comment_list, query)

    # smap unpacks each (text, score) pair into the lambda's two arguments.
    recommendations = (
        seq(results)
        .smap(lambda text, score: {"keyword": text, "score": score})
        .to_list()
    )
    return {"error_message": "", "success": True, "recommendations": recommendations}