-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathABT_add_post_popularity.R
79 lines (66 loc) · 2.49 KB
/
ABT_add_post_popularity.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
library(jsonlite)
library(tidyverse)
library(lubridate)
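# Example inputs for running add_post_popularity() interactively
# (uncomment and point `path` at the local data directory):
# path <- "G:/Saját meghajtó/HiFly/Common/Homokozó/nlp-hackathon/data/"
# data_in <- readRDS(paste0(path, "collected/items_225_245_v2.RDS"))
# ABT_in <- read.csv(paste0(path, "transformed/ABT_base.csv"))
# abt_date <- "2020-07-01"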
#' Add post-popularity features to the analytical base table (ABT).
#'
#' Keeps the ABT rows whose `year_month` equals `abt_date`, then, for every
#' author (`by`) in those rows, derives features from that author's "story"
#' items dated strictly before `abt_date` and strictly after `abt_date` minus
#' 183 days:
#'   * `last_post_score`, `last_post_desc`, `days_from_last_post` for the most
#'     recent posting day (max over that day if there are several posts);
#'   * `max_post_score_N`, `max_post_desc_N`, `mean_post_score_N`,
#'     `mean_post_desc_N`, `post_count_N` for N in 30, 60, 183, using posts
#'     that are fewer than N days older than `abt_date`.
#' Authors without any qualifying post get 0 for every feature, except
#' `days_from_last_post`, which is set to 183.
#'
#' @param ABT_in Data frame with at least the columns `by` and `year_month`.
#' @param data_in Data frame with at least the columns `by`, `type`, `time`,
#'   `score` and `descendants`. `time` must be convertible with `as.Date()`.
#'   Missing `score` / `descendants` values are treated as 0.
#' @param abt_date Snapshot date, as a value `as.Date()` can parse
#'   (e.g. "2020-07-01"); it is also compared as-is against `year_month`.
#' @return The filtered ABT with the feature columns listed above appended.
#'   NAs in columns that already existed in `ABT_in` are left untouched.
add_post_popularity <- function(ABT_in, data_in, abt_date) {
  abt_day <- as.Date(abt_date)
  lookback_days <- 183

  ABT <- ABT_in %>% filter(year_month == abt_date)

  # Stories by ABT authors inside the open interval
  # (abt_day - lookback_days, abt_day).
  data <- data_in %>%
    mutate(time = as.Date(time)) %>%
    filter(time < abt_day,
           time > abt_day - as.difftime(lookback_days, units = "days"),
           by %in% ABT$by, type == "story") %>%
    select(by, descendants, score, time) %>%
    replace_na(list("score" = 0, "descendants" = 0))
  # Status output goes to stderr so it does not pollute captured stdout.
  message("add_post_popularity: ", nrow(data),
          " story posts in the lookback window")

  # Most recent posting day per author. `time` is a Date, so several posts
  # can share that day; take the max score / descendants among them.
  last_post <- data %>%
    group_by(by) %>%
    filter(time == max(time)) %>%
    summarise(last_post_score = max(score),
              last_post_desc = max(descendants),
              days_from_last_post = abt_day - max(time)) %>%
    ungroup()

  ABT <- ABT %>%
    left_join(last_post, by = "by") %>%
    replace_na(list("last_post_score" = 0,
                    "last_post_desc" = 0,
                    "days_from_last_post" = 183))

  # Per-author summary of posts fewer than `window_days` days older than
  # abt_day; feature columns get the suffix "_<window_days>".
  summarise_window <- function(window_days) {
    stats <- data %>%
      filter(abt_day - time < window_days) %>%
      group_by(by) %>%
      summarise(max_post_score = max(score),
                max_post_desc = max(descendants),
                mean_post_score = mean(score),
                mean_post_desc = mean(descendants),
                post_count = n()) %>%
      ungroup()
    is_feature <- names(stats) != "by"
    names(stats)[is_feature] <- paste0(names(stats)[is_feature],
                                       "_", window_days)
    stats
  }

  cols_before <- names(ABT)
  for (window_days in c(30, 60, lookback_days)) {
    ABT <- ABT %>% left_join(summarise_window(window_days), by = "by")
  }

  # Authors with no post in a window come out of the join as NA. Fill only
  # the columns added here so NAs in pre-existing ABT columns are preserved.
  added_cols <- setdiff(names(ABT), cols_before)
  ABT[added_cols] <- lapply(ABT[added_cols], function(col) {
    replace(col, is.na(col), 0)
  })

  ABT
}