Skip to content

Commit

Permalink
Merge pull request #436 from massimoaria/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
massimoaria authored Mar 12, 2024
2 parents b0180a0 + 8178a09 commit df28306
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 26 deletions.
7 changes: 6 additions & 1 deletion R/convert2df.R
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,12 @@ convert2df<-function(file,dbsource="wos",format="plaintext", remove.duplicates=T
id_field <- "UT"
},
scopus={
id_field <- "UT"
if (format=="csv"){
id_field <- "UT"
} else {
id_field <- "TI"
}

},
openalex={
id_field <- "id_oa"
Expand Down
33 changes: 8 additions & 25 deletions inst/biblioshiny/server.R
Original file line number Diff line number Diff line change
Expand Up @@ -239,11 +239,7 @@ To ensure the functionality of Biblioshiny,
M <- convert2df(D,
dbsource = input$dbsource,
format = format(D))
if (input$authorName=="AF"){
M <- M %>%
rename(AU_IN = .data$AU,
AU = .data$AF)
}
M <- authorNameFormat(M, input$authorName)
})
},
### WoS Txt/Bib Files
Expand All @@ -253,11 +249,7 @@ To ensure the functionality of Biblioshiny,
M <- convert2df(inFile$datapath,
dbsource = input$dbsource,
format = format(inFile$datapath))
if (input$authorName=="AF"){
M <- M %>%
rename(AU_IN = .data$AU,
AU = .data$AF)
}
M <- authorNameFormat(M, input$authorName)
})
})
},
Expand All @@ -271,11 +263,8 @@ To ensure the functionality of Biblioshiny,
M <- convert2df(D,
dbsource = input$dbsource,
format = format(D))
if (input$authorName=="AF"){
M <- M %>%
rename(AU_IN = .data$AU,
AU = .data$AF)
}
M <- authorNameFormat(M, input$authorName)
if (format(D)=="csv") M <- AuthorNameMerge(M)
})
},
### Scopus CSV/Bib Files
Expand All @@ -285,11 +274,8 @@ To ensure the functionality of Biblioshiny,
M <- convert2df(inFile$datapath,
dbsource = input$dbsource,
format = "csv")
if (input$authorName=="AF"){
M <- M %>%
rename(AU_IN = .data$AU,
AU = .data$AF)
}
M <- authorNameFormat(M, input$authorName)
M <- AuthorNameMerge(M)
})
},
bib = {
Expand All @@ -298,11 +284,8 @@ To ensure the functionality of Biblioshiny,
M <- convert2df(inFile$datapath,
dbsource = input$dbsource,
format = "bibtex")
if (input$authorName=="AF"){
M <- M %>%
rename(AU_IN = .data$AU,
AU = .data$AF)
}
M <- authorNameFormat(M, input$authorName)
#M <- AuthorNameMerge(M)
})
})
},
Expand Down
64 changes: 64 additions & 0 deletions inst/biblioshiny/utils.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,69 @@
### COMMON FUNCTIONS ####

# Switch the working author column to the "AF" field when requested.
#
# Arguments:
#   M      - data frame of bibliographic records.
#   format - author-name format selector; only the single string "AF"
#            triggers a change, any other value leaves M untouched.
#
# Returns:
#   M. When `format` is "AF" and M has an "AF" column, the existing "AU"
#   column is renamed to "AU_IN" and "AF" is renamed to "AU" (column
#   positions are preserved). Otherwise M is returned unchanged.
authorNameFormat <- function(M, format) {
  # isTRUE() collapses NULL, NA and length != 1 selectors to FALSE, so the
  # condition is always a single TRUE/FALSE; `&&` is the scalar operator
  # meant for `if` conditions.
  if (isTRUE(format == "AF") && "AF" %in% names(M)) {
    cols <- names(M)
    # Locate both columns before touching any name, so the swap is
    # simultaneous (the old "AU" must not be confused with the new one).
    is_au <- cols == "AU"
    is_af <- cols == "AF"
    cols[is_au] <- "AU_IN"
    cols[is_af] <- "AU"
    names(M) <- cols
  }
  M
}

# Split a "; "-separated string of "text (digits)" entries into a table.
#
# Arguments:
#   input_str - a single string such as "Name A (123); Name B (456)".
#   UT        - identifier stored alongside every extracted entry.
#
# Returns:
#   A data.frame with one row per entry and the columns
#     Texts   - the entry with everything from " (" onwards removed,
#     Numbers - the digits found inside parentheses, as numeric
#               (NA when the entry contains no "(digits)" group),
#     UT      - the identifier passed in.
#   An empty `input_str` yields a zero-row data.frame with the same columns.
split_text_numbers <- function(input_str, UT) {
  # Split the string into components based on "; "
  components <- unlist(strsplit(input_str, "; ", fixed = TRUE))

  # strsplit("") returns no components; build the empty result explicitly,
  # otherwise data.frame() would fail on columns of differing length.
  if (length(components) == 0) {
    return(data.frame(Texts = character(0), Numbers = numeric(0), UT = UT[0]))
  }

  # gsub() is vectorised, so all components are processed in one call.
  # Text part: drop everything from the first " (" to the end.
  texts <- gsub("\\s\\(.*$", "", components)

  # Numeric part: only convert components that really contain "(digits)";
  # the others stay NA instead of triggering a coercion warning.
  numbers <- rep(NA_real_, length(components))
  has_id <- grepl("\\(\\d+\\)", components)
  numbers[has_id] <- as.numeric(
    gsub(".*\\((\\d+)\\).*", "\\1", components[has_id])
  )

  data.frame(Texts = texts, Numbers = numbers, UT = UT)
}


# Harmonise author names across records using the numeric id attached to
# each author entry in M$AU (entries look like "Name (id)", "; "-separated).
#
# Arguments:
#   M - data frame of records with at least the columns AU and UT.
#       NOTE(review): UT is used as the join key, so it is presumably
#       unique per record - confirm against the caller.
#
# Returns:
#   M with the original AU column renamed to AU_original and two columns
#   appended:
#     AU    - ";"-separated author names, where every id is mapped to the
#             spelling that occurs most often for that id in M,
#     AU_ID - ";"-separated author ids in the same order.
#   Records without any author entry get NA in both new columns.
AuthorNameMerge <- function(M) {

  # Only records with a non-missing, non-empty AU can be split. Building the
  # index vector first also makes the zero-row case safe (no 1:0 loop).
  has_authors <- !is.na(M$AU) & nzchar(M$AU)
  df_list <- lapply(which(has_authors), function(i) {
    split_text_numbers(M$AU[i], M$UT[i])
  })
  df <- do.call(rbind, df_list)

  # No author entries at all: keep the output shape (AU_original, AU, AU_ID)
  # without running the grouping pipeline on a NULL table.
  if (is.null(df) || nrow(df) == 0) {
    M <- M %>%
      rename(AU_original = "AU") %>%
      mutate(AU = NA_character_, AU_ID = NA_character_)
    return(M)
  }

  # For each id pick the most frequent spelling. Entries without an id are
  # excluded here: they share Numbers == NA and would otherwise all be
  # collapsed into one single name.
  AU <- df[!is.na(df$Numbers), , drop = FALSE] %>%
    group_by(.data$Numbers, .data$Texts) %>%
    count() %>%
    group_by(.data$Numbers) %>%
    arrange(desc(.data$n)) %>%
    mutate(AU = .data$Texts[1]) %>%
    select(-"n", -"Texts") %>%
    ungroup() %>%
    distinct()

  df <- df %>%
    left_join(AU, by = "Numbers") %>%
    # Authors without an id keep their own spelling.
    mutate(AU = ifelse(is.na(.data$Numbers), .data$Texts, .data$AU)) %>%
    group_by(.data$UT) %>%
    summarize(
      AU = paste0(.data$AU, collapse = ";"),
      # format(scientific = FALSE) prevents large ids with trailing zeros
      # from being written in scientific notation (e.g. "5.72e+10").
      AU_ID = paste0(
        format(.data$Numbers, scientific = FALSE, trim = TRUE),
        collapse = ";"
      )
    )

  M <- M %>%
    rename(AU_original = "AU") %>%
    left_join(df, by = "UT")
  M
}

getFileNameExtension <- function (fn) {
# remove a path
splitted <- strsplit(x=fn, split='/')[[1]]
Expand Down

0 comments on commit df28306

Please sign in to comment.