diff --git a/R/convert2df.R b/R/convert2df.R index f5b5f23..33f764e 100644 --- a/R/convert2df.R +++ b/R/convert2df.R @@ -229,7 +229,12 @@ convert2df<-function(file,dbsource="wos",format="plaintext", remove.duplicates=T id_field <- "UT" }, scopus={ - id_field <- "UT" + if (format=="csv"){ + id_field <- "UT" + } else { + id_field <- "TI" + } + }, openalex={ id_field <- "id_oa" diff --git a/inst/biblioshiny/server.R b/inst/biblioshiny/server.R index 9acf7d8..81d6a2d 100644 --- a/inst/biblioshiny/server.R +++ b/inst/biblioshiny/server.R @@ -239,11 +239,7 @@ To ensure the functionality of Biblioshiny, M <- convert2df(D, dbsource = input$dbsource, format = format(D)) - if (input$authorName=="AF"){ - M <- M %>% - rename(AU_IN = .data$AU, - AU = .data$AF) - } + M <- authorNameFormat(M, input$authorName) }) }, ### WoS Txt/Bib Files @@ -253,11 +249,7 @@ To ensure the functionality of Biblioshiny, M <- convert2df(inFile$datapath, dbsource = input$dbsource, format = format(inFile$datapath)) - if (input$authorName=="AF"){ - M <- M %>% - rename(AU_IN = .data$AU, - AU = .data$AF) - } + M <- authorNameFormat(M, input$authorName) }) }) }, @@ -271,11 +263,8 @@ To ensure the functionality of Biblioshiny, M <- convert2df(D, dbsource = input$dbsource, format = format(D)) - if (input$authorName=="AF"){ - M <- M %>% - rename(AU_IN = .data$AU, - AU = .data$AF) - } + M <- authorNameFormat(M, input$authorName) + if (format(D)=="csv") M <- AuthorNameMerge(M) }) }, ### Scopus CSV/Bib Files @@ -285,11 +274,8 @@ To ensure the functionality of Biblioshiny, M <- convert2df(inFile$datapath, dbsource = input$dbsource, format = "csv") - if (input$authorName=="AF"){ - M <- M %>% - rename(AU_IN = .data$AU, - AU = .data$AF) - } + M <- authorNameFormat(M, input$authorName) + M <- AuthorNameMerge(M) }) }, bib = { @@ -298,11 +284,8 @@ To ensure the functionality of Biblioshiny, M <- convert2df(inFile$datapath, dbsource = input$dbsource, format = "bibtex") - if (input$authorName=="AF"){ - M <- M %>% - 
### COMMON FUNCTIONS ####

# Switch the author column to the full-name variant when requested.
#
# M      : data frame holding the bibliographic records.
# format : author-name format selector; only the value "AF" triggers a change.
#
# When `format` is "AF" and both the `AU` and `AF` columns are present, `AU`
# is renamed to `AU_IN` and `AF` is renamed to `AU` (column positions are
# kept). In every other case `M` is returned untouched.
authorNameFormat <- function(M, format) {
  # isTRUE() keeps a NULL / zero-length / multi-valued selector from breaking
  # the scalar `if` condition.
  wants_full_names <- isTRUE(format == "AF")
  if (wants_full_names && all(c("AU", "AF") %in% names(M))) {
    col_names <- names(M)
    # Both positions are resolved before any name is overwritten, so the swap
    # does not depend on the order of the two assignments.
    au_pos <- match("AU", col_names)
    af_pos <- match("AF", col_names)
    col_names[au_pos] <- "AU_IN"
    col_names[af_pos] <- "AU"
    names(M) <- col_names
  }
  M
}

# Split one author string into names and numeric identifiers.
#
# input_str : a single string whose entries are separated by "; " and shaped
#             like "Name (digits)".
# UT        : record identifier attached to every produced row.
#
# Returns a data frame with the columns `Texts` (everything before the first
# " ("), `Numbers` (the digits found between parentheses, NA when an entry
# carries none) and `UT`.
split_text_numbers <- function(input_str, UT) {
  components <- unlist(strsplit(input_str, "; ", fixed = TRUE))

  # gsub() is vectorised, so no per-element loop is needed.
  texts <- gsub("\\s\\(.*$", "", components)

  # Only entries that really contain "(digits)" are converted; the others get
  # NA directly instead of going through a warning-raising coercion.
  numbers <- rep(NA_real_, length(components))
  has_id <- grepl("\\(\\d+\\)", components)
  numbers[has_id] <- as.numeric(
    gsub(".*\\((\\d+)\\).*", "\\1", components[has_id])
  )

  # An empty input yields zero components; shrink UT as well so that
  # data.frame() builds an empty frame rather than failing on row counts.
  if (length(components) == 0) {
    UT <- UT[0]
  }

  data.frame(
    Texts = texts,
    Numbers = numbers,
    UT = UT,
    stringsAsFactors = FALSE
  )
}


# Harmonise author names that share the same numeric identifier.
#
# M : data frame with an `AU` column (entries shaped like "Name (digits)",
#     separated by "; ") and a `UT` column used as the record key.
#
# For every identifier the most frequent spelling found across all records is
# chosen as the canonical name. The original `AU` column is kept as
# `AU_original`; the new `AU` holds the canonical names and `AU_ID` the
# identifiers, both collapsed with ";". Entries without an identifier keep
# their own spelling. If no record carries an author string, `M` is returned
# unchanged.
# NOTE(review): records are matched back through `UT`, so this presumably
# expects `UT` to be unique per record - confirm against the caller.
AuthorNameMerge <- function(M) {
  # NA-safe selection of the rows that actually carry an author string.
  rows_with_authors <- which(!is.na(M$AU) & nzchar(M$AU))
  if (length(rows_with_authors) == 0) {
    return(M)
  }

  df <- do.call(
    rbind,
    lapply(
      rows_with_authors,
      function(i) split_text_numbers(M$AU[i], M$UT[i])
    )
  )

  # One row per identifier: the spelling with the highest count comes first
  # after the sort, and the first occurrence of each identifier is kept.
  # Entries lacking an identifier are left out so that unrelated authors are
  # not merged under a shared NA key.
  canonical <- df %>%
    filter(!is.na(.data$Numbers)) %>%
    group_by(.data$Numbers, .data$Texts) %>%
    count() %>%
    ungroup() %>%
    arrange(desc(.data$n)) %>%
    filter(!duplicated(.data$Numbers)) %>%
    select("Numbers", AU = "Texts")

  merged <- df %>%
    left_join(canonical, by = "Numbers") %>%
    # Authors without an identifier fall back to their own spelling.
    mutate(AU = coalesce(.data$AU, .data$Texts)) %>%
    group_by(.data$UT) %>%
    summarize(
      AU = paste0(.data$AU, collapse = ";"),
      # "%.0f" prints every digit; default number-to-string conversion may
      # switch to scientific notation for identifiers with trailing zeros.
      AU_ID = paste0(sprintf("%.0f", .data$Numbers), collapse = ";")
    )

  M %>%
    rename(AU_original = "AU") %>%
    left_join(merged, by = "UT")
}
remove a path splitted <- strsplit(x=fn, split='/')[[1]]