Merge pull request #436 from massimoaria/develop

Develop
massimoaria · Mar 12, 2024 · df28306 · df28306
2 parents b0180a0 + 8178a09
commit df28306
Show file tree

Hide file tree

Showing 3 changed files with 78 additions and 26 deletions.
diff --git a/R/convert2df.R b/R/convert2df.R
@@ -229,7 +229,12 @@ convert2df<-function(file,dbsource="wos",format="plaintext", remove.duplicates=T
              id_field <- "UT"
            },
            scopus={
-             id_field <- "UT"
+             if (format=="csv"){
+               id_field <- "UT"
+             } else {
+               id_field <- "TI"
+             }
+
            },
            openalex={
              id_field <- "id_oa"

diff --git a/inst/biblioshiny/server.R b/inst/biblioshiny/server.R
@@ -239,11 +239,7 @@ To ensure the functionality of Biblioshiny,
                                   M <- convert2df(D,
                                                   dbsource = input$dbsource,
                                                   format = format(D))
-                                  if (input$authorName=="AF"){
-                                    M <- M %>% 
-                                      rename(AU_IN = .data$AU,
-                                             AU = .data$AF)
-                                  }
+                                  M <- authorNameFormat(M, input$authorName)
                                 })
                  },
                  ### WoS Txt/Bib Files
@@ -253,11 +249,7 @@ To ensure the functionality of Biblioshiny,
                                   M <- convert2df(inFile$datapath,
                                                   dbsource = input$dbsource,
                                                   format = format(inFile$datapath))
-                                  if (input$authorName=="AF"){
-                                    M <- M %>% 
-                                      rename(AU_IN = .data$AU,
-                                             AU = .data$AF)
-                                  }
+                                  M <- authorNameFormat(M, input$authorName)
                                 })
                  })
         },
@@ -271,11 +263,8 @@ To ensure the functionality of Biblioshiny,
                                   M <- convert2df(D,
                                                   dbsource = input$dbsource,
                                                   format = format(D))
-                                  if (input$authorName=="AF"){
-                                    M <- M %>% 
-                                      rename(AU_IN = .data$AU,
-                                             AU = .data$AF)
-                                  }
+                                  M <- authorNameFormat(M, input$authorName)
+                                  if (format(D)=="csv") M <- AuthorNameMerge(M)
                                 })
                  },
                  ### Scopus CSV/Bib Files
@@ -285,11 +274,8 @@ To ensure the functionality of Biblioshiny,
                                   M <- convert2df(inFile$datapath,
                                                   dbsource = input$dbsource,
                                                   format = "csv")
-                                  if (input$authorName=="AF"){
-                                    M <- M %>% 
-                                      rename(AU_IN = .data$AU,
-                                             AU = .data$AF)
-                                  }
+                                  M <- authorNameFormat(M, input$authorName)
+                                  M <- AuthorNameMerge(M)
                                 })
                  },
                  bib = {
@@ -298,11 +284,8 @@ To ensure the functionality of Biblioshiny,
                                   M <- convert2df(inFile$datapath,
                                                   dbsource = input$dbsource,
                                                   format = "bibtex")
-                                  if (input$authorName=="AF"){
-                                    M <- M %>% 
-                                      rename(AU_IN = .data$AU,
-                                             AU = .data$AF)
-                                  }
+                                  M <- authorNameFormat(M, input$authorName)
+                                  #M <- AuthorNameMerge(M)
                                 })
                  })
         },

diff --git a/inst/biblioshiny/utils.R b/inst/biblioshiny/utils.R
@@ -1,5 +1,69 @@
 ### COMMON FUNCTIONS ####
 
+# Optionally switch the author column to the full-name variant.
+#
+# When `format` is "AF" and `M` has both an AU and an AF column, the current
+# AU column is kept as AU_IN and AF becomes the new AU. In every other case
+# `M` is returned unchanged.
+#
+# M      : data frame holding the bibliographic records.
+# format : author-name choice; only the value "AF" triggers the swap.
+#
+# Returns `M`, with the AU/AF columns renamed when the swap applies.
+authorNameFormat <- function(M, format){
+  # isTRUE() keeps the test a single TRUE/FALSE even when `format` is NULL,
+  # NA or longer than one element, so the `if` below can never fail.
+  wants_full_names <- isTRUE(format == "AF")
+  if (wants_full_names && all(c("AU", "AF") %in% names(M))){
+    # Plain column names: `.data$` inside rename() is deprecated in tidyselect.
+    M <- M %>%
+      rename(AU_IN = "AU",
+             AU = "AF")
+  }
+  return(M)
+}
+
+# Split a "; "-separated string into its text parts and numeric IDs.
+#
+# Each component is expected to look like "Text (digits)".
+#
+# input_str : single character string of "; "-separated components.
+# UT        : value stored in the UT column of every returned row.
+#
+# Returns a data.frame with one row per component and the columns
+#   Texts   - the component up to (not including) its first " (",
+#   Numbers - the last parenthesised run of digits, NA when there is none,
+#   UT      - the `UT` argument.
+split_text_numbers <- function(input_str, UT) {
+  # Split the string into components based on "; "
+  components <- unlist(strsplit(input_str, "; ", fixed = TRUE))
+
+  # An empty string has no components: return an empty frame instead of
+  # letting data.frame() fail on mismatched column lengths.
+  if (length(components) == 0) {
+    return(data.frame(Texts = character(0), Numbers = numeric(0), UT = UT[0]))
+  }
+
+  # Text part: drop everything from the first " (" onwards (vectorised).
+  texts <- gsub("\\s\\(.*$", "", components)
+
+  # Numeric part: only convert components that really contain "(digits)".
+  # Without this check the regex leaves the whole component untouched and
+  # as.numeric() turns it into NA with a coercion warning.
+  has_id <- grepl("\\(\\d+\\)", components)
+  numbers <- rep(NA_real_, length(components))
+  numbers[has_id] <- as.numeric(
+    gsub(".*\\((\\d+)\\).*", "\\1", components[has_id])
+  )
+
+  # Return a data frame with texts and numbers separated
+  data.frame(Texts = texts, Numbers = numbers, UT = UT)
+}
+
+
+# Harmonise author names using the numeric IDs embedded in the AU strings.
+#
+# Every AU entry is split with split_text_numbers(). For each ID the most
+# frequent spelling is chosen and used for all of that ID's occurrences.
+#
+# M : data frame with an AU column of "; "-separated "Name (id)" entries.
+#
+# Returns `M` with
+#   AU_original - the former AU column,
+#   AU          - ";"-separated harmonised names (NA for rows without authors),
+#   AU_ID       - ";"-separated IDs in the same order (NA likewise).
+AuthorNameMerge <- function(M){
+
+  # Rows with a usable author string. NA must be tested explicitly because
+  # nchar(NA) > 0 is NA and would make an `if` fail.
+  has_authors <- !is.na(M$AU) & nzchar(M$AU)
+
+  M <- M %>%
+    rename(AU_original = "AU")
+  M$AU <- rep(NA_character_, nrow(M))
+  M$AU_ID <- rep(NA_character_, nrow(M))
+
+  # Nothing to merge (also covers a zero-row M, where 1:nrow(M) would
+  # wrongly iterate over c(1, 0)).
+  if (!any(has_authors)){
+    return(M)
+  }
+
+  # One long table of (name, id) pairs. Rows are keyed by their position in M
+  # rather than by M$UT, so missing or duplicated UT values cannot merge or
+  # duplicate records when the result is written back.
+  df_list <- lapply(which(has_authors), function(i){
+    split_text_numbers(M$AU_original[i], i)
+  })
+  df <- do.call(rbind, df_list)
+
+  # Most frequent spelling per ID. Entries without an ID are left out:
+  # they would otherwise form one NA group and all receive the same name.
+  with_id <- df[!is.na(df$Numbers), , drop = FALSE]
+  AU <- with_id %>%
+    group_by(.data$Numbers, .data$Texts) %>%
+    count() %>%
+    group_by(.data$Numbers) %>%
+    arrange(desc(.data$n)) %>%
+    mutate(AU = .data$Texts[1]) %>%
+    select(-"n", -"Texts") %>%
+    ungroup() %>%
+    distinct()
+
+  df <- df %>%
+    left_join(AU, by = "Numbers") %>%
+    # Authors without an ID keep their own spelling.
+    mutate(AU = ifelse(is.na(.data$AU),
+                       as.character(.data$Texts),
+                       .data$AU)) %>%
+    group_by(.data$UT) %>%
+    # format(): paste0() on a double may print an ID in scientific notation.
+    summarize(AU = paste0(.data$AU, collapse=";"),
+              AU_ID = paste0(format(.data$Numbers, scientific = FALSE,
+                                    trim = TRUE),
+                             collapse=";"))
+
+  # df$UT holds row positions of M (see above).
+  M$AU[df$UT] <- df$AU
+  M$AU_ID[df$UT] <- df$AU_ID
+  return(M)
+}
+
 getFileNameExtension <- function (fn) {
   # remove a path
   splitted    <- strsplit(x=fn, split='/')[[1]]