Skip to content

Commit

Permalink
Merge pull request #424 from massimoaria/develop
Browse files Browse the repository at this point in the history
Removed dependence from FactoMiner
  • Loading branch information
massimoaria authored Feb 28, 2024
2 parents 331828c + 936a1ec commit 05db9a8
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 118 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: bibliometrix
Type: Package
Title: Comprehensive Science Mapping Analysis
Version: 4.1.4
Version: 4.0.0
Authors@R: c(
person(given = "Massimo",
family = "Aria",
Expand Down Expand Up @@ -31,7 +31,7 @@ Imports: stats,
dimensionsR,
dplyr,
DT,
FactoMineR,
ca,
forcats,
ggplot2,
ggrepel,
Expand Down
4 changes: 1 addition & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ export(trim)
export(trim.leading)
export(trimES)
import(bibliometrixData)
import(ca)
import(dimensionsR)
import(forcats)
import(ggplot2)
Expand All @@ -65,9 +66,6 @@ import(tidytext)
importFrom(DT,DTOutput)
importFrom(DT,datatable)
importFrom(DT,renderDT)
importFrom(FactoMineR,CA)
importFrom(FactoMineR,MCA)
importFrom(FactoMineR,PCA)
importFrom(Matrix,"%&%")
importFrom(Matrix,"diag<-")
importFrom(Matrix,Arith)
Expand Down
179 changes: 80 additions & 99 deletions R/conceptualStructure.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
utils::globalVariables(c("label"))
#' Creating and plotting conceptual structure map of a scientific field
#'
#' The function \code{conceptualStructure} creates a conceptual structure map of
Expand Down Expand Up @@ -64,17 +65,17 @@ conceptualStructure<-function(M,field="ID", ngrams=1, method="MCA", quali.supp=N
#cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
cbPalette <- colorlist()#c(brewer.pal(9, 'Set1')[-6], brewer.pal(8, 'Set2')[-7], brewer.pal(12, 'Paired')[-11],brewer.pal(12, 'Set3')[-c(2,8,12)])

if (!is.null(quali.supp)){
QSUPP=data.frame(M[,quali.supp])
names(QSUPP)=names(M)[quali.supp]
row.names(QSUPP)=tolower(row.names(M))
}

if (!is.null(quanti.supp)){
SUPP=data.frame(M[,quanti.supp])
names(SUPP)=names(M)[quanti.supp]
row.names(SUPP)=tolower(row.names(M))
}
# if (!is.null(quali.supp)){
# QSUPP=data.frame(M[,quali.supp])
# names(QSUPP)=names(M)[quali.supp]
# row.names(QSUPP)=tolower(row.names(M))
# }
#
# if (!is.null(quanti.supp)){
# SUPP=data.frame(M[,quanti.supp])
# names(SUPP)=names(M)[quanti.supp]
# row.names(SUPP)=tolower(row.names(M))
# }
binary=FALSE
if (method=="MCA"){binary=TRUE}

Expand Down Expand Up @@ -157,20 +158,20 @@ conceptualStructure<-function(M,field="ID", ngrams=1, method="MCA", quali.supp=N
quali=NULL
quanti=NULL
# Perform Multiple Correspondence Analysis (MCA)
if (!is.null(quali.supp)){
ind=which(row.names(QSUPP) %in% row.names(CW))
QSUPP=as.data.frame(QSUPP[ind,])
CW=cbind(CW,QSUPP)
quali=(p+1):dim(CW)[2]
names(CW)[quali]=names(M)[quali.supp]
}
if (!is.null(quanti.supp)){
ind=which(row.names(SUPP) %in% row.names(CW))
SUPP=as.data.frame(SUPP[ind,])
CW=cbind(CW,SUPP)
quanti=(p+1+length(quali)):dim(CW)[2]
names(CW)[quanti]=names(M)[quanti.supp]
}
# if (!is.null(quali.supp)){
# ind=which(row.names(QSUPP) %in% row.names(CW))
# QSUPP=as.data.frame(QSUPP[ind,])
# CW=cbind(CW,QSUPP)
# quali=(p+1):dim(CW)[2]
# names(CW)[quali]=names(M)[quali.supp]
# }
# if (!is.null(quanti.supp)){
# ind=which(row.names(SUPP) %in% row.names(CW))
# SUPP=as.data.frame(SUPP[ind,])
# CW=cbind(CW,SUPP)
# quanti=(p+1+length(quali)):dim(CW)[2]
# names(CW)[quanti]=names(M)[quanti.supp]
# }

results <- factorial(CW,method=method,quanti=quanti,quali=quali)
res.mca <- results$res.mca
Expand All @@ -193,10 +194,9 @@ conceptualStructure<-function(M,field="ID", ngrams=1, method="MCA", quali.supp=N

km.res$data=df
km.res$cluster=cutree(km.res,k=clust)
km.res$data.clust=cbind(km.res$data,km.res$cluster)
names(km.res$data.clust)[3]="clust"
km.res$data.clust=data.frame(km.res$data,clust=km.res$cluster)
centers<- km.res$data.clust %>% group_by(.data$clust) %>%
summarise("Dim.1"=mean(.data$Dim.1),"Dim.2"=mean(.data$Dim.2)) %>%
summarise("Dim1"=mean(.data$Dim1),"Dim2"=mean(.data$Dim2)) %>%
as.data.frame()

km.res$centers=centers[,c(2,3,1)]
Expand All @@ -213,7 +213,7 @@ conceptualStructure<-function(M,field="ID", ngrams=1, method="MCA", quali.supp=N
hull_data <-
df_clust %>%
group_by(.data$clust) %>%
slice(chull(.data$Dim.1, .data$Dim.2))
slice(chull(.data$Dim1, .data$Dim2))

hull_data <- hull_data %>%
bind_rows(
Expand All @@ -224,7 +224,7 @@ conceptualStructure<-function(M,field="ID", ngrams=1, method="MCA", quali.supp=N

size <- labelsize

b <- ggplot(df_clust, aes(x=.data$Dim.1, y=.data$Dim.2, shape=.data$shape, color=.data$color)) +
b <- ggplot(df_clust, aes(x=.data$Dim1, y=.data$Dim2, shape=.data$shape, color=.data$color)) +
geom_point() +
geom_polygon(data = hull_data,
aes(fill = .data$color,
Expand All @@ -250,28 +250,7 @@ conceptualStructure<-function(M,field="ID", ngrams=1, method="MCA", quali.supp=N
ylab(paste("Dim 2 (",round(res.mca$eigCorr$perc[2],2),"%)",sep=""))
}else{b=b+xlab("Dim 1")+ylab("Dim 2")}

if (!is.null(quali.supp)){
s_df_quali=df_quali[(abs(df_quali[,1]) >= quantile(abs(df_quali[,1]),0.75) | abs(df_quali[,2]) >= quantile(abs(df_quali[,2]),0.75)),]
names(s_df_quali)=c("x","y")
s_df_quali$label=row.names(s_df_quali)
x=s_df_quali$x
y=s_df_quali$y
label=s_df_quali$label
b=b+geom_point(aes(x=x,y=y),data=s_df_quali,colour="red",size=1) +
geom_label_repel(aes(x=x,y=y,label=label,size=1),data=s_df_quali)
}

if (!is.null(quanti.supp)){
names(df_quanti)=c("x","y")
df_quanti$label=row.names(df_quanti)
x=df_quanti$x
y=df_quanti$y
label=df_quanti$label
b=b+geom_point(aes(x=x,y=y),data=df_quanti,colour="blue",size=1) +
geom_label_repel(aes(x=x,y=y,label=label,size=1),data=df_quanti) +
geom_segment(data=df_quanti,aes(x=0,y=0,xend = x, yend = y), size=1.5,arrow = arrow(length = unit(0.3,"cm")))
}
b=b + theme(legend.position="none")
b <- b + theme(legend.position="none")

## logo coordinates
coord_b <- plotCoord(b)
Expand Down Expand Up @@ -417,12 +396,12 @@ conceptualStructure<-function(M,field="ID", ngrams=1, method="MCA", quali.supp=N
if (isTRUE(graph)){plot(b_doc_TC)}

semanticResults=list(net=CW,res=res.mca,km.res=km.res,graph_terms=b,graph_dendogram=b_dend,
graph_documents_Contrib=b_doc,graph_documents_TC=b_doc_TC,docCoord=docCoord, hull_data=hull_data)
graph_documents_Contrib=b_doc,graph_documents_TC=b_doc_TC,docCoord=docCoord, coord=results$coord, hull_data=hull_data)

}else{

semanticResults=list(net=CW,res=res.mca,km.res=km.res,graph_terms=b,graph_dendogram=b_dend,
graph_documents_Contrib=NULL,graph_documents_TC=NULL,docCoord=NULL, hull_data=hull_data)
graph_documents_Contrib=NULL,graph_documents_TC=NULL,docCoord=NULL, coord=NA, hull_data=hull_data)
}

params <- list(field = field,
Expand All @@ -447,53 +426,54 @@ conceptualStructure<-function(M,field="ID", ngrams=1, method="MCA", quali.supp=N
}


factorial<-function(X,method,quanti,quali){
factorial<-function(X,method,quanti=NULL,quali=NULL){
df_quali=data.frame()
df_quanti=data.frame()

switch(method,
### CORRESPONDENCE ANALYSIS ###
CA={
res.mca <- CA(X, quanti.sup=quanti, quali.sup=quali, ncp=2, graph=FALSE)

res.mca <- ca::ca(X, nd=2)
# Get coordinates of keywords
#coord=get_ca_col(res.mca)
#df=data.frame(coord$coord)
coord <- list(coord=res.mca$col$coord, contrib=res.mca$col$contrib, cos2=res.mca$col$cos2)
df <- data.frame(coord$coord)
if (!is.null(quali)){
df_quali=data.frame(res.mca$quali.sup$coord)
}
if (!is.null(quanti)){
df_quanti=data.frame(res.mca$quanti.sup$coord)
}
#coord_doc=get_ca_row(res.mca)
#df_doc=data.frame(coord_doc$coord)
coord_doc <- list(coord=res.mca$row$coord, contrib=res.mca$row$contrib, cos2=res.mca$row$cos2)
df_doc <- data.frame(coord_doc$coord)
coord <- list(coord=res.mca$colcoord,
contrib =data.frame((res.mca$colcoord[,1:2]^2)*res.mca$colmass),
cos2=data.frame(((res.mca$colcoord[,1:2]^2)/(res.mca$coldist))))
#df <- data.frame(coord$coord)
coord_doc <- list(coord=res.mca$rowcoord,
contrib=data.frame((res.mca$rowcoord[,1:2]^2)*res.mca$rowmass),
cos2=data.frame(((res.mca$rowcoord[,1:2]^2)/(res.mca$rowdist))))
#df_doc <- data.frame(coord_doc$coord)
},
### MULTIPLE CORRESPONDENCE ANALYSIS ###
MCA={
if(length(quanti)>0){
X[,-quanti]=data.frame(apply(X[,-quanti],2,factor))} else{X=data.frame(apply(X,2,factor))}
res.mca <- MCA(X, quanti.sup=quanti, quali.sup=quali, ncp=2, graph=FALSE)
X=data.frame(apply(X,2,factor))
res.mca <- ca::mjca(X, nd=2, lambda="indicator", ps="_")
# Get coordinates of keywords (we take only categories "1"")
#coord=get_mca_var(res.mca)
#df=data.frame(coord$coord)[seq(2,dim(coord$coord)[1],by=2),]
df <- data.frame(res.mca$var$coord)[seq(2,dim(res.mca$var$coord)[1],by=2),]
row.names(df)=gsub("_1","",row.names(df))
if (!is.null(quali)){
df_quali=data.frame(res.mca$quali.sup$coord)[seq(1,dim(res.mca$quali.sup$coord)[1],by=2),]
row.names(df_quali)=gsub("_1","",row.names(df_quali))
}
if (!is.null(quanti)){
df_quanti=data.frame(res.mca$quanti.sup$coord)[seq(1,dim(res.mca$quanti.sup$coord)[1],by=2),]
row.names(df_quanti)=gsub("_1","",row.names(df_quanti))
}
#coord_doc=get_mca_ind(res.mca)
#df_doc=data.frame(coord_doc$coord)
coord_doc <- list(coord=res.mca$ind$coord, contrib=res.mca$ind$contrib, cos2=res.mca$ind$cos2)
df_doc=data.frame(res.mca$ind$coord)
K <- 2
I <- dim(res.mca$rowcoord)[1] ; J <- dim(res.mca$colcoord)[1]
evF <- matrix(rep(res.mca$sv[1:K], I), I, K, byrow = TRUE)
evG <- matrix(rep(res.mca$sv[1:K], J), J, K, byrow = TRUE)
rpc <- res.mca$rowcoord[,1:2] * evF
cpc <- res.mca$colcoord[,1:2] * evG

coord <- list(coord=data.frame(Dim1=cpc[,1],Dim2=cpc[,2], label=res.mca$levelnames,row.names = res.mca$levelnames) %>%
dplyr::filter(substr(label,nchar(label)-1,nchar(label))=="_1") %>%
select(-"label"),
contrib = data.frame(cpc^2*res.mca$colmass/res.mca$sv[1:2], label=res.mca$levelnames,row.names = res.mca$levelnames) %>%
dplyr::filter(substr(label,nchar(label)-1,nchar(label))=="_1") %>%
select(-"label"),
cos2=data.frame(((cpc^2)/(res.mca$coldist)),label=res.mca$levelnames,row.names = res.mca$levelnames) %>%
dplyr::filter(substr(label,nchar(label)-1,nchar(label))=="_1") %>%
select(-"label")
)
row.names(coord$coord) <- row.names(coord$contrib) <- row.names(coord$cos2) <- substr(row.names(coord$coord),1,nchar(row.names(coord$coord))-2)
#df <- coord$coord

coord_doc <- list(coord=data.frame(Dim1=rpc[,1],Dim2=rpc[,1], row.names=row.names(X)),
contrib=data.frame((rpc[,1:2]^2)*res.mca$rowmass/res.mca$sv[1:2]),
cos2=data.frame(res.mca$rowmass*rpc^2 / res.mca$rowinertia)
)
#df_doc <- coord_doc$coord
},
MDS={
NetMatrix=Matrix::crossprod(X,X)
Expand All @@ -503,26 +483,28 @@ factorial<-function(X,method,quanti,quali){
res.mca <- Net %>%
#dist() %>%
cmdscale()
colnames(res.mca) <- c("Dim.1", "Dim.2")
colnames(res.mca) <- c("Dim1", "Dim2")
df=data.frame(res.mca)
row.names(df)=row.names(Net)
}
)

if (method!="MDS"){
#
docCoord=as.data.frame(cbind(df_doc,rowSums(coord_doc$contrib)))
docCoord=as.data.frame(cbind(coord_doc$coord,rowSums(coord_doc$contrib)))
names(docCoord)=c("dim1","dim2","contrib")
docCoord=docCoord[order(-docCoord$contrib),]

# Benzecrì eigenvalue correction
res.mca <- eigCorrection(res.mca)

results=list(res.mca=res.mca,df=df,df_doc=df_doc,df_quali=df_quali,df_quanti=df_quanti,docCoord=docCoord)
res.mca$coord_doc <- coord_doc

results=list(res.mca=res.mca,df=coord$coord,df_doc=coord_doc$coord,df_quali=df_quali,df_quanti=df_quanti,docCoord=docCoord, coord=coord)


}else{
results=list(res.mca=res.mca,df=df,df_doc=NA,df_quali=NA,df_quanti=NA,docCoord=NA)
results=list(res.mca=res.mca,df=df,df_doc=NA,df_quali=NA,df_quanti=NA,docCoord=NA, coord=NA)
}
return(results)
}
Expand All @@ -543,11 +525,10 @@ euclDist<-function(x,y){

eigCorrection <- function(res) {
# Benzecri correction calculation

n <- nrow(res$eig)


e <- res$eig[,1]
n <- length(res$sv)
e <- res$sv^2
#n <- nrow(res$eig)
#e <- res$eig[,1]
eigBenz <- ((n / (n - 1)) ^ 2) * ((e - (1 / n)) ^ 2)
eigBenz[e< 1/n] <- 0
perc <- eigBenz / sum(eigBenz) * 100
Expand Down
7 changes: 4 additions & 3 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#' @import stringi
#' @import tidytext
#' @import openalexR
#' @import ca
#' @importFrom dplyr %>%
#' @importFrom dplyr across
#' @importFrom dplyr row_number
Expand Down Expand Up @@ -92,9 +93,9 @@
#' @importFrom rscopus author_search
#' @importFrom rscopus get_complete_author_info
# @importFrom RColorBrewer brewer.pal
#' @importFrom FactoMineR MCA
#' @importFrom FactoMineR CA
#' @importFrom FactoMineR PCA
# @importFrom FactoMineR MCA
# @importFrom FactoMineR CA
# @importFrom FactoMineR PCA
# @importFrom factoextra get_mca_var
# @importFrom factoextra get_mca_ind
# @importFrom factoextra get_ca_row
Expand Down
21 changes: 10 additions & 11 deletions inst/biblioshiny/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -823,8 +823,8 @@ CAmap <- function(input, values){
cluster=values$CS$km.res$cluster)
})

WData$Dim.1=round(WData$Dim.1,2)
WData$Dim.2=round(WData$Dim.2,2)
WData$Dim1=round(WData$Dim1,2)
WData$Dim2=round(WData$Dim2,2)
values$CS$WData <- WData

}else{emptyPlot("Selected field is not included in your data collection")
Expand Down Expand Up @@ -1645,23 +1645,22 @@ ca2plotly <- function(CS, method="MCA", dimX = 1, dimY = 2, topWordPlot = Inf, t

switch(method,
CA={
contrib = rowSums(CS$res$col$contrib %>% as.data.frame())/2
wordCoord <- CS$res$col$coord[,1:2] %>%
contrib = rowSums(CS$coord$contrib %>% as.data.frame())/2
wordCoord <- CS$coord$coord[,1:2] %>%
data.frame() %>%
mutate(label = row.names(CS$res$col$coord),
mutate(label = row.names(CS$coord$coord),
contrib = contrib) %>%
select(c(3,1,2,4))
xlabel <- paste0("Dim 1 (",round(CS$res$eigCorr$perc[1],2),"%)")
ylabel <- paste0("Dim 2 (",round(CS$res$eigCorr$perc[2],2),"%)")
},
MCA={
contrib =rowSums(CS$res$var$contrib)/2
wordCoord <- CS$res$var$coord[,1:2] %>%
contrib = rowSums(CS$coord$contrib %>% as.data.frame())/2
wordCoord <- CS$coord$coord[,1:2] %>%
data.frame() %>%
mutate(label = row.names(CS$res$var$coord),
mutate(label = row.names(CS$coord$coord),
contrib = contrib) %>%
select(c(3,1,2,4)) %>%
filter(substr(label,nchar(label)-1,nchar(label))=="_1")
select(c(3,1,2,4))
xlabel <- paste0("Dim 1 (",round(CS$res$eigCorr$perc[1],2),"%)")
ylabel <- paste0("Dim 2 (",round(CS$res$eigCorr$perc[2],2),"%)")
},
Expand Down Expand Up @@ -1725,7 +1724,7 @@ ca2plotly <- function(CS, method="MCA", dimX = 1, dimY = 2, topWordPlot = Inf, t
Dim2 = Dim2+dotSize*0.01)
if (max(CS$hull_data$clust)>1){
hull_df <- CS$hull_data %>% dplyr::filter(.data$clust==i)
fig <- fig %>% add_polygons(x = hull_df$Dim.1, y=hull_df$Dim.2, inherit = FALSE, showlegend = FALSE,
fig <- fig %>% add_polygons(x = hull_df$Dim1, y=hull_df$Dim2, inherit = FALSE, showlegend = FALSE,
color = I(hull_df$color[1]), opacity=0.3, line=list(width=2),
text=paste0("Cluster ",i), hoverinfo = 'text', hoveron="points")
}
Expand Down

0 comments on commit 05db9a8

Please sign in to comment.