-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path04 - Find_number_clusters.R
74 lines (65 loc) · 3.37 KB
/
04 - Find_number_clusters.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Clustering
rm(list=ls())
library(tidyverse)
library(sf)
library(terra)
library(cluster)
library(NbClust)
library(factoextra)
library(hopkins)
# Load data
## Planning units
load("Planning_units.RData")
# Open genetic raster and Extract values for the centroid of each PU
g_rast_values <- list()
g_rast <- rast(paste0(getwd(),"/Results/Diplodus_allAxes_8068.grd"))
g_rast_values[[1]] <- terra::extract(g_rast,pus_centroid)
g_rast <- rast(paste0(getwd(),"/Results/Mullus_allAxes_2753.grd"))
g_rast_values[[2]] <- terra::extract(g_rast,pus_centroid)
rm(g_rast)
# https://rstudio-pubs-static.s3.amazonaws.com/375287_5021917f670c435bb0458af333716136.html
# Diplodus sargus
num_axes <- 17
# Create an object containing the coordinates of the occupied PU
# in all genetic PCA axes: need this for finding clusters
pus_g_values <- cbind(g_rast_values[[1]], st_drop_geometry(pus))
names(pus_g_values)[1:ncol(g_rast_values[[1]])] <- paste0("spca",sprintf("%02d",1:ncol(g_rast_values[[1]])))
species_coord <- filter(pus_g_values, Diplodus_sargus == 1)
species_coord <- species_coord[1:num_axes]
rm(pus_g_values)
# Hopkins test to see if the data can be clustered
hopkins <- hopkins::hopkins(species_coord,m=500)
# pca to get a flavour of the possible number of clusters
fviz_pca_ind(prcomp(species_coord), title = "PCA - diplodus", palette = "jco", geom = "point", ggtheme = theme_classic(), legend = "bottom")
# run different algorithms to find the number of clusters
clusternum <- NbClust(species_coord, distance="euclidean", method="kmeans", index="all", max.nc = 30)
# print results
clusternum$Best.nc[1,] %>% table %>% sort(decreasing=T) %>% names %>% as.numeric %>% print
save(hopkins, clusternum, file="Results/Clustering_Diplodus.RData")
write.csv(clusternum$All.index,file="Results/Clustering_Diplodus_allIndex.csv")
t(clusternum$Best.nc) -> clustering_Diplodus_Best.nc
# Mullus surmuletus
num_axes <- 26
# Create an object containing the coordinates of the occupied PU
# in all genetic PCA axes: need this for finding clusters
pus_g_values <- cbind(g_rast_values[[2]], st_drop_geometry(pus))
names(pus_g_values)[1:ncol(g_rast_values[[2]])] <- paste0("spca",sprintf("%02d",1:ncol(g_rast_values[[2]])))
species_coord <- filter(pus_g_values, Mullus_surmuletus == 1)
species_coord <- species_coord[1:num_axes]
rm(pus_g_values)
# Hopkins test to see if the data can be clustered
hopkins <- hopkins::hopkins(species_coord,m=500)
# pca to get a flavour of the possible number of clusters
fviz_pca_ind(prcomp(species_coord), title = "PCA - Mullus", palette = "jco", geom = "point", ggtheme = theme_classic(), legend = "bottom")
# run different algorithms to find the number of clusters
clusternum <- NbClust(species_coord, distance="euclidean", method="kmeans", index="all", max.nc = 30)
# print results
clusternum$Best.nc[1,] %>% table %>% sort(decreasing=T) %>% names %>% as.numeric %>% print
save(hopkins, clusternum, file="Results/Clustering_Mullus.RData")
write.csv(clusternum$All.index,file="Results/Clustering_Mullus_allIndex.csv")
t(clusternum$Best.nc) -> clustering_Mullus_Best.nc
# Table S2: List of the 26 clustering indices evaluated by the NbClust function
# to find the most likely number of multidimensional genetic clusters of the
# two species, with the most likely number of clusters and the value of the index.
cbind(clustering_Diplodus_Best.nc, clustering_Mullus_Best.nc) %>%
write.csv(file="Results/Clustering_BestNc.csv")