R语言排序分析 ordination analysis

文章目录

****排序 Ordination****
非约束性排序

PCA
CA
PCoA
NMDS

**约束性排序**
RDA

排序 Ordination

按照有无解释变量，可分为非约束性排序（unconstrained ordination）与约束性排序（constrained ordination）。

非约束性排序

PCA 线性模型，基于原始矩阵
CA 单峰模型，基于原始矩阵
PCoA 基于原始距离矩阵
NMDS 基于秩和距离矩阵
Clustering

PCA

PCA计算的是欧氏距离。

1
2
3
4
5
6
7

library(vegan)
library(tidyverse)
library(ggpubr)
#l对iris做PCA
model_pca<-rda(iris[,-5],scale=T,scaling=1)
#提取特征值与方差解释度%
summary(model_pca)$cont$importance #特征值与方差解释度

1
2
3
4
5

## Importance of components:
## PC1 PC2 PC3 PC4
## Eigenvalue 2.9185 0.9140 0.14676 0.020715
## Proportion Explained 0.7296 0.2285 0.03669 0.005179
## Cumulative Proportion 0.7296 0.9581 0.99482 1.000000

1 2	#载荷矩阵loading head(model_pca$CA$v)

1
2
3
4
5

## PC1 PC2 PC3 PC4
## Sepal.Length 0.5210659 -0.37741762 0.7195664 0.2612863
## Sepal.Width -0.2693474 -0.92329566 -0.2443818 -0.1235096
## Petal.Length 0.5804131 -0.02449161 -0.1421264 -0.8014492
## Petal.Width 0.5648565 -0.06694199 -0.6342727 0.5235971

1
2
3
4
5
6

#site scores
sites_score<-scores(model_pca,display = "sites")%>%
as.data.frame()%>%
rownames_to_column()%>%
as_tibble()
sites_score

1
2
3
4
5
6
7
8
9
10
11
12
13
14

## # A tibble: 150 x 3
## rowname PC1 PC2
## <chr> <dbl> <dbl>
## 1 sit1 -0.535 -0.203
## 2 sit2 -0.491 0.284
## 3 sit3 -0.558 0.144
## 4 sit4 -0.543 0.252
## 5 sit5 -0.564 -0.273
## 6 sit6 -0.490 -0.628
## 7 sit7 -0.577 -0.0201
## 8 sit8 -0.527 -0.0942
## 9 sit9 -0.551 0.471
## 10 sit10 -0.516 0.198
## # ... with 140 more rows

1
2
3
4
5
6

#specoes scores
species_score<-scores(model_pca,display = "species")%>%
as.data.frame()%>%
rownames_to_column()%>%
as_tibble()
species_score

1
2
3
4
5
6
7

## # A tibble: 4 x 3
## rowname PC1 PC2
## <chr> <dbl> <dbl>
## 1 Sepal.Length 2.20 -0.891
## 2 Sepal.Width -1.14 -2.18
## 3 Petal.Length 2.45 -0.0578
## 4 Petal.Width 2.38 -0.158

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

#可视化
ggplot()+
geom_point(data=sites_score,aes(x=PC1,y=PC2,color=iris[,5]))+
labs(x=paste("PC1 (",round(summary(model_pca)$cont$importance [2,1]*100,2)
,"% )",sep=""),
y=paste("PC2 (",round(summary(model_pca)$cont$importance [2,2]*100,2)
,"% )",sep=""))+
geom_segment(data=species_score,aes(x=0,y=0,xend=PC1,yend=PC2),size = 1,
arrow = arrow(length = unit(0.2, "inches")),color="steelblue")+
ggrepel::geom_text_repel(data=species_score,nudge_x = 0.4,
fontface="bold", color="black",
aes(x=PC1,y=PC2,label=rowname))+
stat_ellipse(type="norm",level= 0.9,
data=sites_score,aes(x=PC1,y=PC2,color=iris[,5]))+
theme_bw()

在这里插入图片描述

CA

CA计算用的是卡方距离。

1
2
3
4

#l对iris做PCA
model_ca<-cca(iris[,-5],scale=T,scaling=1)
#提取特征值与方差解释度%
summary(model_ca)$cont$importance #特征值与方差解释度

1
2
3
4
5

## Importance of components:
## CA1 CA2 CA3
## Eigenvalue 0.06129 0.00220 0.0006302
## Proportion Explained 0.95585 0.03432 0.0098288
## Cumulative Proportion 0.95585 0.99017 1.0000000

1 2	#载荷矩阵loading head(model_ca$CA$v)

1
2
3
4
5

## CA1 CA2 CA3
## Sepal.Length -0.4118927 0.6554888 0.8787892
## Sepal.Width -1.2425352 -1.0612937 -0.9286927
## Petal.Length 1.0958023 0.5920802 -1.0659541
## Petal.Width 1.7406720 -2.3434255 1.4258929

1
2
3
4
5
6

#site scores
sites_score<-scores(model_ca,display = "sites")%>%
as.data.frame()%>%
rownames_to_column()%>%
as_tibble()
sites_score

1
2
3
4
5
6
7
8
9
10
11
12
13
14

## # A tibble: 150 x 3
## rowname CA1 CA2
## <chr> <dbl> <dbl>
## 1 sit1 -1.81 -0.0236
## 2 sit2 -1.64 0.871
## 3 sit3 -1.78 -0.0325
## 4 sit4 -1.61 0.328
## 5 sit5 -1.84 -0.382
## 6 sit6 -1.60 -0.992
## 7 sit7 -1.69 -1.03
## 8 sit8 -1.72 0.187
## 9 sit9 -1.60 0.399
## 10 sit10 -1.71 1.28
## # ... with 140 more rows

1
2
3
4
5
6

#species scores
species_score<-scores(model_ca,display = "species")%>%
as.data.frame()%>%
rownames_to_column()%>%
as_tibble()
species_score

1
2
3
4
5
6
7

## # A tibble: 4 x 3
## rowname CA1 CA2
## <chr> <dbl> <dbl>
## 1 Sepal.Length -0.102 0.0307
## 2 Sepal.Width -0.308 -0.0498
## 3 Petal.Length 0.271 0.0278
## 4 Petal.Width 0.431 -0.110

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

#plot
ggplot()+
geom_point(data=sites_score,aes(x=CA1,y=CA2,color=iris[,5]))+
labs(x=paste("CA1 (",round(summary(model_ca)$cont$importance [2,1]*100,2)
,"% )",sep=""),
y=paste("CA2 (",round(summary(model_ca)$cont$importance [2,2]*100,2)
,"% )",sep=""))+
geom_segment(data=species_score,aes(x=0,y=0,xend=CA1,yend=CA2),size = 1,
arrow = arrow(length = unit(0.2, "inches")),color="steelblue")+
ggrepel::geom_text_repel(data=species_score,nudge_x = 0.4,
fontface="bold", color="black",
aes(x=CA1,y=CA2,label=rowname))+
stat_ellipse(type="norm",level= 0.9,
data=sites_score,aes(x=CA1,y=CA2,color=iris[,5]))+

theme_bw()

在这里插入图片描述

PCoA

PCoA可以采用其它距离矩阵（非欧式、卡方距离），如Bray-Cuurtis距离等。

1
2
3
4
5
6
7
8

#为防止出现大量负值特征值，需要对距离开方转换
model_pcoa<-vegdist(iris[,1:4],method="jaccard")%>% #计算距离
sqrt()%>% #平方根转换
cmdscale(k=2,eig=T) #多维标度转换

#变异解释
pcoa_explain<-round(model_pcoa$eig/sum(model_pcoa$eig)*100)
pcoa_explain

1
2
3
4
5
6

## [1] 52 13 5 4 3 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0
## [28] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [55] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [82] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [109] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [136] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

1
2
3
4
5
6
7
8
9
10
11

#可视化
ggplot()+
geom_point(aes(x=model_pcoa$points[,1],
y=model_pcoa$points[,2],
color=iris[,5]))+
labs(x=paste("PCoA1 (",pcoa_explain[1],"% )",sep=""),
y=paste("PCoA2 (",pcoa_explain[2],"% )",sep=""))+
stat_ellipse(type="norm",level= 0.9,
aes(x=model_pcoa$points[,1],
y=model_pcoa$points[,2],color=iris[,5]))+
theme_bw()

在这里插入图片描述

NMDS

NMDS对距离采用rank排序的方式处理。
According to Clarke and Warwick 2001:

stress < 0.05: excellent representation
stress < 0.1: good representation
stress < 0.2: acceptable representation
stress > 0.3: unsatisfactory representation

1	model_nmds<-metaMDS(iris[,-5],distance="bray")

1
2
3
4
5
6
7
8
9

## Run 0 stress 0.03775523
## Run 1 stress 0.03775544
## ... Procrustes: rmse 3.313225e-05 max resid 0.000108817
## ... Similar to previous best
## Run 2 stress 0.03775554
## ... Procrustes: rmse 4.837426e-05 max resid 0.0001586252
## ... Similar to previous best
......
## *** Solution reached

1	print(model_nmds$stress)

1	## [1] 0.03775523

1	head(model_nmds$points)

1
2
3
4
5
6
7

## MDS1 MDS2
## 1 -0.3306354 0.04482984
## 2 -0.3353751 -0.03146390
## 3 -0.3707664 -0.01345794
## 4 -0.3577813 -0.03315096
## 5 -0.3441011 0.04848315
## 6 -0.2648452 0.10403667

1
2
3
4
5
6
7
8
9
10

ggplot()+
geom_point(aes(x=model_nmds$points[,1],y=model_nmds$points[,2],color=iris[,5]))+
labs(x="NMDS1",y="NMDS2")+
annotate(geom='text',x=median(model_nmds$points[,2]),
y=max(model_nmds$points[,2]),
label=paste("stress=",round(model_nmds$stress,3),sep=""))+
stat_ellipse(type="norm",level= 0.9,
aes(x=model_nmds$points[,1],
y=model_nmds$points[,2],color=iris[,5]))+
theme_bw()

在这里插入图片描述

约束性排序

RDA

1
2
3
4
5
6
7

library(vegan)
data(dune)
data(dune.env)
model_rda<-rda(dune~.,data=dune.env,scale=T)
explain<-round(summary(eigenvals(model_rda))[2,1:2]*100) #explain%
rda_scores<-scores(model_rda,display=c("wa","sp","bp","cn"),scaling=1)
rda_scores$sites

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21

## RDA1 RDA2
## 1 0.17012176 0.2048313
## 2 0.68187820 0.2013668
## 3 0.21124601 0.5171633
## 4 0.18100214 0.7088526
## 5 0.69515894 -0.1751914
## 6 0.69696718 -0.4193920
## 7 0.59278613 -0.1098875
## 8 -0.30730636 0.3066177
## 9 -0.01054738 0.5613862
## 10 0.79625359 -0.2566410
## 11 0.26659300 -0.3943883
## 12 -0.26793763 0.4836942
## 13 -0.32866543 0.6546796
## 14 -0.66940722 -0.1760738
## 15 -0.67360200 -0.1135603
## 16 -0.83383112 0.1572147
## 17 -0.03519031 -0.4439119
## 18 0.13726362 -0.4754222
## 19 -0.39800381 -0.9159053
## 20 -0.90477932 -0.3154327

1	rda_scores$species

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

## RDA1 RDA2
## Achimill 1.38853776 -0.46240331
## Agrostol -1.13099958 1.29004038
## Airaprae -0.43971307 -1.00612947
## Alopgeni -0.38786559 1.83152559
## Anthodor 0.70586460 -1.12519719
## Bellpere 1.10048587 0.20468799
## Bromhord 1.16709674 0.27991718
## Chenalbu -0.36345744 0.61594353
## Cirsarve 0.17045040 0.74300739
## Comapalu -0.88628201 -0.26489086
## Eleopalu -1.26177786 -0.04904603
## Elymrepe 0.76654345 1.11501096
## Empenigr -0.57537471 -0.72118181
## Hyporadi -0.37510919 -1.11407684
## Juncarti -0.86649439 0.26819947
## Juncbufo -0.19547681 1.23118128
## Lolipere 1.44104660 0.37715419
## Planlanc 1.28192072 -1.07778092
## Poaprat 1.41566983 0.75238004
## Poatriv 0.85067568 1.56875431
## Ranuflam -1.35268248 -0.21521734
## Rumeacet 0.91244945 -0.18679682
## Sagiproc -0.34871475 0.99052287
## Salirepe -0.69696904 -1.31597734
## Scorautu 0.52688790 -1.23513319
## Trifprat 0.90176681 -0.55673270
## Trifrepe 0.61002630 -0.25632153
## Vicilath 0.54665670 -0.87435551
## Bracruta -0.07515391 -0.72475692
## Callcusp -1.17488480 -0.57552593

1	rda_scores$biplot

1
2
3
4
5
6
7
8
9
10
11
12
13

## RDA1 RDA2
## A1 -0.22839753 0.01363636
## Moisture.L -0.41894583 0.05020896
## Moisture.Q -0.08736320 -0.15683991
## Moisture.C -0.07699492 -0.01995266
## ManagementHF 0.17618401 0.01728307
## ManagementNM -0.25402460 -0.24369745
## ManagementSF -0.08668791 0.27227133
## Use.L -0.07336037 0.10751561
## Use.Q -0.06268156 -0.20902587
## Manure.L 0.13565541 0.28001645
## Manure.Q -0.20728884 -0.08341861
## Manure.C 0.23839491 0.05732039

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

rda_scores_biplot<-rda_scores$biplot%>%as.data.frame()%>%rownames_to_column()%>%as_tibble()
ggplot()+
geom_point(data=as.data.frame(rda_scores$sites),
aes(x=RDA1,y=RDA2),color="steelblue",size=2)+
stat_ellipse(data=as.data.frame(rda_scores$sites),
aes(x=RDA1,y=RDA2),type="norm",level= 0.9)+
geom_point(data=as.data.frame(rda_scores$species),
aes(x=RDA1,y=RDA2),color="gray",size=1)+
geom_segment(data=as.data.frame(rda_scores_biplot),
aes(x=0,y=0,xend=RDA1,yend=RDA2),
size = 1,arrow = arrow(length = unit(0.2, "inches")),color="black")+
ggrepel::geom_text_repel(
data=rda_scores_biplot,
aes(x=RDA1,y=RDA2,label=rowname),
nudge_x = 0.4,fontface="bold", color="black")+
theme_bw()

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-QVOiyl4M-1591709464906)(figure/unnamed-chunk-5-1.png)]

CCA
dbRDA