R语言排序分析 ordination analysis

文章目录

    • ****排序 Ordination****
    • 非约束性排序
      • PCA
      • CA
      • PCoA
      • NMDS
    • **约束性排序**
    • RDA

排序 Ordination

按照有无解释变量,可分为非约束性排序(unconstrained ordination)与约束性排序(constrained ordination)。

非约束性排序

  • PCA 线性模型,基于原始矩阵
  • CA 单峰模型,基于原始矩阵
  • PCoA 基于原始距离矩阵
  • NMDS 基于秩和距离矩阵
  • Clustering

PCA

PCA计算的是欧氏距离。

1
2
3
4
5
6
7
library(vegan)
library(tidyverse)
library(ggpubr)
#l对iris做PCA
model_pca<-rda(iris[,-5],scale=T,scaling=1)
#提取特征值与方差解释度%
summary(model_pca)$cont$importance #特征值与方差解释度
1
2
3
4
5
## Importance of components:
##                          PC1    PC2     PC3      PC4
## Eigenvalue            2.9185 0.9140 0.14676 0.020715
## Proportion Explained  0.7296 0.2285 0.03669 0.005179
## Cumulative Proportion 0.7296 0.9581 0.99482 1.000000
1
2
#载荷矩阵loading
head(model_pca$CA$v)
1
2
3
4
5
##                     PC1         PC2        PC3        PC4
## Sepal.Length  0.5210659 -0.37741762  0.7195664  0.2612863
## Sepal.Width  -0.2693474 -0.92329566 -0.2443818 -0.1235096
## Petal.Length  0.5804131 -0.02449161 -0.1421264 -0.8014492
## Petal.Width   0.5648565 -0.06694199 -0.6342727  0.5235971
1
2
3
4
5
6
#site scores
sites_score<-scores(model_pca,display = "sites")%>%
  as.data.frame()%>%
  rownames_to_column()%>%
  as_tibble()
sites_score

1
2
3
4
5
6
7
8
9
10
11
12
13
14
## # A tibble: 150 x 3
##    rowname    PC1     PC2
##    <chr>    <dbl>   <dbl>
##  1 sit1    -0.535 -0.203
##  2 sit2    -0.491  0.284
##  3 sit3    -0.558  0.144
##  4 sit4    -0.543  0.252
##  5 sit5    -0.564 -0.273
##  6 sit6    -0.490 -0.628
##  7 sit7    -0.577 -0.0201
##  8 sit8    -0.527 -0.0942
##  9 sit9    -0.551  0.471
## 10 sit10   -0.516  0.198
## # ... with 140 more rows
1
2
3
4
5
6
#specoes scores
species_score<-scores(model_pca,display = "species")%>%
  as.data.frame()%>%
  rownames_to_column()%>%
  as_tibble()
species_score
1
2
3
4
5
6
7
## # A tibble: 4 x 3
##   rowname        PC1     PC2
##   <chr>        <dbl>   <dbl>
## 1 Sepal.Length  2.20 -0.891
## 2 Sepal.Width  -1.14 -2.18  
## 3 Petal.Length  2.45 -0.0578
## 4 Petal.Width   2.38 -0.158
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#可视化
  ggplot()+
    geom_point(data=sites_score,aes(x=PC1,y=PC2,color=iris[,5]))+
    labs(x=paste("PC1 (",round(summary(model_pca)$cont$importance [2,1]*100,2)
                 ,"% )",sep=""),
         y=paste("PC2 (",round(summary(model_pca)$cont$importance [2,2]*100,2)
                 ,"% )",sep=""))+
    geom_segment(data=species_score,aes(x=0,y=0,xend=PC1,yend=PC2),size = 1,
                 arrow = arrow(length = unit(0.2, "inches")),color="steelblue")+
    ggrepel::geom_text_repel(data=species_score,nudge_x = 0.4,
                             fontface="bold", color="black",
                             aes(x=PC1,y=PC2,label=rowname))+
    stat_ellipse(type="norm",level= 0.9,
                  data=sites_score,aes(x=PC1,y=PC2,color=iris[,5]))+
    theme_bw()

在这里插入图片描述

CA

CA计算用的是卡方距离。

1
2
3
4
#l对iris做PCA
model_ca<-cca(iris[,-5],scale=T,scaling=1)
#提取特征值与方差解释度%
summary(model_ca)$cont$importance #特征值与方差解释度
1
2
3
4
5
## Importance of components:
##                           CA1     CA2       CA3
## Eigenvalue            0.06129 0.00220 0.0006302
## Proportion Explained  0.95585 0.03432 0.0098288
## Cumulative Proportion 0.95585 0.99017 1.0000000
1
2
#载荷矩阵loading
head(model_ca$CA$v)
1
2
3
4
5
##                     CA1        CA2        CA3
## Sepal.Length -0.4118927  0.6554888  0.8787892
## Sepal.Width  -1.2425352 -1.0612937 -0.9286927
## Petal.Length  1.0958023  0.5920802 -1.0659541
## Petal.Width   1.7406720 -2.3434255  1.4258929
1
2
3
4
5
6
#site scores
sites_score<-scores(model_ca,display = "sites")%>%
  as.data.frame()%>%
  rownames_to_column()%>%
  as_tibble()
sites_score
1
2
3
4
5
6
7
8
9
10
11
12
13
14
## # A tibble: 150 x 3
##    rowname   CA1     CA2
##    <chr>   <dbl>   <dbl>
##  1 sit1    -1.81 -0.0236
##  2 sit2    -1.64  0.871
##  3 sit3    -1.78 -0.0325
##  4 sit4    -1.61  0.328
##  5 sit5    -1.84 -0.382
##  6 sit6    -1.60 -0.992
##  7 sit7    -1.69 -1.03  
##  8 sit8    -1.72  0.187
##  9 sit9    -1.60  0.399
## 10 sit10   -1.71  1.28  
## # ... with 140 more rows
1
2
3
4
5
6
#species scores
species_score<-scores(model_ca,display = "species")%>%
  as.data.frame()%>%
  rownames_to_column()%>%
  as_tibble()
species_score

1
2
3
4
5
6
7
## # A tibble: 4 x 3
##   rowname         CA1     CA2
##   <chr>         <dbl>   <dbl>
## 1 Sepal.Length -0.102  0.0307
## 2 Sepal.Width  -0.308 -0.0498
## 3 Petal.Length  0.271  0.0278
## 4 Petal.Width   0.431 -0.110
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#plot
ggplot()+
    geom_point(data=sites_score,aes(x=CA1,y=CA2,color=iris[,5]))+
    labs(x=paste("CA1 (",round(summary(model_ca)$cont$importance [2,1]*100,2)
                 ,"% )",sep=""),
         y=paste("CA2 (",round(summary(model_ca)$cont$importance [2,2]*100,2)
                 ,"% )",sep=""))+
    geom_segment(data=species_score,aes(x=0,y=0,xend=CA1,yend=CA2),size = 1,
                 arrow = arrow(length = unit(0.2, "inches")),color="steelblue")+
    ggrepel::geom_text_repel(data=species_score,nudge_x = 0.4,
                             fontface="bold", color="black",
                             aes(x=CA1,y=CA2,label=rowname))+
    stat_ellipse(type="norm",level= 0.9,
                  data=sites_score,aes(x=CA1,y=CA2,color=iris[,5]))+

    theme_bw()

在这里插入图片描述

PCoA

PCoA可以采用其它距离矩阵(非欧式、卡方距离),如Bray-Cuurtis距离等。

1
2
3
4
5
6
7
8
#为防止出现大量负值特征值,需要对距离开方转换
model_pcoa<-vegdist(iris[,1:4],method="jaccard")%>% #计算距离
  sqrt()%>% #平方根转换
  cmdscale(k=2,eig=T) #多维标度转换

#变异解释
pcoa_explain<-round(model_pcoa$eig/sum(model_pcoa$eig)*100)
pcoa_explain
1
2
3
4
5
6
##   [1] 52 13  5  4  3  2  2  2  1  1  1  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0
##  [28]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [55]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [82]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [109]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [136]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
1
2
3
4
5
6
7
8
9
10
11
#可视化
ggplot()+
  geom_point(aes(x=model_pcoa$points[,1],
                 y=model_pcoa$points[,2],
                 color=iris[,5]))+
  labs(x=paste("PCoA1 (",pcoa_explain[1],"% )",sep=""),
         y=paste("PCoA2 (",pcoa_explain[2],"% )",sep=""))+
  stat_ellipse(type="norm",level= 0.9,
                  aes(x=model_pcoa$points[,1],
                      y=model_pcoa$points[,2],color=iris[,5]))+
  theme_bw()

在这里插入图片描述

NMDS

NMDS对距离采用rank排序的方式处理。
According to Clarke and Warwick 2001:

  • stress < 0.05: excellent representation
  • stress < 0.1: good representation
  • stress < 0.2: acceptable representation
  • stress > 0.3: unsatisfactory representation
1
model_nmds<-metaMDS(iris[,-5],distance="bray")
1
2
3
4
5
6
7
8
9
## Run 0 stress 0.03775523
## Run 1 stress 0.03775544
## ... Procrustes: rmse 3.313225e-05  max resid 0.000108817
## ... Similar to previous best
## Run 2 stress 0.03775554
## ... Procrustes: rmse 4.837426e-05  max resid 0.0001586252
## ... Similar to previous best
......
## *** Solution reached
1
print(model_nmds$stress)
1
## [1] 0.03775523
1
head(model_nmds$points)
1
2
3
4
5
6
7
##         MDS1        MDS2
## 1 -0.3306354  0.04482984
## 2 -0.3353751 -0.03146390
## 3 -0.3707664 -0.01345794
## 4 -0.3577813 -0.03315096
## 5 -0.3441011  0.04848315
## 6 -0.2648452  0.10403667
1
2
3
4
5
6
7
8
9
10
ggplot()+
  geom_point(aes(x=model_nmds$points[,1],y=model_nmds$points[,2],color=iris[,5]))+
  labs(x="NMDS1",y="NMDS2")+
  annotate(geom='text',x=median(model_nmds$points[,2]),
           y=max(model_nmds$points[,2]),
           label=paste("stress=",round(model_nmds$stress,3),sep=""))+
  stat_ellipse(type="norm",level= 0.9,
                  aes(x=model_nmds$points[,1],
                      y=model_nmds$points[,2],color=iris[,5]))+
  theme_bw()

在这里插入图片描述

约束性排序

  • DCA

  • RDA

RDA

1
2
3
4
5
6
7
library(vegan)
data(dune)
data(dune.env)
model_rda<-rda(dune~.,data=dune.env,scale=T)
explain<-round(summary(eigenvals(model_rda))[2,1:2]*100) #explain%
rda_scores<-scores(model_rda,display=c("wa","sp","bp","cn"),scaling=1)
rda_scores$sites
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
##           RDA1       RDA2
## 1   0.17012176  0.2048313
## 2   0.68187820  0.2013668
## 3   0.21124601  0.5171633
## 4   0.18100214  0.7088526
## 5   0.69515894 -0.1751914
## 6   0.69696718 -0.4193920
## 7   0.59278613 -0.1098875
## 8  -0.30730636  0.3066177
## 9  -0.01054738  0.5613862
## 10  0.79625359 -0.2566410
## 11  0.26659300 -0.3943883
## 12 -0.26793763  0.4836942
## 13 -0.32866543  0.6546796
## 14 -0.66940722 -0.1760738
## 15 -0.67360200 -0.1135603
## 16 -0.83383112  0.1572147
## 17 -0.03519031 -0.4439119
## 18  0.13726362 -0.4754222
## 19 -0.39800381 -0.9159053
## 20 -0.90477932 -0.3154327
1
rda_scores$species
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
##                 RDA1        RDA2
## Achimill  1.38853776 -0.46240331
## Agrostol -1.13099958  1.29004038
## Airaprae -0.43971307 -1.00612947
## Alopgeni -0.38786559  1.83152559
## Anthodor  0.70586460 -1.12519719
## Bellpere  1.10048587  0.20468799
## Bromhord  1.16709674  0.27991718
## Chenalbu -0.36345744  0.61594353
## Cirsarve  0.17045040  0.74300739
## Comapalu -0.88628201 -0.26489086
## Eleopalu -1.26177786 -0.04904603
## Elymrepe  0.76654345  1.11501096
## Empenigr -0.57537471 -0.72118181
## Hyporadi -0.37510919 -1.11407684
## Juncarti -0.86649439  0.26819947
## Juncbufo -0.19547681  1.23118128
## Lolipere  1.44104660  0.37715419
## Planlanc  1.28192072 -1.07778092
## Poaprat   1.41566983  0.75238004
## Poatriv   0.85067568  1.56875431
## Ranuflam -1.35268248 -0.21521734
## Rumeacet  0.91244945 -0.18679682
## Sagiproc -0.34871475  0.99052287
## Salirepe -0.69696904 -1.31597734
## Scorautu  0.52688790 -1.23513319
## Trifprat  0.90176681 -0.55673270
## Trifrepe  0.61002630 -0.25632153
## Vicilath  0.54665670 -0.87435551
## Bracruta -0.07515391 -0.72475692
## Callcusp -1.17488480 -0.57552593
1
rda_scores$biplot
1
2
3
4
5
6
7
8
9
10
11
12
13
##                     RDA1        RDA2
## A1           -0.22839753  0.01363636
## Moisture.L   -0.41894583  0.05020896
## Moisture.Q   -0.08736320 -0.15683991
## Moisture.C   -0.07699492 -0.01995266
## ManagementHF  0.17618401  0.01728307
## ManagementNM -0.25402460 -0.24369745
## ManagementSF -0.08668791  0.27227133
## Use.L        -0.07336037  0.10751561
## Use.Q        -0.06268156 -0.20902587
## Manure.L      0.13565541  0.28001645
## Manure.Q     -0.20728884 -0.08341861
## Manure.C      0.23839491  0.05732039
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
rda_scores_biplot<-rda_scores$biplot%>%as.data.frame()%>%rownames_to_column()%>%as_tibble()
ggplot()+
  geom_point(data=as.data.frame(rda_scores$sites),
             aes(x=RDA1,y=RDA2),color="steelblue",size=2)+
  stat_ellipse(data=as.data.frame(rda_scores$sites),
               aes(x=RDA1,y=RDA2),type="norm",level= 0.9)+
  geom_point(data=as.data.frame(rda_scores$species),
             aes(x=RDA1,y=RDA2),color="gray",size=1)+
  geom_segment(data=as.data.frame(rda_scores_biplot),
               aes(x=0,y=0,xend=RDA1,yend=RDA2),
               size = 1,arrow = arrow(length = unit(0.2, "inches")),color="black")+
  ggrepel::geom_text_repel(
    data=rda_scores_biplot,
    aes(x=RDA1,y=RDA2,label=rowname),
    nudge_x = 0.4,fontface="bold", color="black")+
    theme_bw()

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-QVOiyl4M-1591709464906)(figure/unnamed-chunk-5-1.png)]

  • CCA
  • dbRDA