cluster

不同聚類方法比較

normmlization，去除一些批次效應(yīng)等外部因素。通過對(duì)表達(dá)矩陣的矩陣的聚類，對(duì)細(xì)胞群體進(jìn)行分類。

無監(jiān)督聚類：hierarchical clustering，k-means clustering，graph-based clustering。

library(SingleCellExperiment)
deng <- readRDS("D:/paopaoR/mass/deng-reads.rds")
deng

## class: SingleCellExperiment 
## dim: 22431 268 
## metadata(0):
## assays(2): counts logcounts
## rownames(22431): Hvcn1 Gbp7 ... Sox5 Alg11
## rowData names(10): feature_symbol is_feature_control ...
##   total_counts log10_total_counts
## colnames(268): 16cell 16cell.1 ... zy.2 zy.3
## colData names(30): cell_type2 cell_type1 ... pct_counts_ERCC
##   is_cell_control
## reducedDimNames(0):
## spikeNames(1): ERCC

table(deng$cell_type2)

## 
##     16cell      4cell      8cell early2cell earlyblast  late2cell 
##         50         14         37          8         43         10 
##  lateblast   mid2cell   midblast         zy 
##         30         12         60          4

table(deng$cell_type1)

## 
## 16cell  2cell  4cell  8cell  blast zygote 
##     50     22     14     37    133     12

library(scater)

## Warning: package 'scater' was built under R version 3.5.2

plotPCA(deng, colour_by = "cell_type2")

## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.

image.png

plotPCA(deng, colour_by = "cell_type1")

## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.

image.png

SC3

當(dāng)細(xì)胞數(shù)量不是很多的時(shí)候可以用sc3，共識(shí)聚類，可以一步分析到位，也可以根據(jù)需要分步分析。但當(dāng)細(xì)胞數(shù)量很多的時(shí)候速度會(huì)很慢，用seurat會(huì)比較方便。

library(SC3)
deng <- sc3_estimate_k(deng)
metadata(deng)$sc3$k_estimation

## [1] 6

deng <- sc3(deng, ks = 10, biology = TRUE)

## Setting SC3 parameters...

## Calculating distances between the cells...

## Performing transformations and calculating eigenvectors...

## Performing k-means clustering...

## Calculating consensus matrix...

## Calculating biology...

colnames(rowData(deng))

##  [1] "feature_symbol"          "is_feature_control"     
##  [3] "is_feature_control_ERCC" "mean_counts"            
##  [5] "log10_mean_counts"       "rank_counts"            
##  [7] "n_cells_counts"          "pct_dropout_counts"     
##  [9] "total_counts"            "log10_total_counts"     
## [11] "sc3_gene_filter"         "sc3_10_markers_clusts"  
## [13] "sc3_10_markers_padj"     "sc3_10_markers_auroc"   
## [15] "sc3_10_de_padj"

colnames(colData(deng))

##  [1] "cell_type2"                            
##  [2] "cell_type1"                            
##  [3] "total_features"                        
##  [4] "log10_total_features"                  
##  [5] "total_counts"                          
##  [6] "log10_total_counts"                    
##  [7] "pct_counts_top_50_features"            
##  [8] "pct_counts_top_100_features"           
##  [9] "pct_counts_top_200_features"           
## [10] "pct_counts_top_500_features"           
## [11] "total_features_endogenous"             
## [12] "log10_total_features_endogenous"       
## [13] "total_counts_endogenous"               
## [14] "log10_total_counts_endogenous"         
## [15] "pct_counts_endogenous"                 
## [16] "pct_counts_top_50_features_endogenous" 
## [17] "pct_counts_top_100_features_endogenous"
## [18] "pct_counts_top_200_features_endogenous"
## [19] "pct_counts_top_500_features_endogenous"
## [20] "total_features_feature_control"        
## [21] "log10_total_features_feature_control"  
## [22] "total_counts_feature_control"          
## [23] "log10_total_counts_feature_control"    
## [24] "pct_counts_feature_control"            
## [25] "total_features_ERCC"                   
## [26] "log10_total_features_ERCC"             
## [27] "total_counts_ERCC"                     
## [28] "log10_total_counts_ERCC"               
## [29] "pct_counts_ERCC"                       
## [30] "is_cell_control"                       
## [31] "sc3_10_clusters"                       
## [32] "sc3_10_log2_outlier_score"

sc3_plot_consensus(deng, k = 10, show_pdata = "cell_type2")

image.png

sc3_plot_silhouette(deng, k = 10)

image.png

sc3_plot_expression(deng, k = 10, show_pdata = "cell_type2")

image.png

sc3_plot_markers(deng, k = 10, show_pdata = "cell_type2")

image.png

plotPCA(deng, colour_by = "sc3_10_clusters")

## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.

image.png

library(mclust)
adjustedRandIndex(colData(deng)$cell_type2, colData(deng)$sc3_10_clusters)

## [1] 0.6616899

pcaReduce

library(pcaReduce)
input <- logcounts(deng[rowData(deng)$sc3_gene_filter, ])
pca.ite <- PCAreduce(t(input), nbt = 1, q = 30, method = 'S')[[1]]#nbt-number of samples,q-number of dimensions to start with
colData(deng)$pcaReduce <- as.character(pca.ite[,32 - 10])#k-(2~q+1),k=10,(q+1)-10+1
plotPCA(deng, colour_by = "pcaReduce")

## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.

image.png

adjustedRandIndex(colData(deng)$cell_type2, colData(deng)$pcaReduce)

## [1] 0.3504523

tsne+k-means

set.seed(1234)
deng <- runTSNE(deng,perplexity = 50)#change perplexity to have robust result
colData(deng)$tSNE_kmeans8 <- as.character(kmeans(deng@reducedDims$TSNE, centers = 8)$clust)
plotTSNE(deng,colour_by = "tSNE_kmeans8")

## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.

image.png

colData(deng)$tSNE_kmeans10 <- as.character(kmeans(deng@reducedDims$TSNE, centers = 10)$clust)
plotTSNE(deng,colour_by = "tSNE_kmeans10")

## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.

image.png

adjustedRandIndex(colData(deng)$cell_type2, colData(deng)$tSNE_kmeans10)

## [1] 0.3753199

deng <- runTSNE(deng,perplexity = 10)
colData(deng)$tSNE_kmeans10.1 <- as.character(kmeans(deng@reducedDims$TSNE, centers = 10)$clust)
adjustedRandIndex(colData(deng)$cell_type2, colData(deng)$tSNE_kmeans10.1)

## [1] 0.4531443

deng <- runTSNE(deng,perplexity = 5)
colData(deng)$tSNE_kmeans10.2 <- as.character(kmeans(deng@reducedDims$TSNE, centers = 10)$clust)
adjustedRandIndex(colData(deng)$cell_type2, colData(deng)$tSNE_kmeans10.2)

## [1] 0.48855

SINCERA

這個(gè)包感覺用的不是很多，感覺就是用了個(gè)zscore轉(zhuǎn)換，根據(jù)相關(guān)層次聚類，迭代找個(gè)k值。

dat <- apply(input, 1, function(y) scRNA.seq.funcs::z.transform.helper(y))# perform z-score transformation
dd <- as.dist((1 - cor(t(dat), method = "pearson"))/2)# hierarchical clustering
hc <- hclust(dd, method = "average")

num.singleton <- 0
kk <- 1
for (i in 2:dim(dat)[2]) {
    clusters <- cutree(hc, k = i)
    clustersizes <- as.data.frame(table(clusters))
    singleton.clusters <- which(clustersizes$Freq < 2)
    if (length(singleton.clusters) <= num.singleton) {
        kk <- i
    } else {
        break;
    }
}
cat(kk)

## 6

library(pheatmap)
pheatmap(
    t(dat),
    cluster_cols = hc,
    cutree_cols = kk,
    kmeans_k = 100,
    show_rownames = FALSE
)

image.png

adjustedRandIndex(colData(deng)$cell_type2,clusters)

## [1] 0.3827252

clusters1 <- cutree(hc, k = 10)
adjustedRandIndex(colData(deng)$cell_type2,clusters1)

## [1] 0.3748432

dynamicTreeCut

deng <- runPCA(deng)
my.dist <- dist(reducedDim(deng))
my.tree <- hclust(my.dist, method="ward.D2")
library(dynamicTreeCut)
my.clusters <- unname(cutreeDynamic(my.tree, distM=as.matrix(my.dist), 
                                      minClusterSize=1, verbose=0))#modify cutheight to change clusters
table(my.clusters)

## my.clusters
##  1  2  3  4  5  6  7  8 
## 55 52 52 34 26 22 14 13

deng$clusters <- factor(my.clusters)
plotTSNE(deng,colour_by="clusters")

## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.

image.png

adjustedRandIndex(colData(deng)$cell_type2,my.clusters)

## [1] 0.5354439

SNN clip

基于graph理論，找共有鄰近點(diǎn)，現(xiàn)在的seurat也是基于graph，用knn，snn什么的，可以改變共享鄰近點(diǎn)的數(shù)目改變cluster結(jié)果。

library(igraph)
library(scran)
snn.gr <- buildSNNGraph(deng, use.dimred="PCA")
cluster.out <- igraph::cluster_walktrap(snn.gr)
my.clusters <- cluster.out$membership
table(my.clusters)

## my.clusters
##  1  2  3  4  5  6  7  8  9 
## 53 36 13 22 40 33 36 21 14

deng$cluster <- factor(my.clusters)
plotTSNE(deng, colour_by="cluster")

## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.

image.png

set.seed(123)
reducedDim(deng, "force") <- igraph::layout_with_fr(snn.gr, niter=3000)
plotReducedDim(deng, colour_by="cluster", use_dimred="force")

## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.

image.png

adjustedRandIndex(colData(deng)$cell_type2,my.clusters)

## [1] 0.4505625

RCA

據(jù)說是現(xiàn)有的聚類效果最好的包，用分析的數(shù)據(jù)投影一個(gè)參考來進(jìn)行聚類，但是模式還不是很成熟，暫時(shí)還沒有小鼠的模式，而且前期流程跑下來的數(shù)據(jù)是fpkm，也許有些影響。并且要求名字格式“XXXX_genenames_YYYY"，前面是位置信息，后面是ens號(hào)，emmm不知道為啥搞成這樣，畢竟最后它還是提取中間的名字進(jìn)行分析。。。。

RCA, short for Reference Component Analysis, is an R package for robust clustering analysis of single cell RNA sequencing data (scRNAseq). It is developed by Prabhakar Group at Genome Institute of Singapore (GIS).

Features: -clustering analysis of scRNAseq data from Human samples -three modes : “GlobalPanel”: default option for clusterig general single cell data sets that include a wide spectrum of cell types. “ColonEpitheliumPanel”: suitable for analyzing human colon/intestine derived samples. “SelfProjection”: suitable for analyzing data sets from not-well-studied tissue types (still under optimization)

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av

單細(xì)胞學(xué)習(xí)3

單細(xì)胞學(xué)習(xí)3

cluster

不同聚類方法比較

SC3

pcaReduce

tsne+k-means

SINCERA

dynamicTreeCut

SNN clip

RCA

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九 欧美,1769亚洲,黄色成人av

單細(xì)胞學(xué)習(xí)3

cluster

不同聚類方法比較

SC3

pcaReduce

tsne+k-means

SINCERA

dynamicTreeCut

SNN clip

RCA

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av