大香蕉超碰,欧美精品久久久久,亚洲日本视频1区2区

新組裝的基因組并沒(méi)有上傳和生成RefSeq基因組信息。而且一系列需要or.db或者構(gòu)建or.db的步驟嘗試構(gòu)建數(shù)據(jù)庫(kù)都不成功，要么缺乏物種信息，要么缺乏gene symbol。因此，我們利用自己的數(shù)據(jù)在eggNOG-mapper上進(jìn)行功能注釋的信息，同樣可以進(jìn)行富集分析。參考Y叔clusterProfile的手冊(cè)，以及一系列網(wǎng)上搜索的結(jié)果：https://zhuanlan.zhihu.com/p/35510434；https://cloud.tencent.com/developer/article/1607669?from=14588；
http://m.itdecent.cn/p/d484003dced5；
https://www.omicsclass.com/article/1513

直接上整理出來(lái)的腳本吧：

##universal enrichment analysis
library(magrittr)
library(clusterProfiler)
library(stringr)
library(dplyr)
library(ggplot2)
library(enrichplot)

#下載安裝注釋數(shù)據(jù)庫(kù), 有參考基因組的，有注釋結(jié)果的make a database
#source("https://bioconductor.org/biocLite.R")
library("AnnotationHub")
BiocManager::install("biomaRt")
library("biomaRt")
hub <- AnnotationHub::AnnotationHub()  #snapshotDate(): 2020-04-27
query(hub, "Erisoma") #搜索有參考基因組的KEGG注釋信息
#非模式物種以及沒(méi)有參考基因組的物種，不適合用eggGO和eggKEGG

gene_list <- read.table("Sl.expand.gene.txt", header = F)
gene <- as.character(gene_list[,1])


##1、GO 富集分析
#先處理emapper的數(shù)據(jù)，#注意去除title的注釋符號(hào)“#”
egg <- read.table("Sl.out.emapper.annotations", sep="\t", header=T) 

#提取GO數(shù)據(jù)
gene_ids <- egg$query
eggnog_annoations_go <- str_split(egg$GOs, ",")
gene_to_go <- data.frame(gene = rep(gene_ids,
                         times = sapply(eggnog_annoations_go, length)),
                         term = unlist(eggnog_annoations_go))
gene2go <- filter(gene_to_go, term != "-")

#term2gene <- gene_to_go[,c(2,1)]
term2gene <- gene2go[,c(2,1)]
colnames(term2gene)[1] <- "ID"
df1 <- go2term(term2gene$ID) ##根據(jù)ID生成GO注釋信息
df2 <- go2ont(term2gene$ID) ##根據(jù)ID進(jìn)行分類：BP, CC, MF
colnames(df1)[1] <- "ID"
colnames(df2)[1] <- "ID"
df <- left_join(term2gene, df1, by = "ID")
df3 <- left_join(df, df2, by = "ID")

#df3_BP <- filter(df3, Ontology == "BP")
#df_three <- split(df3, with(df3, Ontology))
colnames(df3)[4] <- "Class"
gid2gene <- df3[, c("ID", "gene", "Class")]
gid2name <- df3[, c("ID", "Term", "Class")]
ego <- enricher(gene, TERM2GENE = gid2gene, TERM2NAME = gid2name, 
                pvalueCutoff = 0.05, qvalueCutoff = 0.2, minGSSize = 10)
dotplot(ego)
result <- as.data.frame(ego) #轉(zhuǎn)化為數(shù)據(jù)框方便看富集的結(jié)果

gid2gene <- split(gid2gene, with(gid2gene, Class)) #拆分成BP，MF，CC三個(gè)數(shù)據(jù)框
gid2name <- split(gid2name, with(gid2name, Class)) #拆分成BP，MF，CC三個(gè)數(shù)據(jù)框
#gid2gene <- df3_BP[, c("ID", "gene")]
#gid2name <- df3_BP[, c("ID", "Term")]

#如果只是想關(guān)注某一GO分類的富集情況可運(yùn)行如下：
ego_BP <- enricher(gene, TERM2GENE = gid2gene[['BP']][c(1,2)], 
                   TERM2NAME = gid2name[['BP']][c(1,2)], 
                pvalueCutoff = 0.05, qvalueCutoff = 0.2, minGSSize = 10)
dotplot(ego_BP, title = "BP")
ego_CC <- enricher(gene, TERM2GENE = gid2gene[['CC']][c(1,2)], 
                   TERM2NAME = gid2name[['CC']][c(1,2)], 
                   pvalueCutoff = 0.05, qvalueCutoff = 0.2, minGSSize = 10)
ego_MF <- enricher(gene, TERM2GENE = gid2gene[['MF']][c(1,2)], 
                   TERM2NAME = gid2name[['MF']][c(1,2)], 
                   pvalueCutoff = 0.05, qvalueCutoff = 0.2, minGSSize = 10)
#以下可以將三個(gè)分類都整合到一個(gè)圖內(nèi)
library(ggpubr)
BP <- dotplot(ego_BP, title = "BP")
CC <- dotplot(ego_CC, title = "CC")
MF <- dotplot(ego_MF, title = "MF")
ggarrange(BP, CC, MF, ncol = 1, nrow = 3, align = "hv")

#下面的數(shù)據(jù)是TBtools整理出來(lái)的
#dat <- read.table("out.emapper.annotations.GO.txt", header = F, sep = "\t")
#colnames(dat)[1] <- "gene"
#colnames(dat)[2] <- "Gid"
#term2gene <- dat[,c(2,1)]

#這里是先富集分析再給id注釋和分類,然后用ggplot2畫(huà)，因?yàn)闆](méi)法用dotplot畫(huà)
df<-enricher(gene=gene,
             pvalueCutoff = 0.05,
             pAdjustMethod = "BH",
             TERM2GENE = term2gene)

df <- as.data.frame(df) #一定要轉(zhuǎn)成數(shù)據(jù)框
df1 <- go2term(df$ID)  ##根據(jù)ID生成注釋信息
colnames(df1)[1] <- "ID"
ego <- left_join(df, df1, by = "ID")
df2 <- go2ont(df$ID)   ##根據(jù)ID進(jìn)行分類：BP, CC, MF
colnames(df2)[1] <- "ID"
ego1 <- left_join(ego, df2, by = "ID")
df3 <- ego1 %>% select(c("Term", "Ontology", "p.adjust", "Count"))
df3 <- na.omit(df3)
df_m <- df3[c(1:20),]    #提取前20個(gè)term
df_m <- df_m[order(df_m$Count, decreasing = F),]  #按照count排序
df_m$Term <- factor(df_m$Term, levels = df_m$Term)  #給排好序的term固定變量，作圖時(shí)就能按照這個(gè)順序
ggplot(df_m,aes(x=Term,y=Count))+ 
  geom_point(aes(size = Count, color=-log10(p.adjust))) +  
  facet_grid(Ontology~., scales = "free", space = "free") +
  coord_flip()+labs(x="")+
  theme_bw() + scale_color_steps(low = "blue", high = "red")
#color越紅，p值越小 
#接上面GO的分析，此處不需要count排序只要pvalue的柱形圖，等同于TBtools里的圖
df3 <- ego1 %>% select(c("Term", "Ontology", "pvalue"))
df3 <- na.omit(df3)
ggplot(df3[c(1:20),],aes(x=Term,y=-log10(pvalue)))+
  geom_bar(aes(fill=Ontology))+
  coord_flip()+labs(x="")+
  theme_bw()

接著提取KEGG注釋信息并富集分析。。。。

# 2. KEGG pathway
#提取KEGG數(shù)據(jù),要注意KO pathway與k number的區(qū)別，前者才可轉(zhuǎn)化為代謝功能
gene_ko <- egg %>%
  dplyr::select(GID = query, Ko = KEGG_ko)  #這里的KEGG_ko是K number
gene_ko[,2]<-gsub("ko:","",gene_ko[,2]) #替換去除ko:
gene_ko_list <- str_split(gene_ko$Ko, ",")
gene2ko <- data.frame(gene = rep(gene_ids,
                                 times = sapply(gene_ko_list, length)),
                      term = unlist(gene_ko_list))
gene2ko <- filter(gene2ko, term != "-")

gene_pathway <- egg %>%
  dplyr::select(GID = query, Pathway = KEGG_Pathway)  #這里的才是Pathway
gene_pathway_list <- str_split(gene_pathway$Pathway, ",")
gene2pathway <- data.frame(gene = rep(gene_ids,
                                      times = sapply(gene_pathway_list, length)),
                           term = unlist(gene_pathway_list))
gene2pathway <- filter(gene2pathway, term != "-")

term2gene <- gene2pathway[,c(2,1)]
colnames(term2gene)[1] <- "ID"
pathway2name <- ko2name(term2gene$ID) #根據(jù)pathway的id輸出注釋信息
pathway2name <- na.omit(pathway2name)
pathway2name <- unique.data.frame(pathway2name)
colnames(pathway2name)[1] <- "ID"
ko2gene <- term2gene[grep(pattern = "^ko", term2gene$ID),]
#這里的id有的是map開(kāi)頭的需要去除，因?yàn)閗o2name只能注釋ko開(kāi)頭的

kegg <- enricher(gene, TERM2GENE = ko2gene, TERM2NAME = pathway2name,
                 pvalueCutoff = 0.05, qvalueCutoff = 0.2, minGSSize = 10)
dotplot(kegg)
#browseKEGG(kegg, 'ko05034') #在pathway通路圖上標(biāo)記富集到的基因，會(huì)鏈接到KEGG官網(wǎng)

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av

用clusterProfile進(jìn)行非模式物種且沒(méi)有參考基因組的GO和KEGG富集分析

用clusterProfile進(jìn)行非模式物種且沒(méi)有參考基因組的GO和KEGG富集分析

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九 欧美,1769亚洲,黄色成人av

用clusterProfile進(jìn)行非模式物種且沒(méi)有參考基因組的GO和KEGG富集分析

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av