2020-05-08 復現(xiàn)GSE137675 RNA-Seq結(jié)果(2):下游分析

  1. 將上游分析最后得到的counts.txt讀入到R中
rm(list = ls())  # 魔幻操作,一鍵清空~
options(stringsAsFactors = F)  # 關(guān)閉字符串轉(zhuǎn)因子選項
# 讀入featureCounts的表達矩陣
data <- read.table("counts.txt",sep = "\t",header = T,row.names = 1)
data[1:3,]

# 查看低/零表達基因情況
dim(data)
## [1] 60662    12
table(rowSums(data==0)==ncol(data))
## FALSE  TRUE 
## 40892 19770 
# 去除低表達和不表達的基因
exprSet <- data[apply(data,1,sum)!=0,]
dim(exprSet)
## [1] 40892    12

#對exprSet行名進行重命名(去除點號和后面的數(shù)字,去除重復的基因名)
tmp0 <- exprSet
tmp0$mean <- apply(tmp0,1,mean)
tmp0 <- tmp0[order(tmp0$mean,decreasing = T),]

str <- stringr::str_split(rownames(tmp0),"\\.",simplify=T)
tmp0$id <- str[,1]
tmp0 <- tmp0[!duplicated(tmp0$id),]
rownames(tmp0) <- tmp0$id

exprSet <- tmp0[,-(ncol(tmp0)-1)]

# 保存預處理后的表達矩陣
save(exprSet,file = "data/Step01-featureCounts2exprSet.Rdata")
  1. 讀入作者上傳的表達矩陣GSE137675_Reads_count_featureCounts.xlsx,
    并跟自己處理的結(jié)果做樣本相關(guān)性分析。
    代碼參考:https://mp.weixin.qq.com/s/b7UHpcicVu6yjj4aM_BGvw
rm(list = ls())  # 魔幻操作,一鍵清空~
options(stringsAsFactors = F)  # 關(guān)閉字符串轉(zhuǎn)因子選項
# 讀入作者上傳的表達矩陣
tmp <- read.csv("GSE137675_Reads_count_featureCounts.csv",row.names = 1)
dim(tmp)
## [1] 40892    12
tmp2 <- tmp[apply(tmp,1,sum)!=0,]
## [1] 35904    12
tmp2$id <- rownames(tmp2)

# 合并作者上傳的表達矩陣和自己走上游分析流程得到的表達矩陣
m <- merge(exprSet,tmp2,by='id')
rownames(m) <- m$id
m <- m[,-1]
dim(m)
## [1] 31523    24
pheatmap::pheatmap(cor(m))
pheatmap::pheatmap(cor(log2(m+1)))

cpmM <- log(edgeR::cpm(m+1))
pheatmap::pheatmap(cor(cpmM))

hg1 <- names(tail(sort(apply(cpmM[,1:12],1,mad)),500))
hg2 <- names(tail(sort(apply(cpmM[,13:24],1,mad)),500))
hg <- intersect(hg1,hg2)
hgM <- cpmM[hg,]
pheatmap::pheatmap(cor(hgM))
corrplot.png

對照表

CM2005.1.RNA-seq.rep1 SRR10139778
CM2005.1.RNA-seq.rep2 SRR10139779
CRMM1.RNA-seq.rep1 SRR10139780
CRMM1.RNA-seq.rep2 SRR10139781
OCM1.RNA-seq.rep1 SRR10139782
OCM1.RNA-seq.rep2 SRR10139783
OCM1a.RNA-seq.rep1 SRR10139784
OCM1a.RNA-seq.rep2 SRR10139785
OM431.RNA-seq.rep1 SRR10139786
OM431.RNA-seq.rep2 SRR10139787
PIG1.RNA-seq.rep1 SRR10139788
PIG1.RNA-seq.rep2 SRR10139789
  1. 使用R包DESeq2分析基因差異表達
rm(list = ls())  # 魔幻操作,一鍵清空~
options(stringsAsFactors = F)  # 關(guān)閉字符串轉(zhuǎn)因子選項

# 讀取基因表達矩陣信息
load(file = "data/Step01-featureCounts2exprSet.Rdata")

# 查看分組信息和表達矩陣數(shù)據(jù)
dim(exprSet)
## [1] 40884    13
colnames(exprSet)
##  [1] "SRR10139778" "SRR10139779" "SRR10139780" "SRR10139781" "SRR10139782"
##  [6] "SRR10139783" "SRR10139784" "SRR10139785" "SRR10139786" "SRR10139787"
## [11] "SRR10139788" "SRR10139789" "id" 
exprSet <- exprSet[,-ncol(exprSet)]
group_list <- c(rep("tumor",10),rep("normal",2))
names(group_list)<-colnames(exprSet)
group_list <- as.factor(group_list)

suppressMessages(library(DESeq2))
#### 第一步,構(gòu)建DESeq2的DESeq對象
colData <- data.frame(row.names=colnames(exprSet),group_list=group_list)
dds <- DESeqDataSetFromMatrix(countData = exprSet,colData = colData,
                              design = ~ group_list)
#### 第二步,進行差異表達分析
f  <- 'data/Step03-DESeq2_dds2.Rdata'
if(!file.exists(f)){
  dds2 <- DESeq(dds)
  # 保存差異表達分析結(jié)果
  save(dds2, file = "data/Step03-DESeq2_dds2.Rdata")
}
#### 第三步,提取差異分析結(jié)果
load(file = "data/Step03-DESeq2_dds2.Rdata")
# 提取差異分析結(jié)果,tumor組對normal組的差異分析結(jié)果
res <- results(dds2,contrast=c("group_list","tumor","normal"))
resOrdered <- res[order(res$padj),]
head(resOrdered)

DEG <- as.data.frame(resOrdered)
# 去除差異分析結(jié)果中包含NA值的行
DESeq2_DEG = na.omit(DEG)

# 取出包含logFC和P值的兩列
nrDEG=DESeq2_DEG[,c(2,6)]
colnames(nrDEG)=c('log2FoldChange','pvalue')  
save(nrDEG, DESeq2_DEG, file = "data/Step03-DESeq2_nrDEG.Rdata")

DEG <- nrDEG
logFC_cutoff <- 1.5
DEG$change = as.factor(ifelse(DEG$pvalue < 0.05 & abs(DEG$log2FoldChange) > logFC_cutoff,
                              ifelse(DEG$log2FoldChange > logFC_cutoff ,'UP','DOWN'),'STABLE'))
table(DEG$change)
##  DOWN STABLE     UP 
##  1555  20684   1964 
DOWN STABLE UP
1555 20684 1964

作者的差異基因分析結(jié)果:


DEG_res.png
最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時請結(jié)合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點,簡書系信息發(fā)布平臺,僅提供信息存儲服務。

友情鏈接更多精彩內(nèi)容