Paper:Tumor Evolution and Drug Response in Patient-Derived Organoid Models of Bladder Cancer
Figures:

PCA

ssGSEA
其實,是很簡單的處理:
差異分析后提取top1000進行PCA,要表達的意思是,tumor和TCGA的tumor能聚在一起;- 對差異基因進行GSEA的KEGG富集(還有GO富集,當時眼睛可能有點兒瞎,一直在想為啥extracellular的沒有富集到。。。
那不是KEGG啊,傻孩子,此處感謝jimmy);- 有了其實,必有但是;作者上傳的矩陣是辣個樣子的,
a.GEOquery拿不到矩陣;b.網(wǎng)頁下載的矩陣是DESeq2-normalized counts;c.英文不翻之DESeq2 doesn’t actually use normalized counts, rather it uses the raw counts and models the normalization inside the Generalized Linear Model (GLM). These normalized counts will be useful for downstream visualization of results, but cannot be used as input to DESeq2 or any other tools that peform differential expression analysis which use the negative binomial model.
純代碼:
Step1-download
###一些常規(guī)的設置
rm(list = ls())#清空環(huán)境變量
options(stringsAsFactors = F)##字符不作為因子讀入
#####數(shù)據(jù)下載
library(GEOquery)
h<-'GSE103990.Rdata'
####getGPL獲得平臺的注釋信息,但下載速度會慢很多
####而且注釋文件格式大多不如bioconductor包好用
if(!file.exists(h)){
gset<-getGEO('GSE103990',destdir='.',
AnnotGPL=F,
getGPL=F)
save(gset,file=h)
}
load('GSE103990.Rdata')
ex<- exprs(gset[[1]])
pd <- pData(gset[[1]])
#system('nohup wget ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE103nnn/GSE103990/suppl/GSE103990_Normalized_counts.txt.gz &')
counts_nor <- read.table('GSE103990_Normalized_counts.txt.gz')
save(counts_nor,pd,file='Nor.Rdata')
Step2-差異分析(數(shù)據(jù)不合適,但代碼木有問題)
rm(list = ls())
options(stringsAsFactors = F)
load('Nor.Rdata')
rownames(counts_nor)<- substr(rownames(counts_nor),1,15)
exprSet<- floor(counts_nor)
pd <- pd[match(colnames(exprSet),pd$description.1),]
group_list <- ifelse(grepl('org',pd$title),'org','tumor')
suppressMessages(library(DESeq2))
#### 第一步,構建DESeq2的DESeq對象
colData <- data.frame(row.names=colnames(exprSet),group_list=group_list)
dds <- DESeqDataSetFromMatrix(countData = exprSet,colData = colData,
design = ~ group_list)
#### 第二步,進行差異表達分析
dds2 <- DESeq(dds)
res <- results(dds2,contrast=c("group_list","org","tumor"))
resOrdered <- res[order(res$padj),]
DEG <- as.data.frame(resOrdered)
DESeq2_DEG = na.omit(DEG)
nrDEG=DESeq2_DEG[,c(2,6)]
colnames(nrDEG)=c('log2FoldChange','pvalue')
save(nrDEG, DESeq2_DEG, file = "DEG.Rdata")
Step3-PCA
rm(list=ls())
library(edgeR)
load('Nor.Rdata')
load('BLC.Rdata')
load('DEG.Rdata')
rownames(counts_nor)<- substr(rownames(counts_nor),1,15)
nor_BLCA <- edgeR::cpm(expr_BLC,log=T)
nor_paper <- edgeR::cpm(floor(counts_nor),log=T)
inter_gene<- intersect(rownames(nor_BLCA),rownames(nor_paper))
choose_gene <- rownames(nrDEG)[abs(nrDEG$log2FoldChange)>1.5&nrDEG$pvalue<0.01]
nrDEG <- nrDEG[order(abs(nrDEG$log2FoldChange),nrDEG$pvalue,decreasing = T),]
final_choose<- rownames(nrDEG)[rownames(nrDEG)%in%inter_gene][1:1000]
nr_paper <- nor_paper[final_choose,]
nr_BLC <- nor_BLCA[final_choose,]
nr_pca <- cbind(nr_paper,nr_BLC)
group_list <- ifelse(grepl('TCGA',colnames(nr_pca)),'TCGA',ifelse(grepl('org',pd$title),'org','tumor'))
library("FactoMineR")
library("factoextra")
####
df.pca <- PCA(t(nr_pca), graph = FALSE)
fviz_pca_ind(df.pca,
geom.ind = "point",
col.ind = group_list,
addEllipses = F,
legend.title = "Groups")
Step4-GSEA
如果對GSEA中的細節(jié)感興趣,看http://m.itdecent.cn/p/be8fe1318850
rm(list=ls())
load('DEG.Rdata')
library(org.Hs.eg.db)
library(clusterProfiler)
gene <- bitr(rownames(nrDEG), fromType = "ENSEMBL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)
gene$logfc <- nrDEG$log2FoldChange[match(gene$ENSEMBL,rownames(nrDEG))]
geneList=nrDEG$log2FoldChange
names(geneList)=gene$ENTREZID
geneList=sort(geneList,decreasing = T)
head(geneList)
library(clusterProfiler)
kk_gse <- gseKEGG(geneList = geneList,
organism = 'hsa',
nPerm = 1000,
minGSSize = 10,
pvalueCutoff = 0.9,
verbose = FALSE)
go_gse <- gseGO(geneList = geneList,
OrgDb=org.Hs.eg.db,
nPerm = 1000,
minGSSize = 10,
pvalueCutoff = 0.9,
verbose = FALSE)
paper_choose <- c('Cell adhesion molecules (CAMs)',
'Cell cycle',
'ErbB signaling pathway',
'extracellular structure organization')
a <- list()
for (i in 1:3){
a[[i]]<- gseaplot2(kk_gse,
geneSetID = kk_gse@result$ID[kk_gse@result$Description==paper_choose[i]],
pvalue_table = T)
}
a
gseaplot2(go_gse,
geneSetID = go_gse@result$ID[go_gse@result$Description==paper_choose[4]],
pvalue_table = T)
Results:
- 寫在最后,其實PCA的目的是為了說明,腫瘤和TCGA的腫瘤可以聚在一起,而org聚在一起;
如果感興趣的話,就自己從上游走一遍RNA-seq流程,再走一遍我這里R的代碼;再如果,你重復出來了,一定一定一定要回復告訴我,我給你個小福利;
image.png

image.png

ErbB

Cell cycle

CAM

extracellular
課程分享
生信技能樹全球公益巡講
(https://mp.weixin.qq.com/s/E9ykuIbc-2Ja9HOY0bn_6g)
B站公益74小時生信工程師教學視頻合輯
(https://mp.weixin.qq.com/s/IyFK7l_WBAiUgqQi8O7Hxw)
招學徒:
(https://mp.weixin.qq.com/s/KgbilzXnFjbKKunuw7NVfw)
