# Sample dataset:下載 Single-cell analysis of mouse cortex,或者從 GEO 數據庫下載 GSE60361 數據集
# 這一矩陣是以 UMI 作為表達量單位
# Create the working directory if needed and switch into it; every relative
# path used below is resolved against this directory.
dir.create("SCENIC_MouseBrain", showWarnings = FALSE)
setwd("SCENIC_MouseBrain")

# Load GEOquery and download the supplementary files of GEO series GSE60361
# (makeDirectory = FALSE: do not create a per-series subdirectory).
library(GEOquery)
geoFile <- getGEOSuppFiles("GSE60361", makeDirectory = FALSE)

# Locate the compressed expression table among the downloaded files.
gzFile <- grep("Expression", basename(rownames(geoFile)), value = TRUE)
if (length(gzFile) != 1L) {
  stop("Expected exactly one 'Expression' file, found ", length(gzFile),
       call. = FALSE)
}
# Strip only a literal trailing ".gz"; the unanchored pattern ".gz" would also
# delete any "<char>gz" sequence elsewhere in the file name.
txtFile <- sub("\\.gz$", "", gzFile)
gunzip(gzFile, destname = txtFile, remove = TRUE)

# Load data.table and read the tab-separated expression table.
library(data.table)
geoData <- fread(txtFile, sep = "\t")
# The first column is used as the gene names.
geneNames <- geoData[[1]]

# Convert the remaining columns to a matrix and free the data.table.
exprMatrix <- as.matrix(geoData[, -1, with = FALSE])
rm(geoData)
dim(exprMatrix)

# Use the gene names as the row names of the matrix.
rownames(exprMatrix) <- geneNames
exprMatrix[1:5, 1:4]

# Remove the decompressed text file now that it has been read into memory.
file.remove(txtFile)
# 加載 celltype 的數據:
# Locate the cell-label table in the AUCell package's "examples" directory.
# mustWork = TRUE raises an error here if the file is not found, instead of
# handing a bogus path to read.table().
cellLabelsFile <- system.file("examples", "mouseBrain_cellLabels.tsv",
                              package = "AUCell", mustWork = TRUE)

# Read the tab-separated table; the first column becomes the row names.
# read.table() already returns a data.frame, so no further conversion is needed.
cellLabels <- read.table(cellLabelsFile, row.names = 1, header = TRUE,
                         sep = "\t")

# Name the label column "CellType".
colnames(cellLabels) <- "CellType"
# 將上述的數據集 merge 成 SCE object
# Drop rows with duplicated gene names, keeping the first occurrence of each.
# (This does not convert to a sparse matrix; exprMatrix stays a dense matrix.)
# drop = FALSE keeps the result a matrix even if a single row remains.
exprMatrix <- exprMatrix[!duplicated(rownames(exprMatrix)), , drop = FALSE]
dim(exprMatrix)

# Load SingleCellExperiment.
library(SingleCellExperiment)

# Build the SCE object: the expression matrix is stored as the "counts" assay,
# and the column data holds the CellType label looked up by cell (column) name.
sceMouseBrain <- SingleCellExperiment(
  assays = list(counts = exprMatrix),
  colData = cellLabels[colnames(exprMatrix), , drop = FALSE]
)

# Save the object as .rds under data/.
# The script already switched into "SCENIC_MouseBrain" at the top, so calling
# setwd("SCENIC_MouseBrain") again would look for a nested directory and fail.
dir.create("data", showWarnings = FALSE)
saveRDS(sceMouseBrain, file = "data/sceMouseBrain.Rds")