#!/usr/bin/env Rscript
# 空間基因表達(dá)分布圖譜分析
# 輸入:質(zhì)控后數(shù)據(jù)
# 輸出:空間分布圖譜、處理后數(shù)據(jù)
# 加載必要的包
suppressMessages({
? library(Seurat)
? library(ggplot2)
? library(dplyr)
? library(patchwork)
? library(viridis)
? library(cowplot)
? library(RColorBrewer)
? library(pheatmap)
})
# 讀取Snakemake參數(shù)
input_qc_data <- snakemake@input[["qc_data"]]
top_genes <- snakemake@params[["top_genes"]]
resolution <- snakemake@params[["resolution"]]
# 輸出文件路徑
output_spatial_plots <- snakemake@output[["spatial_plots"]]
output_spatial_data <- snakemake@output[["spatial_data"]]
# 創(chuàng)建輸出目錄
dir.create(dirname(output_spatial_plots), recursive = TRUE, showWarnings = FALSE)
# 讀取質(zhì)控?cái)?shù)據(jù)
cat("正在讀取質(zhì)控?cái)?shù)據(jù)...\n")
seurat_obj <- readRDS(input_qc_data)
# 數(shù)據(jù)標(biāo)準(zhǔn)化
cat("正在進(jìn)行數(shù)據(jù)標(biāo)準(zhǔn)化...\n")
seurat_obj <- NormalizeData(seurat_obj, normalization.method = "LogNormalize", scale.factor = 10000)
# 尋找高變基因
cat("正在尋找高變基因...\n")
seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = "vst", nfeatures = 2000)
# 獲取高變基因
top_variable_genes <- head(VariableFeatures(seurat_obj), top_genes)
# 數(shù)據(jù)縮放
cat("正在進(jìn)行數(shù)據(jù)縮放...\n")
seurat_obj <- ScaleData(seurat_obj, features = VariableFeatures(seurat_obj))
# 主成分分析
cat("正在進(jìn)行主成分分析...\n")
seurat_obj <- RunPCA(seurat_obj, features = VariableFeatures(seurat_obj), verbose = FALSE)
# 聚類分析
cat("正在進(jìn)行聚類分析...\n")
seurat_obj <- FindNeighbors(seurat_obj, dims = 1:10)
seurat_obj <- FindClusters(seurat_obj, resolution = resolution)
# UMAP降維
cat("正在進(jìn)行UMAP降維...\n")
seurat_obj <- RunUMAP(seurat_obj, dims = 1:10)
# 創(chuàng)建圖表列表
spatial_plots <- list()
# 1. 高變基因圖
spatial_plots[["variable_features"]] <- VariableFeaturePlot(seurat_obj) +
? labs(title = "高變基因識(shí)別") +
? theme_minimal()
# 2. PCA圖
spatial_plots[["pca"]] <- DimPlot(seurat_obj, reduction = "pca", dims = c(1, 2)) +
? labs(title = "主成分分析") +
? theme_minimal()
# 3. PCA貢獻(xiàn)圖
spatial_plots[["pca_loadings"]] <- VizDimLoadings(seurat_obj, dims = 1:2, reduction = "pca") +
? labs(title = "主成分貢獻(xiàn)") +
? theme_minimal()
# 4. 肘部圖
spatial_plots[["elbow"]] <- ElbowPlot(seurat_obj) +
? labs(title = "主成分重要性") +
? theme_minimal()
# 5. UMAP聚類圖
spatial_plots[["umap_clusters"]] <- DimPlot(seurat_obj, reduction = "umap", label = TRUE) +
? labs(title = "UMAP聚類結(jié)果") +
? theme_minimal()
# 6. 聚類統(tǒng)計(jì)圖
cluster_counts <- table(Idents(seurat_obj))
cluster_df <- data.frame(
? Cluster = names(cluster_counts),
? Count = as.numeric(cluster_counts)
)
spatial_plots[["cluster_counts"]] <- ggplot(cluster_df, aes(x = Cluster, y = Count, fill = Cluster)) +
? geom_bar(stat = "identity") +
? labs(title = "聚類細(xì)胞數(shù)量分布", x = "聚類", y = "細(xì)胞數(shù)量") +
? theme_minimal() +
? theme(legend.position = "none")
# 7. 基因表達(dá)熱圖(top基因)
if (length(top_variable_genes) > 0) {
? # 計(jì)算每個(gè)聚類的平均表達(dá)
? Idents(seurat_obj) <- seurat_obj$seurat_clusters
? cluster_avg <- AverageExpression(seurat_obj, features = top_variable_genes, group.by = "seurat_clusters")
? cluster_avg_matrix <- as.matrix(cluster_avg$RNA)
? # 創(chuàng)建熱圖
? spatial_plots[["heatmap"]] <- pheatmap(
? ? cluster_avg_matrix,
? ? scale = "row",
? ? cluster_rows = TRUE,
? ? cluster_cols = TRUE,
? ? color = colorRampPalette(c("blue", "white", "red"))(100),
? ? main = paste("Top", top_genes, "基因表達(dá)熱圖"),
? ? fontsize = 8,
? ? silent = TRUE
? )
}
# 8. 特征基因表達(dá)圖(如果有空間信息)
# 由于輸入是h5文件,可能沒(méi)有空間坐標(biāo)信息,這里創(chuàng)建一個(gè)模擬的空間圖
if (!"spatial" %in% names(seurat_obj@images)) {
? cat("注意:沒(méi)有檢測(cè)到空間坐標(biāo)信息,將創(chuàng)建模擬空間圖...\n")
? # 基于UMAP坐標(biāo)創(chuàng)建模擬空間圖
? umap_coords <- Embeddings(seurat_obj, "umap")
? spatial_plots[["spatial_clusters"]] <- ggplot(data.frame(
? ? UMAP_1 = umap_coords[, 1],
? ? UMAP_2 = umap_coords[, 2],
? ? Cluster = Idents(seurat_obj)
? ), aes(x = UMAP_1, y = UMAP_2, color = Cluster)) +
? ? geom_point(size = 0.5) +
? ? labs(title = "空間聚類分布(基于UMAP)", x = "UMAP_1", y = "UMAP_2") +
? ? theme_minimal() +
? ? theme(panel.grid = element_blank())
}
# 9. 特定基因的空間表達(dá)圖
if (length(top_variable_genes) >= 4) {
? selected_genes <- top_variable_genes[1:4]
? for (i in 1:length(selected_genes)) {
? ? gene <- selected_genes[i]
? ? if (gene %in% rownames(seurat_obj)) {
? ? ? spatial_plots[[paste0("gene_", i)]] <- FeaturePlot(seurat_obj, features = gene, reduction = "umap") +
? ? ? ? labs(title = paste(gene, "基因表達(dá)")) +
? ? ? ? theme_minimal()
? ? }
? }
}
# 10. 質(zhì)控指標(biāo)在聚類中的分布
spatial_plots[["qc_violin"]] <- VlnPlot(seurat_obj,
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?pt.size = 0.1, ncol = 3) +
? labs(title = "質(zhì)控指標(biāo)在聚類中的分布") +
? theme_minimal()
# 保存所有圖表
pdf(output_spatial_plots, width = 16, height = 12)
# 布局1:基本分析
print(wrap_plots(spatial_plots[["variable_features"]],
? ? ? ? ? ? ? ? ?spatial_plots[["pca"]],
? ? ? ? ? ? ? ? ?spatial_plots[["elbow"]],
? ? ? ? ? ? ? ? ?spatial_plots[["cluster_counts"]],
? ? ? ? ? ? ? ? ?ncol = 2))
# 布局2:聚類結(jié)果
print(wrap_plots(spatial_plots[["umap_clusters"]],
? ? ? ? ? ? ? ? ?spatial_plots[["spatial_clusters"]],
? ? ? ? ? ? ? ? ?ncol = 2))
# 布局3:基因表達(dá)熱圖
if (!is.null(spatial_plots[["heatmap"]])) {
? print(spatial_plots[["heatmap"]])
}
# 布局4:特定基因表達(dá)
if (length(top_variable_genes) >= 4) {
? gene_plots <- spatial_plots[grep("^gene_", names(spatial_plots))]
? if (length(gene_plots) > 0) {
? ? print(wrap_plots(gene_plots, ncol = 2))
? }
}
# 布局5:質(zhì)控指標(biāo)分布
print(spatial_plots[["qc_violin"]])
dev.off()
# 保存處理后的數(shù)據(jù)
saveRDS(seurat_obj, output_spatial_data)
# 輸出分析摘要
cat("空間分布圖譜分析完成!\n")
cat("聚類數(shù)量:", length(unique(Idents(seurat_obj))), "\n")
cat("高變基因數(shù)量:", length(VariableFeatures(seurat_obj)), "\n")
cat("分析結(jié)果已保存到:", output_spatial_plots, "\n")
cat("處理后數(shù)據(jù)已保存到:", output_spatial_data, "\n")
# 打印聚類統(tǒng)計(jì)信息
cat("\n聚類統(tǒng)計(jì)信息:\n")
cluster_stats <- table(Idents(seurat_obj))
for (i in 1:length(cluster_stats)) {
? cat("聚類", names(cluster_stats)[i], ":", cluster_stats[i], "個(gè)細(xì)胞\n")
}