02.SP

#!/usr/bin/env Rscript

# 空間基因表達(dá)分布圖譜分析

# 輸入:質(zhì)控后數(shù)據(jù)

# 輸出:空間分布圖譜、處理后數(shù)據(jù)

# 加載必要的包

suppressMessages({

? library(Seurat)

? library(ggplot2)

? library(dplyr)

? library(patchwork)

? library(viridis)

? library(cowplot)

? library(RColorBrewer)

? library(pheatmap)

})

# 讀取Snakemake參數(shù)

input_qc_data <- snakemake@input[["qc_data"]]

top_genes <- snakemake@params[["top_genes"]]

resolution <- snakemake@params[["resolution"]]

# 輸出文件路徑

output_spatial_plots <- snakemake@output[["spatial_plots"]]

output_spatial_data <- snakemake@output[["spatial_data"]]

# 創(chuàng)建輸出目錄

dir.create(dirname(output_spatial_plots), recursive = TRUE, showWarnings = FALSE)

# 讀取質(zhì)控?cái)?shù)據(jù)

cat("正在讀取質(zhì)控?cái)?shù)據(jù)...\n")

seurat_obj <- readRDS(input_qc_data)

# 數(shù)據(jù)標(biāo)準(zhǔn)化

cat("正在進(jìn)行數(shù)據(jù)標(biāo)準(zhǔn)化...\n")

seurat_obj <- NormalizeData(seurat_obj, normalization.method = "LogNormalize", scale.factor = 10000)

# 尋找高變基因

cat("正在尋找高變基因...\n")

seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = "vst", nfeatures = 2000)

# 獲取高變基因

top_variable_genes <- head(VariableFeatures(seurat_obj), top_genes)

# 數(shù)據(jù)縮放

cat("正在進(jìn)行數(shù)據(jù)縮放...\n")

seurat_obj <- ScaleData(seurat_obj, features = VariableFeatures(seurat_obj))

# 主成分分析

cat("正在進(jìn)行主成分分析...\n")

seurat_obj <- RunPCA(seurat_obj, features = VariableFeatures(seurat_obj), verbose = FALSE)

# 聚類分析

cat("正在進(jìn)行聚類分析...\n")

seurat_obj <- FindNeighbors(seurat_obj, dims = 1:10)

seurat_obj <- FindClusters(seurat_obj, resolution = resolution)

# UMAP降維

cat("正在進(jìn)行UMAP降維...\n")

seurat_obj <- RunUMAP(seurat_obj, dims = 1:10)

# 創(chuàng)建圖表列表

spatial_plots <- list()

# 1. 高變基因圖

spatial_plots[["variable_features"]] <- VariableFeaturePlot(seurat_obj) +

? labs(title = "高變基因識(shí)別") +

? theme_minimal()

# 2. PCA圖

spatial_plots[["pca"]] <- DimPlot(seurat_obj, reduction = "pca", dims = c(1, 2)) +

? labs(title = "主成分分析") +

? theme_minimal()

# 3. PCA貢獻(xiàn)圖

spatial_plots[["pca_loadings"]] <- VizDimLoadings(seurat_obj, dims = 1:2, reduction = "pca") +

? labs(title = "主成分貢獻(xiàn)") +

? theme_minimal()

# 4. 肘部圖

spatial_plots[["elbow"]] <- ElbowPlot(seurat_obj) +

? labs(title = "主成分重要性") +

? theme_minimal()

# 5. UMAP聚類圖

spatial_plots[["umap_clusters"]] <- DimPlot(seurat_obj, reduction = "umap", label = TRUE) +

? labs(title = "UMAP聚類結(jié)果") +

? theme_minimal()

# 6. 聚類統(tǒng)計(jì)圖

cluster_counts <- table(Idents(seurat_obj))

cluster_df <- data.frame(

? Cluster = names(cluster_counts),

? Count = as.numeric(cluster_counts)

)

spatial_plots[["cluster_counts"]] <- ggplot(cluster_df, aes(x = Cluster, y = Count, fill = Cluster)) +

? geom_bar(stat = "identity") +

? labs(title = "聚類細(xì)胞數(shù)量分布", x = "聚類", y = "細(xì)胞數(shù)量") +

? theme_minimal() +

? theme(legend.position = "none")

# 7. 基因表達(dá)熱圖(top基因)

if (length(top_variable_genes) > 0) {

? # 計(jì)算每個(gè)聚類的平均表達(dá)

? Idents(seurat_obj) <- seurat_obj$seurat_clusters

? cluster_avg <- AverageExpression(seurat_obj, features = top_variable_genes, group.by = "seurat_clusters")

? cluster_avg_matrix <- as.matrix(cluster_avg$RNA)


? # 創(chuàng)建熱圖

? spatial_plots[["heatmap"]] <- pheatmap(

? ? cluster_avg_matrix,

? ? scale = "row",

? ? cluster_rows = TRUE,

? ? cluster_cols = TRUE,

? ? color = colorRampPalette(c("blue", "white", "red"))(100),

? ? main = paste("Top", top_genes, "基因表達(dá)熱圖"),

? ? fontsize = 8,

? ? silent = TRUE

? )

}

# 8. 特征基因表達(dá)圖(如果有空間信息)

# 由于輸入是h5文件,可能沒(méi)有空間坐標(biāo)信息,這里創(chuàng)建一個(gè)模擬的空間圖

if (!"spatial" %in% names(seurat_obj@images)) {

? cat("注意:沒(méi)有檢測(cè)到空間坐標(biāo)信息,將創(chuàng)建模擬空間圖...\n")


? # 基于UMAP坐標(biāo)創(chuàng)建模擬空間圖

? umap_coords <- Embeddings(seurat_obj, "umap")

? spatial_plots[["spatial_clusters"]] <- ggplot(data.frame(

? ? UMAP_1 = umap_coords[, 1],

? ? UMAP_2 = umap_coords[, 2],

? ? Cluster = Idents(seurat_obj)

? ), aes(x = UMAP_1, y = UMAP_2, color = Cluster)) +

? ? geom_point(size = 0.5) +

? ? labs(title = "空間聚類分布(基于UMAP)", x = "UMAP_1", y = "UMAP_2") +

? ? theme_minimal() +

? ? theme(panel.grid = element_blank())

}

# 9. 特定基因的空間表達(dá)圖

if (length(top_variable_genes) >= 4) {

? selected_genes <- top_variable_genes[1:4]


? for (i in 1:length(selected_genes)) {

? ? gene <- selected_genes[i]

? ? if (gene %in% rownames(seurat_obj)) {

? ? ? spatial_plots[[paste0("gene_", i)]] <- FeaturePlot(seurat_obj, features = gene, reduction = "umap") +

? ? ? ? labs(title = paste(gene, "基因表達(dá)")) +

? ? ? ? theme_minimal()

? ? }

? }

}

# 10. 質(zhì)控指標(biāo)在聚類中的分布

spatial_plots[["qc_violin"]] <- VlnPlot(seurat_obj,

? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),

? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?pt.size = 0.1, ncol = 3) +

? labs(title = "質(zhì)控指標(biāo)在聚類中的分布") +

? theme_minimal()

# 保存所有圖表

pdf(output_spatial_plots, width = 16, height = 12)

# 布局1:基本分析

print(wrap_plots(spatial_plots[["variable_features"]],

? ? ? ? ? ? ? ? ?spatial_plots[["pca"]],

? ? ? ? ? ? ? ? ?spatial_plots[["elbow"]],

? ? ? ? ? ? ? ? ?spatial_plots[["cluster_counts"]],

? ? ? ? ? ? ? ? ?ncol = 2))

# 布局2:聚類結(jié)果

print(wrap_plots(spatial_plots[["umap_clusters"]],

? ? ? ? ? ? ? ? ?spatial_plots[["spatial_clusters"]],

? ? ? ? ? ? ? ? ?ncol = 2))

# 布局3:基因表達(dá)熱圖

if (!is.null(spatial_plots[["heatmap"]])) {

? print(spatial_plots[["heatmap"]])

}

# 布局4:特定基因表達(dá)

if (length(top_variable_genes) >= 4) {

? gene_plots <- spatial_plots[grep("^gene_", names(spatial_plots))]

? if (length(gene_plots) > 0) {

? ? print(wrap_plots(gene_plots, ncol = 2))

? }

}

# 布局5:質(zhì)控指標(biāo)分布

print(spatial_plots[["qc_violin"]])

dev.off()

# 保存處理后的數(shù)據(jù)

saveRDS(seurat_obj, output_spatial_data)

# 輸出分析摘要

cat("空間分布圖譜分析完成!\n")

cat("聚類數(shù)量:", length(unique(Idents(seurat_obj))), "\n")

cat("高變基因數(shù)量:", length(VariableFeatures(seurat_obj)), "\n")

cat("分析結(jié)果已保存到:", output_spatial_plots, "\n")

cat("處理后數(shù)據(jù)已保存到:", output_spatial_data, "\n")

# 打印聚類統(tǒng)計(jì)信息

cat("\n聚類統(tǒng)計(jì)信息:\n")

cluster_stats <- table(Idents(seurat_obj))

for (i in 1:length(cluster_stats)) {

? cat("聚類", names(cluster_stats)[i], ":", cluster_stats[i], "個(gè)細(xì)胞\n")

}

?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書(shū)系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容