散點圖常用於展示兩個變量之間的關係。下面將首先展示如何在R中繪製散點圖,並使用ggpubr包中的函數來添加相關係數和顯著性水平;還將介紹如何進行分組著色以及如何在每個組周圍添加橢圓。此外,還將展示如何繪製氣泡圖,以及如何添加邊際圖(直方圖、密度圖或箱線圖)。

加載數據
library(ggpubr)
# Load the built-in mtcars data set and work on a copy named `df`
data("mtcars")
df <- mtcars
# Treat the cylinder count as a categorical grouping variable
df[["cyl"]] <- as.factor(df[["cyl"]])
# Preview the first rows of the columns used in the plots below
head(df[c("wt", "mpg", "cyl", "qsec")])
## wt mpg cyl qsec
## Mazda RX4 2.62 21.0 6 16.5
## Mazda RX4 Wag 2.88 21.0 6 17.0
## Datsun 710 2.32 22.8 4 18.6
## Hornet 4 Drive 3.21 21.4 6 19.4
## Hornet Sportabout 3.44 18.7 8 17.0
## Valiant 3.46 18.1 6 20.2
基本散點圖
# Scatter plot of mpg against wt with a fitted regression line
ggscatter(
  data = df, x = "wt", y = "mpg",
  add = "reg.line",  # overlay the regression line
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE    # draw the confidence interval around the line
) +
  # Annotate the plot with the Pearson correlation coefficient
  stat_cor(method = "pearson", label.x = 3, label.y = 30)

可以通過shape參數來修改點的形狀:
# Same scatter plot, drawn with point shape 18
ggscatter(data = df, x = "wt", y = "mpg", shape = 18)
要查看其他的點形狀,可以輸入如下代碼:
# Display the point shapes that can be passed to `shape`
show_point_shapes()

點分組著色
# Color and shape the points by the "cyl" group, with a regression line
# and confidence interval
ggscatter(
  data = df, x = "wt", y = "mpg",
  color = "cyl", shape = "cyl",  # map both color and point shape to "cyl"
  palette = "jco",
  add = "reg.line",
  conf.int = TRUE
) +
  stat_cor(aes(color = cyl), label.x = 3)  # add correlation coefficients

# Same grouped plot, with the regression lines extended
# (fullrange = TRUE) and a marginal rug added (rug = TRUE)
ggscatter(
  data = df, x = "wt", y = "mpg",
  color = "cyl", shape = "cyl",
  palette = "jco",
  add = "reg.line",
  fullrange = TRUE,
  rug = TRUE
) +
  stat_cor(aes(color = cyl), label.x = 3)  # add correlation coefficients

添加分組橢圓
主要參數:
- ellipse = TRUE: 在分組周圍添加橢圓
- ellipse.level: 以正態概率表示橢圓的大小,默認值為0.95。
- ellipse.type: 橢圓類型,可選值可以是"convex"、"confidence"或ggplot2::stat_ellipse支持的類型,包括
c("t", "norm", "euclid"),默認值為"norm"
# Draw an ellipse around each "cyl" group
ggscatter(
  data = df, x = "wt", y = "mpg",
  color = "cyl", shape = "cyl",
  palette = "jco",
  ellipse = TRUE
)

# Switch the ellipse type to "convex"
ggscatter(
  data = df, x = "wt", y = "mpg",
  color = "cyl", shape = "cyl",
  palette = "jco",
  ellipse = TRUE, ellipse.type = "convex"
)

# Add the group mean points and a star plot
ggscatter(
  data = df, x = "wt", y = "mpg",
  color = "cyl", shape = "cyl",
  palette = "jco",
  ellipse = TRUE,
  mean.point = TRUE,
  star.plot = TRUE
)

添加點標簽
主要參數:
- label: 包含點標簽的列名稱。
- font.label: 一個列表,可以包含以下元素的組合: 標簽的字體大小(例如:14)、樣式(例如:"plain", "bold", "italic", "bold.italic")和顏色(例如:"red")。例如,font.label = list(size = 14, face = "bold", color = "red")。
- label.select: 字符向量,指定要顯示的一些標簽。
- repel = TRUE: 避免標簽重疊。
# Use the row names as point labels
df[["name"]] <- rownames(df)
ggscatter(
  data = df, x = "wt", y = "mpg",
  color = "cyl", palette = "jco",
  label = "name",
  repel = TRUE  # avoid overlapping labels
)

# Show labels only for the named points
ggscatter(
  data = df, x = "wt", y = "mpg",
  color = "cyl", palette = "jco",
  label = "name",
  label.select = c("Toyota Corolla", "Merc 280", "Duster 360"),
  repel = TRUE
)

# Show labels only for the points matching a criteria expression
ggscatter(
  data = df, x = "wt", y = "mpg",
  color = "cyl", palette = "jco",
  label = "name",
  label.select = list(criteria = "`x` > 4 & `y` < 15"),
  repel = TRUE
)

氣泡圖
在氣泡圖中,點大小由連續變量(此處為"qsec")控制,參數alpha用於控制顏色的透明度,取值在0到1之間。
# Bubble chart: point size follows "qsec", drawn with alpha = 0.5
ggscatter(
  data = df, x = "wt", y = "mpg",
  color = "cyl", palette = "jco",
  size = "qsec",
  alpha = 0.5
) +
  scale_size(range = c(0.5, 15))  # adjust the range of point sizes

設置連續變量的顏色
下面將根據連續變量的值(此處為"mpg")對點進行著色。默認情況下,將繪製藍色漸變顏色,可以使用函數gradient_color()修改。
# Color the points by the continuous variable "mpg"
p <- ggscatter(data = df, x = "wt", y = "mpg", color = "mpg")
p
# Change the gradient to blue-white-red
p + gradient_color(c("blue", "white", "red"))


添加邊際圖
ggExtra包中的函數ggMarginal()可用於向散點圖添加邊際直方圖、密度圖或箱線圖。
首先,安裝ggExtra包:
# Install ggExtra only when it is not already available, so that re-running
# the script does not reinstall the package on every run
if (!requireNamespace("ggExtra", quietly = TRUE)) {
  install.packages("ggExtra")
}
繪制散點圖:
library(ggExtra)
# Scatter plot of the iris sepal measurements, colored by "Species"
p <- ggscatter(
  data = iris, x = "Sepal.Length", y = "Sepal.Width",
  color = "Species", palette = "jco",
  alpha = 0.6, size = 3
)
# Add marginal density plots
ggMarginal(p, type = "density")
# Change the marginal plot type to box plots
ggMarginal(p, type = "boxplot")


ggExtra包的局限性之一是它無法處理散點圖和邊際圖中的多個分組,可以使用cowplot包來解決。
# Main scatter plot colored by "Species", with a border and no legend
sp <- ggscatter(
  data = iris, x = "Sepal.Length", y = "Sepal.Width",
  color = "Species", palette = "jco",
  alpha = 0.6, size = 3
) +
  border() +
  rremove("legend")
# Marginal density of x (top panel), cleaned up and without a legend
xplot <- ggdensity(
  data = iris, x = "Sepal.Length",
  fill = "Species", palette = "jco"
) +
  clean_theme() +
  rremove("legend")
# Marginal density of y (right panel), rotated, cleaned up, no legend
yplot <- ggdensity(
  data = iris, x = "Sepal.Width",
  fill = "Species", palette = "jco"
) +
  rotate() +
  clean_theme() +
  rremove("legend")
# Arrange the panels on a 2 x 2 grid with cowplot; the NULL entry leaves
# the second cell empty
library(cowplot)
plot_grid(
  xplot, NULL, sp, yplot,
  ncol = 2, align = "hv",
  rel_widths = c(2, 1), rel_heights = c(1, 2)
)

添加邊際箱線圖:
# Main scatter plot colored by "Species", black-and-white theme, no legend
sp <- ggscatter(
  data = iris, x = "Sepal.Length", y = "Sepal.Width",
  color = "Species", palette = "jco",
  alpha = 0.6, size = 3,
  ggtheme = theme_bw()
) +
  rremove("legend")
# Marginal box plot of x (top panel), rotated, cleaned up, no legend
xplot <- ggboxplot(
  data = iris, x = "Species", y = "Sepal.Length",
  color = "Species", fill = "Species", palette = "jco",
  alpha = 0.5, ggtheme = theme_bw()
) +
  rotate() +
  clean_theme() +
  rremove("legend")
# Marginal box plot of y (right panel), cleaned up and without a legend
yplot <- ggboxplot(
  data = iris, x = "Species", y = "Sepal.Width",
  color = "Species", fill = "Species", palette = "jco",
  alpha = 0.5, ggtheme = theme_bw()
) +
  clean_theme() +
  rremove("legend")
# Arrange the panels on a 2 x 2 grid with cowplot; the NULL entry leaves
# the second cell empty
library(cowplot)
plot_grid(
  xplot, NULL, sp, yplot,
  ncol = 2, align = "hv",
  rel_widths = c(2, 1), rel_heights = c(1, 2)
)

但是,上面的圖美中不足的是在主圖和邊際圖之間存在多餘的空隙,不夠美觀,有一種解決方案如下:
library(cowplot)
# Main plot: iris sepal measurements colored by Species
pmain <- ggplot(
  iris,
  aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_point() +
  ggpubr::color_palette("jco")
# Marginal density along the x axis.
# `linewidth` is used for the outline width instead of `size`: using
# `size` for lines is deprecated as of ggplot2 3.4.0 and emits a warning
# (so this form requires ggplot2 >= 3.4.0).
xdens <- axis_canvas(pmain, axis = "x") +
  geom_density(
    data = iris, aes(x = Sepal.Length, fill = Species),
    alpha = 0.7, linewidth = 0.2
  ) +
  ggpubr::fill_palette("jco")
# Marginal density along the y axis.
# Because coord_flip() is used here, axis_canvas() must be told so via
# coord_flip = TRUE.
ydens <- axis_canvas(pmain, axis = "y", coord_flip = TRUE) +
  geom_density(
    data = iris, aes(x = Sepal.Width, fill = Species),
    alpha = 0.7, linewidth = 0.2
  ) +
  coord_flip() +
  ggpubr::fill_palette("jco")
# Insert the x-axis density at the top of the main plot, then the y-axis
# density at the right, and draw the combined result
p1 <- insert_xaxis_grob(pmain, xdens, grid::unit(.2, "null"), position = "top")
p2 <- insert_yaxis_grob(p1, ydens, grid::unit(.2, "null"), position = "right")
ggdraw(p2)
