1.密度圖 ggdensity
1.1 用法
# Density plot: ggdensity() usage template; each argument is shown with the
# default value listed in this tutorial.
ggdensity(
# Data: a data frame
data,
# x-axis: the variable to plot
x,
# y-axis; the reference must be written as a quoted string
y = "..density..",
# Logical. Default is FALSE. Used only when y is a vector containing
# multiple variables to plot.
# If TRUE, create a multi-panel plot by combining the plots of the y variables.
combine = FALSE,
# Logical or character value. Default is FALSE. Used only when y is a vector
# containing multiple variables to plot.
# If TRUE, merge the multiple y variables in the same plotting area.
# Allowed values also include "asis" (TRUE) and "flip". If merge = "flip",
# the y variables are used as x tick labels and the x variable is used as
# the grouping variable.
merge = FALSE,
# Density line color and fill color
color = "black",
fill = NA,
# Color palette used for coloring or filling by groups
palette = NULL,
# Change the size of points and outlines
size = NULL,
linetype = "solid",
# Transparency
alpha = 0.5,
# Title and axis labels
title = NULL,
xlab = NULL,
ylab = NULL,
# Grouping variable(s) used to facet the plot into multiple panels
facet.by = NULL,
panel.labs = NULL,
short.panel.labs = TRUE,
# Allowed values are one of "mean" or "median" (to add a mean or a median
# line, respectively)
add = c("none", "mean", "median"),
add.params = list(linetype = "dashed"),
# Logical value; if TRUE, add a marginal rug.
rug = FALSE,
label = NULL,
# Font of the labels: a list that can contain a combination of the
# following elements:
# size (e.g. 14), style (e.g. "plain", "bold", "italic", "bold.italic")
# and color (e.g. "red") of the labels.
# For example font.label = list(size = 14, face = "bold", color = "red").
# To specify only the size and the style, use
# font.label = list(size = 14, face = "plain")
font.label = list(size = 11, color = "black"),
label.select = NULL,
# Logical value: whether to use ggrepel to avoid overplotting text labels
repel = FALSE,
label.rectangle = FALSE,
ggtheme = theme_pubr(),
...
)
1.2 自定義參數 ggpar()
使用函數 ggpar() 可以輕鬆自定義繪圖,閱讀 ?ggpar 以進行更改:
- 主標題和軸標籤:main,xlab,ylab
- 軸區間:xlim,ylim(例如:ylim = c(0, 30))
- 軸比例尺:xscale,yscale(例如:yscale = "log2")
- 調色板:palette = "Dark2" 或 palette = c("gray", "blue", "red")
- 圖例標題、標籤和位置:legend = "right"
- 繪圖方向:orientation = c("vertical", "horizontal", "reverse")
1.3 實例
# Create a reproducible example data set: 200 "F" rows followed by 200 "M"
# rows, with normally distributed weights centred on 55 and 58 respectively.
# The seed is fixed so the printed values below can be reproduced.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, mean = 55), rnorm(200, mean = 58))
)
head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430
# p1: plain density curve of `weight` with a light-gray fill,
# a mean line and a marginal rug.
p1 <- ggdensity(
  data = wdata,
  x = "weight",
  add = "mean",
  rug = TRUE,
  fill = "lightgray"
)
p1
#> Warning: geom_vline(): Ignoring `mapping` because `xintercept` was provided.
#> Warning: geom_vline(): Ignoring `data` because `xintercept` was provided.

# p2: outline color mapped to the grouping column "sex",
# drawn with a custom two-color palette.
p2 <- ggdensity(
  data = wdata,
  x = "weight",
  color = "sex",
  palette = c("#00AFBB", "#E7B800"),
  add = "mean",
  rug = TRUE
)
p2

# p3: both outline and fill colors mapped to "sex",
# same custom palette.
p3 <- ggdensity(
  data = wdata,
  x = "weight",
  color = "sex",
  fill = "sex",
  palette = c("#00AFBB", "#E7B800"),
  add = "mean",
  rug = TRUE
)
p3

# p4: same settings as p3, additionally faceted by "sex".
p4 <- ggdensity(
  data = wdata,
  x = "weight",
  color = "sex",
  fill = "sex",
  palette = c("#00AFBB", "#E7B800"),
  facet.by = "sex",
  add = "mean",
  rug = TRUE
)
p4




2. 疊加正態密度曲線 stat_overlay_normal_density
用於直觀地檢查數據偏離正態分布的程度
2.1 用法
# stat_overlay_normal_density() usage template; each argument is shown with
# the default value listed in this tutorial.
stat_overlay_normal_density(
# Set of aesthetic mappings created by aes() or aes_(). If specified and
# inherit.aes = TRUE (the default), it is combined with the default mapping
# at the top level of the plot. You must supply mapping if there is no plot
# mapping.
mapping = NULL,
data = NULL,
geom = "line",
position = "identity",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE,
...
)
2.2 舉例
# Simple density plot of mpg, x-axis limited to [-1, 50],
# with a dashed red normal-density overlay.
data("mtcars")
p1 <- ggdensity(data = mtcars, x = "mpg", fill = "red")
p1 <- p1 + scale_x_continuous(limits = c(-1, 50))
p1 <- p1 + stat_overlay_normal_density(linetype = "dashed", color = "red")
p1

# Color by groups: one density and one dashed overlay per Species.
data("iris")
p2 <- ggdensity(data = iris, x = "Sepal.Length", color = "Species")
p2 <- p2 + stat_overlay_normal_density(aes(color = Species), linetype = "dashed")
p2

# Facet: one panel per Species, each with a dashed red overlay.
p3 <- ggdensity(data = iris, x = "Sepal.Length", facet.by = "Species")
p3 <- p3 + stat_overlay_normal_density(linetype = "dashed", color = "red")
p3



3.經驗累積分布函數 Empirical cumulative distribution function (ECDF)
樣本分布函數(sample distribution function)亦稱經驗分布函數,是統計學中的基本概念之一。樣本分布函數 Fn(x) 具有分布函數的性質,我們可以將其看成是以等概率 1/n 取值 X1, X2, …, Xn 的離散型隨機變量的分布函數,且該函數的圖形是一條跳躍式的臺階形折線:如觀測值不重複,則每一跳躍為 1/n;如有重複,則按 1/n 的倍數跳躍上升。
3.1 用法
# ggecdf() usage template; each argument is shown with the default value
# listed in this tutorial.
ggecdf(
data,
x,
combine = FALSE,
merge = FALSE,
color = "black",
palette = NULL,
size = NULL,
linetype = "solid",
# Title and axis labels
title = NULL,
xlab = NULL,
ylab = NULL,
# NOTE(review): facet.by / panel.labs presumably behave as described for
# ggdensity() and gghistogram() in this tutorial — confirm in ?ggecdf
facet.by = NULL,
panel.labs = NULL,
short.panel.labs = TRUE,
ggtheme = theme_pubr(),
...
)
3.2 舉例
# Create a reproducible example data set: 200 "F" rows followed by 200 "M"
# rows, with normally distributed weights centred on 55 and 58 respectively.
# The seed is fixed so the printed values below can be reproduced.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, mean = 55), rnorm(200, mean = 58))
)
head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430
# p1: ECDF of `weight` using the default appearance.
p1 <- ggecdf(data = wdata, x = "weight")
p1

# p2: line color and line type both mapped to the grouping column "sex",
# drawn with a custom two-color palette.
p2 <- ggecdf(
  data = wdata,
  x = "weight",
  linetype = "sex",
  color = "sex",
  palette = c("#00AFBB", "#E7B800")
)
p2


4.直方圖 Histogram plot
4.1 用法
# gghistogram() usage template; each argument is shown with the default value
# listed in this tutorial.
gghistogram(
# Data: a data frame
data,
# x-axis: the variable to plot
x,
# y-axis; the reference must be written as a quoted string
y = "..count..",
# Logical. Default is FALSE. Used only when y is a vector containing
# multiple variables to plot.
# If TRUE, create a multi-panel plot by combining the plots of the y variables.
combine = FALSE,
# Logical or character value. Default is FALSE. Used only when y is a vector
# containing multiple variables to plot.
# If TRUE, merge the multiple y variables in the same plotting area.
# Allowed values also include "asis" (TRUE) and "flip". If merge = "flip",
# the y variables are used as x tick labels and the x variable is used as
# the grouping variable.
merge = FALSE,
weight = NULL,
color = "black",
fill = NA,
# Color palette used for coloring or filling by groups
palette = NULL,
size = NULL,
linetype = "solid",
alpha = 0.5,
# Number of bins; defaults to 30.
bins = NULL,
# Numeric value specifying the bin width. Use a value between 0 and 1 when
# you have a strongly dense dot plot. For example binwidth = 0.2.
binwidth = NULL,
title = NULL,
xlab = NULL,
ylab = NULL,
# Character vector of length 1 or 2 specifying the grouping variables used
# to facet the plot into multiple panels. Should be in the data.
facet.by = NULL,
# A list of one or two character vectors used to modify the facet panel
# labels. For example, panel.labs = list(sex = c("Male", "Female"))
# specifies the labels for the "sex" variable. For two grouping variables
# you can use, for example,
# panel.labs = list(sex = c("Male", "Female"), rx = c("Obs", "Lev", "Lev2")).
panel.labs = NULL,
short.panel.labs = TRUE,
add = c("none", "mean", "median"),
# Parameters (color, size, linetype) for the argument 'add';
# e.g. add.params = list(color = "red").
add.params = list(linetype = "dashed"),
rug = FALSE,
add_density = FALSE,
label = NULL,
font.label = list(size = 11, color = "black"),
label.select = NULL,
repel = FALSE,
label.rectangle = FALSE,
position = position_identity(),
ggtheme = theme_pubr(),
...
)
4.2 自定義參數 ggpar()
使用函數 ggpar() 可以輕鬆自定義繪圖,閱讀 ?ggpar 以進行更改:
- 主標題和軸標籤:main,xlab,ylab
- 軸區間:xlim,ylim(例如:ylim = c(0, 30))
- 軸比例尺:xscale,yscale(例如:yscale = "log2")
- 調色板:palette = "Dark2" 或 palette = c("gray", "blue", "red")
- 圖例標題、標籤和位置:legend = "right"
- 繪圖方向:orientation = c("vertical", "horizontal", "reverse")
4.3 實例
# Create a reproducible example data set: 200 "F" rows followed by 200 "M"
# rows, with normally distributed weights centred on 55 and 58 respectively.
# The seed is fixed so the printed values below can be reproduced.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, mean = 55), rnorm(200, mean = 58))
)
head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430
# NOTE: every call below passes `bins = 30` explicitly. That is the value
# gghistogram() reports falling back to ("Using `bins = 30` by default"), so
# the plots are unchanged, but stating it avoids that warning on every call.
# Pick a value better suited to your data where appropriate.

# Basic histogram plot
# Add mean line and marginal rug
p1 <- gghistogram(wdata, x = "weight", fill = "lightgray",
                  bins = 30, add = "mean", rug = TRUE)
p1
#> Warning: geom_vline(): Ignoring `mapping` because `xintercept` was provided.
#> Warning: geom_vline(): Ignoring `data` because `xintercept` was provided.

# Change outline colors by groups ("sex")
# Use custom color palette
p2 <- gghistogram(wdata, x = "weight",
                  bins = 30, add = "mean", rug = TRUE,
                  color = "sex", palette = c("#00AFBB", "#E7B800"))
p2

# Change outline and fill colors by groups ("sex")
# Use custom color palette
p3 <- gghistogram(wdata, x = "weight",
                  bins = 30, add = "mean", rug = TRUE,
                  color = "sex", fill = "sex",
                  palette = c("#00AFBB", "#E7B800"))
p3

# Combine histogram and density plots
p4 <- gghistogram(wdata, x = "weight",
                  bins = 30, add = "mean", rug = TRUE,
                  fill = "sex", palette = c("#00AFBB", "#E7B800"),
                  add_density = TRUE)
p4

# Weighted histogram
p5 <- gghistogram(iris, x = "Sepal.Length", weight = "Petal.Length",
                  bins = 30)
p5





5.分位數圖(Q-Q 圖) Quantile-Quantile Plot
統計學裡 Q-Q 圖(Q 代表分位數)是一種概率圖,用圖形的方式比較兩個概率分布,把它們的分位數放在一起比較。首先選好分位數間隔。圖上的點 (x, y) 反映出第二個分布(y 坐標)的某個分位數和與之對應的第一個分布(x 坐標)的相同分位數。因此,這條線是一條以分位數間隔為參數的曲線。如果兩個分布相似,則該 Q-Q 圖趨近於落在 y = x 線上。如果兩分布線性相關,則點在 Q-Q 圖上趨近於落在一條直線上,但不一定在 y = x 線上。Q-Q 圖也可以用來直觀地評估位置-尺度分布族中的參數。
從定義中可以看出,Q-Q 圖主要用於檢驗數據分布的相似性。如果要利用 Q-Q 圖來對數據進行正態分布的檢驗,則可以令 x 軸為正態分布的分位數,y 軸為樣本分位數;如果這兩者構成的點分布在一條直線上,就證明樣本數據與正態分布存在線性相關性,即服從正態分布。
5.1 用法
# ggqqplot() usage template; each argument is shown with the default value
# listed in this tutorial.
ggqqplot(
data,
x,
combine = FALSE,
merge = FALSE,
color = "black",
palette = NULL,
size = NULL,
shape = NULL,
# NOTE(review): presumably "qqline" adds the Q-Q reference line and "none"
# omits it — confirm in ?ggqqplot
add = c("qqline", "none"),
add.params = list(linetype = "solid"),
# NOTE(review): presumably toggles a confidence band and sets its level —
# confirm in ?ggqqplot
conf.int = TRUE,
conf.int.level = 0.95,
# Title and axis labels
title = NULL,
xlab = NULL,
ylab = NULL,
facet.by = NULL,
panel.labs = NULL,
short.panel.labs = TRUE,
ggtheme = theme_pubr(),
...
)
5.2 自定義參數 ggpar()
使用函數 ggpar() 可以輕鬆自定義繪圖,閱讀 ?ggpar 以進行更改:
- 主標題和軸標籤:main,xlab,ylab
- 軸區間:xlim,ylim(例如:ylim = c(0, 30))
- 軸比例尺:xscale,yscale(例如:yscale = "log2")
- 調色板:palette = "Dark2" 或 palette = c("gray", "blue", "red")
- 圖例標題、標籤和位置:legend = "right"
- 繪圖方向:orientation = c("vertical", "horizontal", "reverse")
5.3 實例
# Create a reproducible example data set: 200 "F" rows followed by 200 "M"
# rows, with normally distributed weights centred on 55 and 58 respectively.
# The seed is fixed so the printed values below can be reproduced.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, mean = 55), rnorm(200, mean = 58))
)
head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430
# p1: Q-Q plot of `weight` using the default appearance.
p1 <- ggqqplot(data = wdata, x = "weight")
p1

# p2: point color mapped to the grouping column "sex",
# drawn with a custom two-color palette.
p2 <- ggqqplot(
  data = wdata,
  x = "weight",
  palette = c("#00AFBB", "#E7B800"),
  color = "sex"
)
p2


Reference
https://rpkgs.datanovia.com/ggpubr/reference/index.html
http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/