个性化火山图来啦!小果带你绘制自己的无参转录组火山图
公众号后台回复“111”
领取本篇代码、基因集或示例数据等文件
文件编号:240430-1
需要租赁服务器的小伙伴可以扫码添加小果,此外小果还提供生信分析,思路设计,文献复现等,有需要的小伙伴欢迎来撩~
调用包及载入数据
library(ggrepel)
library(tidyverse)
library(ggplot2)
read.delim函数的确能直接读入数据,但通过dim函数可以发现该文件共有31338行、35列,而这35列并非全部都是我们需要的,因此可以在载入数据的同时进行筛选
# First pass: read the complete table and inspect it interactively.
dt <- read.delim('ArFvsArP_deg_all.xls')
View(dt)
# dim(dt)

# Second pass: re-read the file and keep only what the volcano plot needs.
# The path uses forward slashes because a single backslash inside an R string
# starts an escape sequence (e.g. "\U"), which makes the literal unparsable.
dt <- read.delim('C:/Users/10395/Desktop/r画图教程/数据/1.9火山图/ArFvsArP_deg_all.xls') %>%
  # Keep the gene ID, both read-count columns, the fold change and the p-value.
  select(GeneID, ArP_readcount, ArF_readcount, log2FoldChange, pval) %>%
  # Drop genes whose read count is zero in both samples.
  filter(ArP_readcount > 0 | ArF_readcount > 0) %>%
  # Move the gene IDs into the row names.
  column_to_rownames(var = 'GeneID')

# Shorter column names used by the rest of the script.
colnames(dt) <- c('arp', 'arf', 'logFC', 'Pval')
head(dt)
# Add the catogrey column, classifying every gene:
#   'up'     : logFC > 2  and Pval < 0.001
#   'down'   : logFC < -2 and Pval < 0.001
#   'normal' : everything else
dt[['catogrey']] <- with(
  dt,
  if_else(
    logFC > 2 & Pval < 0.001,
    'up',
    if_else(logFC < -2 & Pval < 0.001, 'down', 'normal')
  )
)
head(dt)
统计catogrey各类别的基因数量,以下两种写法效果相同
# Count how many genes fall into each catogrey value (dplyr version).
dt %>%
  group_by(catogrey) %>%
  summarise(freq = n())
# Base R equivalent of the count above.
table(dt$catogrey)
# Copy the row names into a regular `name` column so they can be used as labels.
dt[['name']] <- row.names(dt)
######## Volcano plots start here
# Basic volcano plot: logFC on the x axis, -log10(Pval) on the y axis,
# one semi-transparent point per gene, coloured by catogrey.
p1 <- ggplot(dt, aes(logFC, -log10(Pval))) +
  geom_point(aes(colour = catogrey), size = 2.5, alpha = 0.4)
p1
# Same plot with a custom colour palette.
p2 <- ggplot(data = dt,
             aes(x = logFC,
                 y = -log10(Pval))) + # x axis: logFC, y axis: -log10 of Pval
  geom_point(alpha = 0.4, size = 2.5,
             aes(color = catogrey)) + # points coloured by the catogrey column
  ylab("-log10(Pvalue)") + # y-axis label
  # Name the colours by category: an unnamed vector is matched by position,
  # so the colours would shift if one of the categories were absent.
  scale_color_manual(values = c(down = "blue4", normal = "grey", up = "red3"))
p2
# Add threshold guide lines and switch the theme.
p3 <- ggplot(data = dt,
             aes(x = logFC,
                 y = -log10(Pval))) + # x axis: logFC, y axis: -log10 of Pval
  geom_point(alpha = 0.4, size = 2.5,
             aes(color = catogrey)) + # points coloured by the catogrey column
  ylab("-log10(Pvalue)") + # y-axis label
  # Name the colours by category: an unnamed vector is matched by position,
  # so the colours would shift if one of the categories were absent.
  # blue = down-regulated, grey = unchanged, red = up-regulated.
  scale_color_manual(values = c(down = "blue4", normal = "grey", up = "red3")) +
  # Vertical guides at the logFC cut-offs used for the classification.
  geom_vline(xintercept = c(-2, 2), lty = 4, col = "black", lwd = 0.8) +
  # Horizontal guide at the p-value cut-off used for the classification.
  geom_hline(yintercept = -log10(0.001), lty = 4, col = "black", lwd = 0.8) +
  # Black-and-white theme with the panel grid lines removed.
  theme_bw() + theme(panel.grid = element_blank())
p3
########
# Pick the genes that will receive labels.
# Up-regulated genes: the 10 largest logFC values.
up_list <- dt %>%
  filter(catogrey == 'up') %>%
  # De-duplicate by name; per the original author's note this is unnecessary
  # for data like this that has no real gene symbols.
  # TRUE is spelled out because T is an ordinary, reassignable variable.
  distinct(name, .keep_all = TRUE) %>%
  top_n(10, logFC)
head(up_list)
# Down-regulated genes: the 10 most negative logFC values
# (top_n ranks by -logFC).
down_list <- dt %>%
  filter(catogrey == 'down') %>%
  distinct(name, .keep_all = TRUE) %>%
  top_n(10, -logFC)
head(down_list)
# Add text labels to the volcano plot.
# Only `label` is mapped here; x and y are inherited from the mapping that p3
# declares in its ggplot() call (x = logFC, y = -log10(Pval)).
P4 <- p3 +
  geom_text_repel(
    mapping = aes(label = name),
    data = up_list      # labels for the selected up-regulated genes
  ) +
  geom_text_repel(
    mapping = aes(label = name),
    data = down_list    # labels for the selected down-regulated genes
  )
P4
# Same labels drawn with geom_label_repel instead of geom_text_repel.
# Only `label` is mapped here; x and y are inherited from the mapping that p3
# declares in its ggplot() call (x = logFC, y = -log10(Pval)).
p5 <- p3 +
  geom_label_repel(
    mapping = aes(label = name),
    data = up_list      # labels for the selected up-regulated genes
  ) +
  geom_label_repel(
    mapping = aes(label = name),
    data = down_list    # labels for the selected down-regulated genes
  )
p5
# Per the original author's note, the labelling above does emit a warning;
# redrawing the annotated points before labelling them reportedly avoids it.

# Build the two layers that highlight and label one set of genes: a larger,
# faint point drawn over each gene plus a repelled label carrying its name.
#   genes        data frame with logFC, Pval and name columns
#   point_colour colour of the highlight points (should match the p3 palette)
# Returns a list of layers that can be added to a ggplot with `+`.
highlight_layers <- function(genes, point_colour) {
  list(
    geom_point(data = genes,
               aes(x = logFC, y = -log10(Pval)),
               color = point_colour, size = 4.5, alpha = 0.2),
    geom_label_repel(data = genes,
                     aes(x = logFC, y = -log10(Pval), label = name),
                     seed = 233,             # fixed random seed
                     size = 2.5,             # label font size
                     color = 'black',        # label text colour
                     min.segment.length = 0, # 0 = always draw the leader line
                     force = 2,              # repulsion between overlapping labels
                     force_pull = 2,         # attraction between a label and its point
                     box.padding = 0.4,      # padding around each label
                     max.overlaps = Inf)     # never drop a label
  )
}

# Start from p3, then add the up-regulated layers followed by the
# down-regulated ones.
p6 <- p3 +
  highlight_layers(up_list, 'red3') +
  highlight_layers(down_list, 'blue4')
p6
小果还提供思路设计、定制生信分析、文献思路复现;有需要的小伙伴欢迎直接扫码咨询小果,竭诚为您的科研助力!
定制生信分析
服务器租赁
扫码咨询小果
往期回顾
01 |
02 |
03 |
04 |