热点新闻
CUT&Tag 数据分析(3)
2023-12-19 03:05  浏览:2796  搜索引擎搜索“手机低淘网”
温馨提示:信息一旦丢失不一定找得到,请务必收藏信息以备急用!本站所有信息均是注册会员发布如遇到侵权请联系文章中的联系方式或客服删除!
联系我时,请说明是在手机低淘网看到的信息,谢谢。
展会发布 展会网站大全 报名观展合作 软文发布

8. 输出COUNT矩阵

前文得到了每个样本的peak,需要对peak的染色体位置、起始和终止位置、还有读取count进行统计,方便后续差异分析及基因组注释。

library("GenomicFeatures")#处理基因组数据 library(magrittr)#管道操作符 mPeak = GRanges()#存储峰值的基因组区域信息 #创建peak列表,合并每个样本的peaks projPath = "G:/linux/20231129cuttag/bam/bed1/bedgraph/" histL <- c("sample1","sample2","sample3") repL <- c("1","2") #迭代处理每个样本(hist 和 rep 的组合) #读取 SEACR 生成的峰值文件,并将它们合并到 mPeak 对象中。 for (hist in histL) { for (rep in repL) { peakRes = read.table(paste0(projPath,hist,"-",rep,".seacr_control.peaks.stringent.bed"), header = FALSE, fill = TRUE) mPeak = GRanges(seqnames = peakRes$V1,IRanges(start = peakRes$V2,end = peakRes$V3),strand = "*")%>% append(mPeak, .) } } #合并 mPeak 中重叠的峰值 masterPeak = reduce(mPeak) library(DESeq2) # 初始化 countMat 矩阵 projPath = "G:/linux/20231129cuttag/bam/" countMat <- matrix(NA, nrow = length(masterPeak), ncol = length(histL) * length(repL)) colnames(countMat) <- paste(rep(histL, each = length(repL)), rep(repL, length(histL)), sep = "_") # 获取重叠区域的计数 i <- 1 library(chromVAR)#分析染色体可变性 for (hist in histL) { for (rep in repL) { bamFile = paste0(projPath,hist,"-",rep,"_sortname.bam") fragment_counts <- getCounts(bamFile,masterPeak,paired = TRUE,by_rg =FALSE, format = "bam") countMat[,i] = counts(fragment_counts)[,1] i=i+1 } } # 从 masterPeak 中提取基因组区域信息作为行名 row_names <- paste0(seqnames(masterPeak), "_", start(masterPeak), "_", end(masterPeak)) rownames(countMat) <- row_names View(countMat) write.csv(countMat,"countMat.csv")

9. DESeq2差异分析

样本有生物学重复,且读入的count符合DESeq2输入要求,官网也用的这个。
多组DESeq2建议多组一起比较,我是进行的两两单独比较,后续再改。

library(DESeq2) #测序深度标准化和差异富集peaks检测 countMat <- read.csv("countMat.csv") View(countMat) row.names(countMat) <- countMat$X data <- countMat[,-1] View(data) selectR <- which(rowSums(data)>5) dataS <- data[selectR,] dim(dataS) dim(data) m <- data[,c(3,4,1,2)] View(m) # group settings colnames(m) group <- factor(c(rep("sample1", 2),rep("sample2",2))) names(group) <- colnames(m) group <- data.frame(ID = colnames(m), group = factor(c(rep("sample1", 2),rep("sample2",2)))) View(group) rownames(group) <- group$ID vs_ <- factor(group$group,levels = c("sample1","sample2")) design = model.matrix(~0 + vs_) rownames(design) <- group$ID DESeq2_design <- design View(design) #确保表达矩阵的列名与分组矩阵行名相一致 all(rownames(DESeq2_design)==colnames(m)) coldata <- data.frame(row.names = colnames(m), group) dds <- DESeqDataSetFromMatrix(countData = m, colData = as.data.frame(vs_), design = ~vs_) #DESeq2数据格式的构建 #dds <- dds[ rowSums(counts(dds)) > 1, ] #过滤一些low count的数据; DDS <- DESeq(dds)#DESeq进行标准化; resultsNames(DDS) res <- results(DDS) summary(res)#查看经过标准化矩阵的基本情况; # 获取标准化后的计数 normDDS <- counts(DDS,normalized = TRUE)#normalization with the respect to the sequencing depth colnames(normDDS) <- paste0(colnames(normDDS),"_norm") #获得差异结果 res <- as.data.frame(res) a <- cbind(m,normDDS,res) View(a) a$anno <- row.names(a) #基因注释 library(ChIPseeker) library("ChIPpeakAnno") library("GenomicFeatures") library("org.Hs.eg.db") ?makeTxDbFromGFF txdb <- makeTxDbFromGFF(file="F:/GENCODE_hisat2/gencode.v43.primary_assembly.annotation.gff3",format="gff3") peakAnno <- annotatePeak(masterPeak, tssRegion = c(-3000, 3000), TxDb = txdb, annoDb = "org.Hs.eg.db") write.csv(as.data.frame(peakAnno),"peakAnno.csv") data_anno <- read.csv("peakAnno.csv") View(data_anno) data_anno$anno<- paste0(data_anno$seqnames,"_",data_anno$start,"_",data_anno$end) names(data_anno) data <- data_anno[,c("SYMBOL","anno")] View(data) nnn <- merge(a,data,by="anno") View(nnn) write.csv(nnn,".csv")

得到差异分析结果并进行了基因注释,方便后续和转录组联合分析。




image.png

DESeq2: vignettes/DESeq2.Rmd (rdrr.io)

发布人:4bab****    IP:124.223.189***     举报/删稿
展会推荐
让朕来说2句
评论
收藏
点赞
转发