setwd("./1.GEO_datasets/GSE37815")library(GEOquery) gset = getGEO('GSE37815',destdir = '.',getGPL = F, AnnotGPL = T) gset = gset[[1]] expr = exprs(gset) # 表達(dá)矩陣 pdata = pData(gset) # 樣本信息 gset@annotation # 查看芯片平臺(tái)
probe = read.table(file = 'GPL6102-11574.txt', sep = '\t', quote = '', comment.char = '#', # 過(guò)濾掉'GPL6102-11574.txt'文件中以‘#’開(kāi)頭的注釋信息 header = T, fill = T, # 如果文件中某行的數(shù)據(jù)少于其他行,則自動(dòng)添加空白域。 stringsAsFactors = F) # 字符串不改為因子ids = probe[probe$Symbol != '', c(1,13)] # 提取探針和geneID
library(dplyr) colnames(ids) expr = as.data.frame(expr) expr$ID = rownames(expr) #ids = ids[-grep('///',ids$Gene.Symbol),] # 一個(gè)探針對(duì)應(yīng)多個(gè)基因,去除 exprSet = inner_join(ids,expr,by = 'ID') # 探針沒(méi)有對(duì)應(yīng)基因,去除library(limma) exprSet= avereps(exprSet[,-c(1,2)], # 多個(gè)探針對(duì)應(yīng)一個(gè)基因,取均值 ID = exprSet$Symbol) exprSet = as.data.frame(exprSet)pdf(file = 'rowbox.pdf') p <- boxplot(exprSet,outline=FALSE,las=2,col = 'blue',xaxt = 'n',ann = F) title(main = list('Before normalization',cex = 2 ,font = 2), xlab = list('Sample list',cex = 1.5,font = 2), ylab = '',line = 0.7) mtext('Expression value',side = 2,padj = -3,font = 2,cex = 1.5) dev.off()
library(limma) normalized_expr = normalizeBetweenArrays(exprSet) # 分位數(shù)標(biāo)準(zhǔn)化 method默認(rèn)為quantile #rt=log2(rt) 有時(shí)還需log2變換pdf(file = 'normalized_box.pdf') p1 <- boxplot(normalized_expr,outline=FALSE,las=2,col = 'red',xaxt = 'n',ann = F) title(main = list('Normalization',cex = 2 ,font = 2), xlab = list('Sample list',cex = 1.5,font = 2), ylab = '',line = 0.7) mtext('Expression value',side = 2,padj = -3,font = 2,cex = 1.5) dev.off()
group_list = pdata$title #table(pdata$title) # 查看樣品信息 control = normalized_expr[,grep('Control',group_list)] tumor = normalized_expr[,grep('cancer',group_list)] exprSet1 = cbind(tumor,control) group_list = c(rep('tumor',ncol(tumor)), rep('normal',ncol(control)))
data = exprSet1
group_list = factor(group_list) design <- model.matrix( ~0 + group_list) colnames( design ) = levels(group_list) rownames( design ) = colnames(data) contrast.matrix <- makeContrasts( "tumor-normal", levels = design)
fit <- lmFit( data, design ) fit2 <- contrasts.fit( fit, contrast.matrix ) fit2 <- eBayes( fit2 ) allDiff=topTable(fit2,adjust='fdr',number=200000) write.table(allDiff,file="alldiff.xls",sep="\t",quote=F)
allLimma=allDiff allLimma=allLimma[order(allLimma$logFC),] allLimma=rbind(Gene=colnames(allLimma),allLimma) write.table(allLimma,file="GSE37815_limmaTab.txt",sep="\t",quote=F,col.names=F)
logFoldChange=1 adjustP=0.05 diffSig <- allDiff[with(allDiff, (abs(logFC)>logFoldChange & adj.P.Val < adjustP )), ] write.table(diffSig,file="diff.xls",sep="\t",quote=F) diffUp <- allDiff[with(allDiff, (logFC>logFoldChange & adj.P.Val < adjustP )), ] write.table(diffUp,file="up.xls",sep="\t",quote=F) diffDown <- allDiff[with(allDiff, (logFC<(-logFoldChange) & adj.P.Val < adjustP )), ] write.table(diffDown,file="down.xls",sep="\t",quote=F)hmExp=data[rownames(diffSig),] diffExp=rbind(id=colnames(hmExp),hmExp) write.table(diffExp,file="diffExp.txt",sep="\t",quote=F,col.names=F
xMax=max(-log10(allDiff$adj.P.Val)) yMax=max(abs(allDiff$logFC)) library(ggplot2) allDiff$change <- ifelse(allDiff$adj.P.Val < 0.05 & abs(allDiff$logFC) > 1, ifelse(allDiff$logFC > 1,'UP','DOWN'), 'NOT') table(allDiff$change) pdf(file = 'volcano.pdf') ggplot(data= allDiff, aes(x = -log10(adj.P.Val), y = logFC, color = change)) + geom_point(alpha=0.8, size = 1) + theme_bw(base_size = 15) + theme(plot.title=element_text(hjust=0.5), # 標(biāo)題居中 panel.grid.minor = element_blank(), panel.grid.major = element_blank()) + # 網(wǎng)格線設(shè)置為空白 geom_hline(yintercept= 0 ,linetype= 2 ) + scale_color_manual(name = "", values = c("red", "green", "black"), limits = c("UP", "DOWN", "NOT")) + xlim(0,xMax) + ylim(-yMax,yMax) + labs(title = 'Volcano', x = '-Log10(adj.P.Val)', y = 'LogFC') dev.off()
top100exp = exprSet1[rownames(diffSig)[1:100],] # 按P值大小排序,選擇前100個(gè) annotation_col = data.frame(group = group_list) rownames(annotation_col) = colnames(exprSet1) library(pheatmap) pdf(file = 'heatmap.pdf') pheatmap(top100exp,annotation_col = annotation_col, color = colorRampPalette(c("green", "black", "red"))(50), fontsize = 5) dev.off()
聯(lián)系客服