qc.r

###
### Quality control of microarray data
###

### Include MicroArray package
source("D:\\Projects\\_Appl_\\_R_\\MicroArray.r")
library(limma);
library(ggplot2);

### Set up paths and import data
datadir=file.path(dataroot,"MetAction","mRNA");
exportroot=file.path(datadir,"Export");
annotationdir=file.path(dataroot,"_Public_","Agilent","Annot");
rawdatadir=file.path(datadir,'RawData');

array.options=c('main_probe','Main','ByProbeID');
array=ArrayData.new(path=file.path(exportroot,array.options[2],array.options[3]));
array=ArrayData.import(array,pastename(array.options[1],'raw_log2.dat'),name='raw');
array=ArrayData.import(array,pastename(array.options[1],'qnorm_log2.dat'),name='qnorm');
array=ArrayData.import(array,pastename(array.options[1],'qnorm_missimp_log2.dat'),name='missimp');

samples=read.delim(file.path(rawdatadir,"samples.dat"),header=TRUE);
file.pattern='^(.*\\/)?(\\w*\\d+)_(\\d+)_([^_]+_){4}(\\d)_(\\d)(\\.txt)?\\s*$';
file.to.id="\\3-\\5-\\6";
samples$ArrayID=gsub(file.pattern,file.to.id,samples$Agilent.file);
samples$SampleID=gsub('^(\\w+)[ -](\\d{4})\\s*(\\w+\\d)(-\\d)?\\s*$','\\1-\\2\\3\\4',samples$Sample.ID);
id.pattern='^(\\w+)-(\\d+)(([NM])\\w*)(\\d)(.*)$';


# Set up functions
trans.unit=function(s) s/(1+s);

# Set up statistics
array.mean=apply(array$missimp,1,mean);
array.sd=apply(array$missimp,1,sd);
sd.devmean.weight=function(sd) 1/sd**2;
array.devmean=apply((array$missimp-array.mean)**2*sd.devmean.weight(array.sd),2,mean);
sd.max=max(array.sd);
col.sd=heat.colors(50)[50*trans.unit(array.sd)];

# Plots (same plot in different versions)
plot(array.mean,array.sd,xlab='Mean array',ylab='Standard deviation',pch='.',col=col.sd);
qplot(array.mean,array.sd,color=array.sd,size=I(0.2))+
  scale_colour_gradient(limits=c(0,sd.max),low='darkblue',high='lightgreen')+
  geom_point(alpha=0.03,colour='magenta')+
  geom_point(alpha=0.01,colour='red')+
  geom_point(alpha=0.003,colour='yellow');

###
### Let x be matrix to analyse at any stage
###

### Quality controls on raw data
x=ArrayData.get(array,name='raw');
x.samples=colnames(x);

# Select sample
no=1;
plot(array.mean,x[,no],xlab='Mean array',ylab=x.samples[no],pch='.',col=col.sd);
qplot(array.mean,x[,no],xlab='Mean array',ylab=colnames(x)[no],color=array.sd,size=I(0.2))+
  scale_colour_gradient(limits=c(0,sd.max),low='darkblue',high='lightgreen')+
  geom_point(alpha=0.03,colour='magenta')+
  geom_point(alpha=0.01,colour='red')+
  geom_point(alpha=0.003,colour='yellow');

### Quality controls on normalised data
x=ArrayData.get(array,name='missimp');
x.samples=colnames(x);
x.ProjectID=gsub(id.pattern,'\\1',x.samples);
x.SampleType=gsub(id.pattern,'\\4',x.samples);
x.Group=gsub(id.pattern,'\\1-\\4',x.samples);

x.pca=pca(x,nPcs=5);
x.pc=data.frame(x.pca@loadings);
qplot(PC1,PC2,data=x.pc,color=x.Group)
qplot(PC2,PC3,data=x.pc,color=x.Group)
qplot(array.devmean,x.pc$PC1,color=x.Group)
Last modified March 06, 2014.