# merge.r

### Read in array data and normalise

## Setup
source("D:\\Projects\\_Appl_\\_R_\\MicroArray.r")
library(limma);
## Run flags
##   filter.ok     : when TRUE, only targets with Inclusion.tag "ok" are read
##   MAKE.RAW.FILE : when TRUE, the raw array files are read and merged again
filter.ok <- TRUE
MAKE.RAW.FILE <- TRUE

## Data specification
## The data name is the prefix of the probe- and gene-level output names
dataname <- 'main'
#dataname='all'; filter.ok=FALSE;

## Set to FALSE to use saved raw data table
#MAKE.RAW.FILE=FALSE;

## Global settings
## NOTE(review): presumably picked up by ArrayData.write -- confirm in MicroArray.r
ArrayData.write.digits <- 7

## Core paths
## NOTE(review): dataroot is not defined in this script; presumably it is set
## by the sourced MicroArray.r -- confirm
datadir <- file.path(dataroot, "BC", "Oslo3")
exportdir <- file.path(datadir, "Export")
annotationdir <- file.path(dataroot, "_Public_", "Agilent", "Annot")

## Filename pattern
## file.pattern captures five groups from a raw data file name; file.to.id
## joins groups 2, 4 and 5 with dashes to form the array ID
file.pattern <- '^(\\d+)_(\\d+)_([^_]+_){4}(\\d)_(\\d)\\.txt$'
file.to.id <- "\\2-\\4-\\5"

## Data sets

## Raw data: Batch 1
# Input locations for this batch
rawdatadir <- file.path(datadir, 'RawData', 'NeoAva_rawfiles_60K_bolk1')
annotationfile <- file.path(annotationdir, "028004_D_GeneList_20120411.txt")
arrayinfofile <- file.path(datadir, 'RawData', 'samples_bolk1.dat')
# Annotation table keyed by ProbeID; empty fields are read as NA
# NOTE(review): this table is overwritten by the batch 2 annotation further
# down before anything in this script uses it -- confirm that is intended
annotation <- read.delim(annotationfile, na.strings = "")
rownames(annotation) <- annotation$ProbeID
if (MAKE.RAW.FILE) {
  ## Read and pre-process array data (filter bad spots and combine replicate probes)
  targets <- readTargets(arrayinfofile)
  if (filter.ok) {
    # Keep only the arrays tagged "ok"
    targets <- subset(targets, Inclusion.tag == "ok")
  }
  # Array ID derived from the file name via file.pattern / file.to.id
  targets$ArrayID <- gsub(file.pattern, file.to.id, targets$Filename)
  # Remove trailing whitespace from the sample IDs
  targets$Sample.ID <- sub('\\s*$', '', targets$Sample.ID)
  # Column label for each array: "<Sample.ID>:<ArrayID>"
  targets$SampleArrayID <- paste0(targets$Sample.ID, ':', targets$ArrayID)
  rawdata1 <- read.agilent(
    targets$Filename,
    path = rawdatadir,
    names = targets$SampleArrayID
  )
  rawdata1 <- preprocess.agilent(rawdata1)
  summary(rawdata1)
}


## Raw data: Batch 2
# Input locations for this batch
rawdatadir <- file.path(datadir, 'RawData', 'NeoAva_rawfiles_60K_bolk2')
annotationfile <- file.path(annotationdir, "039494_D_GeneList_20120628.txt")
arrayinfofile <- file.path(datadir, 'RawData', 'samples_bolk2.dat')
# Annotation table keyed by ProbeID; empty fields are read as NA.
# This is the table still held in `annotation` when the data are aggregated
# to gene level at the end of the script.
annotation <- read.delim(annotationfile, na.strings = "")
rownames(annotation) <- annotation$ProbeID
if (MAKE.RAW.FILE) {
  ## Read and pre-process array data (filter bad spots and combine replicate probes)
  targets <- readTargets(arrayinfofile)
  if (filter.ok) {
    # Keep only the arrays tagged "ok"
    targets <- subset(targets, Inclusion.tag == "ok")
  }
  # Array ID derived from the file name via file.pattern / file.to.id
  targets$ArrayID <- gsub(file.pattern, file.to.id, targets$Filename)
  # Remove trailing whitespace from the sample IDs
  targets$Sample.ID <- sub('\\s*$', '', targets$Sample.ID)
  # Column label for each array: "<Sample.ID>:<ArrayID>"
  targets$SampleArrayID <- paste0(targets$Sample.ID, ':', targets$ArrayID)
  rawdata2 <- read.agilent(
    targets$Filename,
    path = rawdatadir,
    names = targets$SampleArrayID
  )
  rawdata2 <- preprocess.agilent(rawdata2)
  summary(rawdata2)
}

## Merge data
## Combine the two batches column-wise, restricted to the rows (by row name)
## present in both, and write the merged raw log2 table to Export/ByProbeID.
arrays <- NULL  # stays NULL when MAKE.RAW.FILE is FALSE
arrays.name <- pastename(dataname, 'probe')
Array.normalize.default.path <- file.path(exportdir, 'ByProbeID')
if (MAKE.RAW.FILE) {
  expr1 <- rawdata1$log2.probe
  expr2 <- rawdata2$log2.probe
  rows.shared <- intersect(rownames(expr1), rownames(expr2))
  if (length(rows.shared) == 0) {
    # Fail here with a clear message instead of writing an empty table
    stop("No row names are shared between batch 1 and batch 2; nothing to merge",
         call. = FALSE)
  }
  # drop = FALSE keeps each piece two-dimensional: without it a batch with a
  # single array (or a single shared row) collapses to a plain vector and
  # cbind() loses the array's column name or builds a transposed result
  expr <- cbind(
    expr1[rows.shared, , drop = FALSE],
    expr2[rows.shared, , drop = FALSE]
  )
  arrays <- ArrayData.new('raw', expr, label = 'ProbeID*ArrayID=gDetrendedSignal')
  ArrayData.write(
    arrays,
    file.path(Array.normalize.default.path, pastename(arrays.name, 'raw_log2.dat'))
  )
}

## Normalise data in arrays, or from file if arrays is NULL
arrays <- Array.normalize(arrays.name, arraydata = arrays)
# List the components of the normalised result
ls(arrays)

## Aggregate array data to gene level
arrays.gene.name <- pastename(dataname, 'gene')
# Point the default export folder at Export/ByGene before aggregating
# NOTE(review): presumably read by ArrayData.aggregate when writing -- confirm
Array.normalize.default.path <- file.path(exportdir, 'ByGene')
# `annotation` at this point is the table read in the batch 2 section above
arrays.gene <- ArrayData.aggregate(
  arrays,
  map = annotation,
  to = 'GeneSymbol',
  filename = arrays.gene.name
)
# List the components of the gene-level result
ls(arrays.gene)


# Last modified March 06, 2014.