# process_arrays.r

### Read in array data and normalise

## Import R functions and packages
source("D:\\Projects\\_Appl_\\_R_\\MicroArray.r")
library(limma);

## Global settings
# NOTE(review): presumably consumed by ArrayData.write (defined outside this
# script) -- confirm.
ArrayData.write.digits <- 7

## Read feature extraction files? (Otherwise read the saved raw data matrix)
MAKE.RAW.FILE <- TRUE
# MAKE.RAW.FILE <- FALSE

## Core paths
# !!! Set the paths for data reading, writing, and annotation files !!!
# NOTE(review): `dataroot` is not assigned anywhere in this script; presumably
# it comes from the sourced MicroArray.r or from the session -- confirm.
datadir <- file.path(dataroot, "_Appl_", "R", "MicroArrayTest")
annotationdir <- file.path(dataroot, "_Public_", "Agilent", "Annot")
rawdatadir <- file.path(datadir, "GSE44666_RAW")
exportroot <- file.path(datadir, "Export")

## Data set names and paths
annotationfile <- file.path(annotationdir, "039494_D_GeneList_20120628.txt")
sampleinfofile <- file.path(datadir, "sample_arrays.dat")

## Data subset specification
# NOTE(review): SubsetSelection is defined outside this script; the variables
# `dataname`, `taglist` and `exportdir` used further down are presumably set
# by this call -- confirm.
SubsetSelection("main", "Main", c("yes"))
# SubsetSelection("all", "All")

## Filename pattern
# The replacement keeps capture groups 2, 4 and 5 of the file name (a run of
# digits and two single digits) and joins them with "-" to form the ArrayID.
file.pattern <- "(.+)_(\\d+)_([^_]+_){4}(\\d)_(\\d)(\\.txt)?\\s*$"
file.to.id <- "\\2-\\4-\\5"

## Read annotation file (better than annotation from Feature Extraction)
# Empty fields are read as NA; rows are then indexed by probe identifier.
annotation <- read.delim(annotationfile, na.strings = "")
rownames(annotation) <- annotation$ProbeID

## Read, pre-process and write raw data
# `dataname`, `exportdir` and `taglist` are not assigned in this script
# (presumably set by the SubsetSelection call earlier -- confirm).
arrays.name <- pastename(dataname, "probe")
Array.normalize.default.path <- ensure.path(exportdir, "ByProbeID")

if (MAKE.RAW.FILE) {
  ## Read and pre-process array data (filter bad spots and combine replicate probes)
  #-- Get list of files from data folder
  # The dot is escaped so that only names really ending in ".txt" are listed
  # (an unescaped "." would match any character before "txt").
  filename <- dir(rawdatadir, pattern = "\\.txt$")
  # Keep file names as character regardless of the R version's factor default.
  filelist <- data.frame(filename = filename, stringsAsFactors = FALSE)
  # Derive the ArrayID from the file name; names that do not match
  # file.pattern are left unchanged by gsub and so will not match any sample.
  filelist$ArrayID <- gsub(file.pattern, file.to.id, filelist$filename)
  #-- Read sample information
  samples <- read.delim(sampleinfofile, header = TRUE)
  #-- Match samples with files, set SampleArrayID, and select array set
  targets <- merge(filelist, samples, by = "ArrayID")
  # Strip the fixed leading "2528004" from the ArrayID to get a shorter label.
  targets <- within(targets, ShortArrayID <- sub("^2528004", "", ArrayID))
  targets <- within(targets, SampleArrayID <- paste0(SampleID, ":", ShortArrayID))
  if (!is.null(taglist)) targets <- subset(targets, Include %in% taglist)
  # Fail early with a clear message instead of handing an empty file list on.
  if (nrow(targets) == 0) {
    stop("No array files in '", rawdatadir,
         "' match the sample information and subset selection.",
         call. = FALSE)
  }
  #-- Read feature extraction files
  rawdata <- read.agilent(targets$filename, path = rawdatadir,
                          names = targets$SampleArrayID)
  # Perform filtering and probe level aggregation
  rawdata <- preprocess.agilent(rawdata)
  # Explicit print: inside a braced block the value of a bare summary() call
  # is discarded and never displayed.
  print(summary(rawdata))
  #-- Create ArrayData container, and save raw data matrix to file
  arrays <- ArrayData.new("raw", rawdata$log2.probe,
                          label = "ProbeID*SampleArrayID=gDetrendedSignal")
  ArrayData.write(
    arrays,
    file.path(Array.normalize.default.path, pastename(arrays.name, "raw_log2.dat"))
  )
  ##---
} else {
  ## Make Array.normalize read raw data from previously saved file
  arrays <- NULL
}

## Normalise the data held in `arrays`, or from file if `arrays` is NULL
arrays <- Array.normalize(arrays.name, arraydata = arrays)  # k=20 by default
ls(arrays)

## Aggregate array data to gene level and save to file
# Probes are mapped through the annotation table to its GeneSymbol column.
arrays.gene.name <- pastename(dataname, "gene")
Array.normalize.default.path <- ensure.path(exportdir, "ByGene")
arrays.gene <- ArrayData.aggregate(
  arrays,
  map = annotation,
  to = "GeneSymbol",
  filename = arrays.gene.name
)
ls(arrays.gene)

# Last modified March 06, 2014.