AlignTest

From victor
Revision as of 20:12, 18 August 2014 by Layla (Talk | contribs)

Jump to: navigation, search

A simple program to test the basic alignment features.

#include <ScoringS2S.h>
#include <NWAlign.h>
#include <FSAlign.h>
#include <SWAlign.h>
#include <SubMatrix.h>
#include <AGPFunction.h>
#include <Sec.h>
#include <Alignment.h>
#include <AlignmentBase.h>
#include <SequenceData.h>
#include <SecSequenceData.h>
#include <GetArg.h>
#include <iostream>
using namespace Biopool;
/// Show command line options and help text.
void
sShowHelp(){
 cout << "\nALIGNMENT TEST"
 << "\nA simple program to test the basic alignment features.\n"
 << "\nOptions:"
 << "\n"
 << "\n   [--in <name>]     \t Name of input FASTA file (alternative to s1 & s2)"
 << "\n   [--s1 <seq>]      \t Target sequence (default = HEAGAWGHEE)"
 << "\n   [--s2 <seq>]      \t Template sequence (default = PAWHEAE)"
 << "\n   [--out <name>]    \t Name of output file (default = to screen)"
 << "\n"
 << "\n   [-m <name>]       \t Name of substitution matrix file (default = blosum62.dat)"
 << "\n   [-M <name>]       \t Name of structural substitution matrix file (default = secid.dat)"
 << "\n"
 << "\n   [-o <double>]     \t Open gap penalty (default = 12.00)"
 << "\n   [-e <double>]     \t Extension gap penalty (default = 3.00)"
 << "\n"
 << "\n   [--sec <name>]    \t Name of secondary structure FASTA file"
 << "\n   [--cSeq <double>] \t Coefficient for sequence alignment (default = 0.80)"
 << "\n   [--cStr <double>] \t Coefficient for structural alignment (default = 0.20)"
 << "\n"
 << "\n   [--verbose]       \t Verbose mode"
 << "\n" << endl;
}


int main(int argc, char **argv){
 // Default parameters
 string inputFileName, outputFileName, matrixFileName, matrixStrFileName, secFileName;
 string seq1, seq2, sec1, sec2;
 double openGapPenalty, extensionGapPenalty;
 double cSeq, cStr;
 bool verbose;
 // --------------------------------------------------
 // 0. Treat options
 // --------------------------------------------------
 if (getArg("h", argc, argv))	{
  sShowHelp();
  return 1;
 }
 getArg("-in", inputFileName, argc, argv, "!");
// --------------------------------------------------
string path = getenv("VICTOR_ROOT");
if (path.length() < 3)
 cout << "Warning: environment variable VICTOR_ROOT is not set." << endl;
string examplesPath;  
string dataPath = path + "data/";
if (inputFileName != "!"){
 inputFileName = examplesPath + inputFileName;
 ifstream inputFile(inputFileName.c_str());
 if (!inputFile)
  ERROR("Error opening input FASTA file.", exception);
 Alignment ali;
 ali.loadFasta(inputFile);
 if (ali.size() < 1)
  ERROR("Input FASTA file must contain two sequences.", exception);
 seq1 = Alignment::getPureSequence(ali.getTarget());
 seq2 = Alignment::getPureSequence(ali.getTemplate());
}
matrixFileName = dataPath + matrixFileName;
ifstream matrixFile(matrixFileName.c_str());
if (!matrixFile)
 ERROR("Error opening substitution matrix file.", exception);
matrixStrFileName = dataPath + matrixStrFileName;
ifstream matrixStrFile(matrixStrFileName.c_str());
if (!matrixStrFile)
 ERROR("Error opening structural substitution matrix file.", exception);
if (secFileName != "!"){
 secFileName = examplesPath + secFileName;
 ifstream secFile(secFileName.c_str());
 if (!secFile)
  ERROR("Error opening secondary structure FASTA file.", exception);
 Alignment aliSec;
 aliSec.loadFasta(secFile);
 if (aliSec.size() < 1)
  ERROR("Secondary structure FASTA file must contain two sequences.", exception);
 sec1 = Alignment::getPureSequence(aliSec.getTarget());
 sec2 = Alignment::getPureSequence(aliSec.getTemplate());
}


// --------------------------------------------------
// 2. Output test data
// --------------------------------------------------
if (verbose){
 fillLine(cout);
 if (secFileName != "!")
  cout << "\nTarget sequence:\n"                << seq1
       << "\n\nTarget secondary structure:\n"   << sec1
       << "\n\nTemplate sequence:\n"            << seq2
       << "\n\nTemplate secondary structure:\n" << sec2
       << "\n" << endl;
 else
  cout << "\nTarget sequence:\n"                << seq1
       << "\n\nTemplate sequence:\n"            << seq2
       << "\n" << endl;
}
// --------------------------------------------------
// 3. Calculate alignments
// --------------------------------------------------
SubMatrix sub(matrixFile);
SubMatrix subStr(matrixStrFile);
AGPFunction gf(openGapPenalty, extensionGapPenalty);
AlignmentData *ad;
Structure *str;
ScoringScheme *ss;
if (secFileName != "!") {
 ad = new SecSequenceData(4, seq1, seq2, sec1, sec2);
 str = new Sec(&subStr, ad, cStr);
 ss = new ScoringS2S(&sub, ad, str, cSeq);
}
else{
 ad = new SequenceData(2, seq1, seq2);
 ss = new ScoringS2S(&sub, ad, 0, 1.00);
}
NWAlign nwAlign(ad, &gf, ss);
SWAlign swAlign(ad, &gf, ss);
FSAlign fsAlign(ad, &gf, ss);


// --------------------------------------------------
// 4. Output alignments
// --------------------------------------------------
if (outputFileName != "!"){
 outputFileName = examplesPath + outputFileName;
 ofstream outputFile(outputFileName.c_str());
 if (!outputFile)
  ERROR("Error opening output file.", exception);
 cout << "\nSaving output to file: " << outputFileName
      << "\n" <<endl;
 outputFile << "\nGLOBAL" << endl;
 nwAlign.doMatch(outputFile);
 outputFile << "\nLOCAL:" << endl;
 swAlign.doMatch(outputFile);
 outputFile << "\nFS:" << endl;
 fsAlign.doMatch(outputFile);
}
else{
 cout << "\nGLOBAL" << endl;
 nwAlign.doMatch(cout);
 cout << "\nLOCAL" << endl;
 swAlign.doMatch(cout);
 cout << "\nFS" << endl;
 fsAlign.doMatch(cout);
}
return 0;

}