AlignTest

From victor
Revision as of 19:10, 18 August 2014 by Layla (Talk | contribs)

Jump to: navigation, search

A simple program to test the basic alignment features.

#include <ScoringS2S.h>
#include <NWAlign.h>
#include <FSAlign.h>
#include <SWAlign.h>
#include <SubMatrix.h>
#include <AGPFunction.h>
#include <Sec.h>
#include <Alignment.h>
#include <AlignmentBase.h>
#include <SequenceData.h>
#include <SecSequenceData.h>
#include <GetArg.h>
#include <iostream>
using namespace Biopool;
/// Show command line options and help text.
void
sShowHelp(){
 cout << "\nALIGNMENT TEST"
 << "\nA simple program to test the basic alignment features.\n"
 << "\nOptions:"
 << "\n"
 << "\n   [--in <name>]     \t Name of input FASTA file (alternative to s1 & s2)"
 << "\n   [--s1 <seq>]      \t Target sequence (default = HEAGAWGHEE)"
 << "\n   [--s2 <seq>]      \t Template sequence (default = PAWHEAE)"
 << "\n   [--out <name>]    \t Name of output file (default = to screen)"
 << "\n"
 << "\n   [-m <name>]       \t Name of substitution matrix file (default = blosum62.dat)"
 << "\n   [-M <name>]       \t Name of structural substitution matrix file (default = secid.dat)"
 << "\n"
 << "\n   [-o <double>]     \t Open gap penalty (default = 12.00)"
 << "\n   [-e <double>]     \t Extension gap penalty (default = 3.00)"
 << "\n"
 << "\n   [--sec <name>]    \t Name of secondary structure FASTA file"
 << "\n   [--cSeq <double>] \t Coefficient for sequence alignment (default = 0.80)"
 << "\n   [--cStr <double>] \t Coefficient for structural alignment (default = 0.20)"
 << "\n"
 << "\n   [--verbose]       \t Verbose mode"
 << "\n" << endl;
}


int main(int argc, char **argv){
 // Default parameters
 string inputFileName, outputFileName, matrixFileName, matrixStrFileName, secFileName;
 string seq1, seq2, sec1, sec2;
 double openGapPenalty, extensionGapPenalty;
 double cSeq, cStr;
 bool verbose;

// -------------------------------------------------- // 0. Treat options // --------------------------------------------------

if (getArg("h", argc, argv)) { sShowHelp(); return 1; }

getArg("-in", inputFileName, argc, argv, "!"); getArg("-s1", seq1, argc, argv, "HEAGAWGHEE"); getArg("-s2", seq2, argc, argv, "PAWHEAE"); getArg("-out", outputFileName, argc, argv, "!"); getArg("m", matrixFileName, argc, argv, "blosum62.dat"); getArg("M", matrixStrFileName, argc, argv, "secid.dat"); getArg("o", openGapPenalty, argc, argv, 12.00); getArg("e", extensionGapPenalty, argc, argv, 3.00); getArg("-sec", secFileName, argc, argv, "!"); getArg("-cSeq", cSeq, argc, argv, 0.80); getArg("-cStr", cStr, argc, argv, 0.20); verbose = getArg("-verbose", argc, argv);

// -------------------------------------------------- // 1. Load data // --------------------------------------------------

string path = getenv("VICTOR_ROOT"); if (path.length() < 3) cout << "Warning: environment variable VICTOR_ROOT is not set." << endl; string examplesPath; string dataPath = path + "data/"; if (inputFileName != "!"){ inputFileName = examplesPath + inputFileName; ifstream inputFile(inputFileName.c_str()); if (!inputFile) ERROR("Error opening input FASTA file.", exception); Alignment ali; ali.loadFasta(inputFile); if (ali.size() < 1) ERROR("Input FASTA file must contain two sequences.", exception); seq1 = Alignment::getPureSequence(ali.getTarget()); seq2 = Alignment::getPureSequence(ali.getTemplate()); } matrixFileName = dataPath + matrixFileName; ifstream matrixFile(matrixFileName.c_str()); if (!matrixFile) ERROR("Error opening substitution matrix file.", exception); matrixStrFileName = dataPath + matrixStrFileName; ifstream matrixStrFile(matrixStrFileName.c_str()); if (!matrixStrFile) ERROR("Error opening structural substitution matrix file.", exception); if (secFileName != "!"){ secFileName = examplesPath + secFileName; ifstream secFile(secFileName.c_str()); if (!secFile) ERROR("Error opening secondary structure FASTA file.", exception); Alignment aliSec; aliSec.loadFasta(secFile); if (aliSec.size() < 1) ERROR("Secondary structure FASTA file must contain two sequences.", exception); sec1 = Alignment::getPureSequence(aliSec.getTarget()); sec2 = Alignment::getPureSequence(aliSec.getTemplate()); }


// -------------------------------------------------- // 2. Output test data // --------------------------------------------------

if (verbose){ fillLine(cout); if (secFileName != "!") cout << "\nTarget sequence:\n" << seq1 << "\n\nTarget secondary structure:\n" << sec1 << "\n\nTemplate sequence:\n" << seq2 << "\n\nTemplate secondary structure:\n" << sec2 << "\n" << endl; else cout << "\nTarget sequence:\n" << seq1 << "\n\nTemplate sequence:\n" << seq2 << "\n" << endl; }

// -------------------------------------------------- // 3. Calculate alignments // --------------------------------------------------

SubMatrix sub(matrixFile); SubMatrix subStr(matrixStrFile); AGPFunction gf(openGapPenalty, extensionGapPenalty);

AlignmentData *ad; Structure *str; ScoringScheme *ss;

if (secFileName != "!") { ad = new SecSequenceData(4, seq1, seq2, sec1, sec2); str = new Sec(&subStr, ad, cStr); ss = new ScoringS2S(&sub, ad, str, cSeq); } else{ ad = new SequenceData(2, seq1, seq2); ss = new ScoringS2S(&sub, ad, 0, 1.00); }

NWAlign nwAlign(ad, &gf, ss); SWAlign swAlign(ad, &gf, ss); FSAlign fsAlign(ad, &gf, ss);


// -------------------------------------------------- // 4. Output alignments // --------------------------------------------------

if (outputFileName != "!"){ outputFileName = examplesPath + outputFileName; ofstream outputFile(outputFileName.c_str()); if (!outputFile) ERROR("Error opening output file.", exception); cout << "\nSaving output to file: " << outputFileName << "\n" <<endl;

outputFile << "\nGLOBAL" << endl; nwAlign.doMatch(outputFile);

outputFile << "\nLOCAL:" << endl; swAlign.doMatch(outputFile);

outputFile << "\nFS:" << endl; fsAlign.doMatch(outputFile);

} else{ cout << "\nGLOBAL" << endl; nwAlign.doMatch(cout);

cout << "\nLOCAL" << endl; swAlign.doMatch(cout);

cout << "\nFS" << endl; fsAlign.doMatch(cout);

}

return 0; }