AlignTest

From victor
Revision as of 20:09, 18 August 2014 by Layla (Talk | contribs)

(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search

A simple program to test the basic alignment features.

#include <ScoringS2S.h>
#include <NWAlign.h>
#include <FSAlign.h>
#include <SWAlign.h>
#include <SubMatrix.h>
#include <AGPFunction.h>
#include <Sec.h>
#include <Alignment.h>
#include <AlignmentBase.h>
#include <SequenceData.h>
#include <SecSequenceData.h>
#include <GetArg.h>
#include <iostream>
using namespace Biopool;
/// Show command line options and help text.
void
sShowHelp(){

cout << "\nALIGNMENT TEST" << "\nA simple program to test the basic alignment features.\n" << "\nOptions:" << "\n" << "\n [--in <name>] \t Name of input FASTA file (alternative to s1 & s2)" << "\n [--s1 <seq>] \t Target sequence (default = HEAGAWGHEE)" << "\n [--s2 <seq>] \t Template sequence (default = PAWHEAE)" << "\n [--out <name>] \t Name of output file (default = to screen)" << "\n" << "\n [-m <name>] \t Name of substitution matrix file (default = blosum62.dat)" << "\n [-M <name>] \t Name of structural substitution matrix file (default = secid.dat)" << "\n" << "\n [-o <double>] \t Open gap penalty (default = 12.00)" << "\n [-e <double>] \t Extension gap penalty (default = 3.00)" << "\n" << "\n [--sec <name>] \t Name of secondary structure FASTA file" << "\n [--cSeq <double>] \t Coefficient for sequence alignment (default = 0.80)" << "\n [--cStr <double>] \t Coefficient for structural alignment (default = 0.20)" << "\n" << "\n [--verbose] \t Verbose mode" << "\n" << endl;

}


int main(int argc, char **argv){

// Default parameters string inputFileName, outputFileName, matrixFileName, matrixStrFileName, secFileName; string seq1, seq2, sec1, sec2; double openGapPenalty, extensionGapPenalty; double cSeq, cStr; bool verbose; // -------------------------------------------------- // 0. Treat options // --------------------------------------------------

if (getArg("h", argc, argv)) { sShowHelp(); return 1; }

getArg("-in", inputFileName, argc, argv, "!"); getArg("-s1", seq1, argc, argv, "HEAGAWGHEE"); getArg("-s2", seq2, argc, argv, "PAWHEAE"); getArg("-out", outputFileName, argc, argv, "!"); getArg("m", matrixFileName, argc, argv, "blosum62.dat"); getArg("M", matrixStrFileName, argc, argv, "secid.dat"); getArg("o", openGapPenalty, argc, argv, 12.00); getArg("e", extensionGapPenalty, argc, argv, 3.00); getArg("-sec", secFileName, argc, argv, "!"); getArg("-cSeq", cSeq, argc, argv, 0.80); getArg("-cStr", cStr, argc, argv, 0.20); verbose = getArg("-verbose", argc, argv);

// -------------------------------------------------- // 1. Load data // --------------------------------------------------

string path = getenv("VICTOR_ROOT"); if (path.length() < 3) cout << "Warning: environment variable VICTOR_ROOT is not set." << endl; string examplesPath; string dataPath = path + "data/"; if (inputFileName != "!"){ inputFileName = examplesPath + inputFileName; ifstream inputFile(inputFileName.c_str()); if (!inputFile) ERROR("Error opening input FASTA file.", exception); Alignment ali; ali.loadFasta(inputFile); if (ali.size() < 1) ERROR("Input FASTA file must contain two sequences.", exception); seq1 = Alignment::getPureSequence(ali.getTarget()); seq2 = Alignment::getPureSequence(ali.getTemplate()); } matrixFileName = dataPath + matrixFileName; ifstream matrixFile(matrixFileName.c_str()); if (!matrixFile) ERROR("Error opening substitution matrix file.", exception); matrixStrFileName = dataPath + matrixStrFileName; ifstream matrixStrFile(matrixStrFileName.c_str()); if (!matrixStrFile) ERROR("Error opening structural substitution matrix file.", exception); if (secFileName != "!"){ secFileName = examplesPath + secFileName; ifstream secFile(secFileName.c_str()); if (!secFile) ERROR("Error opening secondary structure FASTA file.", exception); Alignment aliSec; aliSec.loadFasta(secFile); if (aliSec.size() < 1) ERROR("Secondary structure FASTA file must contain two sequences.", exception); sec1 = Alignment::getPureSequence(aliSec.getTarget()); sec2 = Alignment::getPureSequence(aliSec.getTemplate()); }


// -------------------------------------------------- // 2. Output test data // --------------------------------------------------

if (verbose){ fillLine(cout); if (secFileName != "!") cout << "\nTarget sequence:\n" << seq1 << "\n\nTarget secondary structure:\n" << sec1 << "\n\nTemplate sequence:\n" << seq2 << "\n\nTemplate secondary structure:\n" << sec2 << "\n" << endl; else cout << "\nTarget sequence:\n" << seq1 << "\n\nTemplate sequence:\n" << seq2 << "\n" << endl; }

// -------------------------------------------------- // 3. Calculate alignments // --------------------------------------------------

SubMatrix sub(matrixFile); SubMatrix subStr(matrixStrFile); AGPFunction gf(openGapPenalty, extensionGapPenalty);

AlignmentData *ad; Structure *str; ScoringScheme *ss;

if (secFileName != "!") { ad = new SecSequenceData(4, seq1, seq2, sec1, sec2); str = new Sec(&subStr, ad, cStr); ss = new ScoringS2S(&sub, ad, str, cSeq); } else{ ad = new SequenceData(2, seq1, seq2); ss = new ScoringS2S(&sub, ad, 0, 1.00); }

NWAlign nwAlign(ad, &gf, ss); SWAlign swAlign(ad, &gf, ss); FSAlign fsAlign(ad, &gf, ss);


// -------------------------------------------------- // 4. Output alignments // --------------------------------------------------

if (outputFileName != "!"){ outputFileName = examplesPath + outputFileName; ofstream outputFile(outputFileName.c_str()); if (!outputFile) ERROR("Error opening output file.", exception); cout << "\nSaving output to file: " << outputFileName << "\n" <<endl;

outputFile << "\nGLOBAL" << endl; nwAlign.doMatch(outputFile);

outputFile << "\nLOCAL:" << endl; swAlign.doMatch(outputFile);

outputFile << "\nFS:" << endl; fsAlign.doMatch(outputFile);

} else{ cout << "\nGLOBAL" << endl; nwAlign.doMatch(cout);

cout << "\nLOCAL" << endl; swAlign.doMatch(cout);

cout << "\nFS" << endl; fsAlign.doMatch(cout);

}

return 0; }