SequenceAligner
 
Loading...
Searching...
No Matches
main_fmindex.cpp File Reference
#include <immintrin.h>
#include <mpi.h>
#include <omp.h>
#include <algorithm>
#include <array>
#include <chrono>
#include <climits>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

Data Structures

class  FMIndex
 
struct  AffineDPScores
 
struct  Loc
 
struct  Seed
 
struct  ChainedSeed
 
struct  AlignmentResult
 
struct  LcsSegmentResult
 

Macros

#define EDNAFULL_MATRIX_DEFINED
 
#define EBLOSUM62_MATRIX_DEFINED
 
#define RESET   "\033[0m"
 
#define GREEN   "\033[32m"
 
#define RED   "\033[31m"
 
#define CYAN   "\033[36m"
 

Typedefs

using ScoreFn = int(*)(char, char)
 

Enumerations

enum  ScoreMode { MODE_DNA , MODE_PROTEIN }
 

Functions

std::vector< int > suffix_array_construction (const std::string &s)
 
int edna_score (char x, char y)
 
int blosum62_score (char x, char y)
 
int score (char x, char y, ScoreMode mode)
 
void showProgressBar (int progress, int total)
 
std::string getAccession (const std::string &header, ScoreMode mode)
 
std::string getGeneSymbol (const std::string &header, ScoreMode mode)
 
void processFasta (const std::string &filename, std::string &header_out, std::string &sequence_out)
 
void savePlainAlignment (const std::string &h1, const std::string &h2, const std::string &a1, const std::string &a2, std::ostream &os)
 
void saveLCS (const std::string &id, const std::string &lcs_str_val, std::ostream &os)
 
void printColoredAlignment (const std::string &seq1_aln, const std::string &seq2_aln, std::ostream &os=std::cout)
 
void writeRawDPMatrix (const std::vector< std::vector< int > > &dp_matrix, const std::string &filename)
 
void writeDPMatrix (const std::vector< std::vector< int > > &dp_matrix, const std::string &filename)
 
void writeRawCharMatrix (const std::vector< std::vector< char > > &char_matrix, const std::string &filename)
 
void writeCharMatrix (const std::vector< std::vector< char > > &char_matrix, const std::string &filename)
 
void initAffineDP (int n_len, std::vector< int > &prev_row_s, std::vector< int > &prev_row_e, std::vector< int > &prev_row_f, bool isGlobal)
 
void computeAffineDPRow (int i_row, const std::string &x_str, const std::string &y_str, std::vector< int > &prev_s_row, std::vector< int > &prev_e_row, std::vector< int > &prev_f_row, std::vector< int > &curr_s_row, std::vector< int > &curr_e_row, std::vector< int > &curr_f_row, std::vector< char > &curr_trace_s_row, ScoreFn score_fn_local, bool isGlobal)
 
std::vector< Seed > generate_raw_seeds (const std::string &query_seq, const FMIndex &target_fm_index, int kmer_len, int mpi_rank_val=0, int mpi_num_procs_val=1)
 
ChainedSeed find_best_seed_chain (std::vector< Seed > &seeds_vec, int min_diag_gap_val=0, int max_diag_gap_val=50000, int max_offset_dev_val=50)
 
AlignmentResult perform_sw_in_window (const std::string &sub1, const std::string &sub2, ScoreFn sfn, double go, double ge, int q_off, int t_off)
 
AlignmentResult align_segment_globally (const std::string &seg1, const std::string &seg2, ScoreFn sfn, double go, double ge)
 
LcsSegmentResult compute_lcs_for_segment (const std::string &seg1, const std::string &seg2)
 
void globalalign (const std::string &x_orig, const std::string &y_orig, const std::string &header1, const std::string &header2, const std::string &outdir, ScoreMode mode_val, ScoreFn score_fn_val, const FMIndex *target_fm_idx)
 
void localalign (const std::string &x, const std::string &y, const std::string &h1, const std::string &h2, const std::string &odir, ScoreMode mval, ScoreFn sfn_val, const FMIndex *tfm_idx)
 
void lcs (const std::string &x_o, const std::string &y_o, const std::string &h1_lcs, const std::string &h2_lcs, const std::string &odir_lcs, ScoreMode mode_lcs, const FMIndex *tfm_idx_lcs)
 
int main (int argc, char **argv)
 

Variables

const int EDNAFULL_SIZE = 15
 
int EDNAFULL_matrix [EDNAFULL_SIZE][EDNAFULL_SIZE]
 
const int EBLOSUM62_SIZE = 24
 
int EBLOSUM62_matrix [EBLOSUM62_SIZE][EBLOSUM62_SIZE]
 
bool verbose = false
 
bool binary = false
 
bool txt = false
 
int rank_val
 
double GAP_OPEN = -5.0
 
double GAP_EXTEND = -1.0
 

Macro Definition Documentation

◆ CYAN

#define CYAN   "\033[36m"

◆ EBLOSUM62_MATRIX_DEFINED

#define EBLOSUM62_MATRIX_DEFINED

◆ EDNAFULL_MATRIX_DEFINED

#define EDNAFULL_MATRIX_DEFINED

◆ GREEN

#define GREEN   "\033[32m"

◆ RED

#define RED   "\033[31m"

◆ RESET

#define RESET   "\033[0m"

Typedef Documentation

◆ ScoreFn

using ScoreFn = int (*)(char, char)

Enumeration Type Documentation

◆ ScoreMode

enum ScoreMode
Enumerator
MODE_DNA 
MODE_PROTEIN 

Function Documentation

◆ align_segment_globally()

AlignmentResult align_segment_globally ( const std::string &  seg1,
const std::string &  seg2,
ScoreFn  sfn,
double  go,
double  ge 
)

◆ blosum62_score()

int blosum62_score ( char  x,
char  y 
)
inline

◆ compute_lcs_for_segment()

LcsSegmentResult compute_lcs_for_segment ( const std::string &  seg1,
const std::string &  seg2 
)

◆ computeAffineDPRow()

void computeAffineDPRow ( int  i_row,
const std::string &  x_str,
const std::string &  y_str,
std::vector< int > &  prev_s_row,
std::vector< int > &  prev_e_row,
std::vector< int > &  prev_f_row,
std::vector< int > &  curr_s_row,
std::vector< int > &  curr_e_row,
std::vector< int > &  curr_f_row,
std::vector< char > &  curr_trace_s_row,
ScoreFn  score_fn_local,
bool  isGlobal 
)

◆ edna_score()

int edna_score ( char  x,
char  y 
)
inline

◆ find_best_seed_chain()

ChainedSeed find_best_seed_chain ( std::vector< Seed > &  seeds_vec,
int  min_diag_gap_val = 0,
int  max_diag_gap_val = 50000,
int  max_offset_dev_val = 50 
)

◆ generate_raw_seeds()

std::vector< Seed > generate_raw_seeds ( const std::string &  query_seq,
const FMIndex &  target_fm_index,
int  kmer_len,
int  mpi_rank_val = 0,
int  mpi_num_procs_val = 1 
)

◆ getAccession()

std::string getAccession ( const std::string &  header,
ScoreMode  mode 
)

◆ getGeneSymbol()

std::string getGeneSymbol ( const std::string &  header,
ScoreMode  mode 
)

◆ globalalign()

void globalalign ( const std::string &  x_orig,
const std::string &  y_orig,
const std::string &  header1,
const std::string &  header2,
const std::string &  outdir,
ScoreMode  mode_val,
ScoreFn  score_fn_val,
const FMIndex *  target_fm_idx 
)

◆ initAffineDP()

void initAffineDP ( int  n_len,
std::vector< int > &  prev_row_s,
std::vector< int > &  prev_row_e,
std::vector< int > &  prev_row_f,
bool  isGlobal 
)

◆ lcs()

void lcs ( const std::string &  x_o,
const std::string &  y_o,
const std::string &  h1_lcs,
const std::string &  h2_lcs,
const std::string &  odir_lcs,
ScoreMode  mode_lcs,
const FMIndex *  tfm_idx_lcs 
)

◆ localalign()

void localalign ( const std::string &  x,
const std::string &  y,
const std::string &  h1,
const std::string &  h2,
const std::string &  odir,
ScoreMode  mval,
ScoreFn  sfn_val,
const FMIndex *  tfm_idx 
)

◆ main()

int main ( int  argc,
char **  argv 
)

◆ perform_sw_in_window()

AlignmentResult perform_sw_in_window ( const std::string &  sub1,
const std::string &  sub2,
ScoreFn  sfn,
double  go,
double  ge,
int  q_off,
int  t_off 
)

◆ printColoredAlignment()

void printColoredAlignment ( const std::string &  seq1_aln,
const std::string &  seq2_aln,
std::ostream &  os = std::cout 
)

◆ processFasta()

void processFasta ( const std::string &  filename,
std::string &  header_out,
std::string &  sequence_out 
)

◆ saveLCS()

void saveLCS ( const std::string &  id,
const std::string &  lcs_str_val,
std::ostream &  os 
)

◆ savePlainAlignment()

void savePlainAlignment ( const std::string &  h1,
const std::string &  h2,
const std::string &  a1,
const std::string &  a2,
std::ostream &  os 
)

◆ score()

int score ( char  x,
char  y,
ScoreMode  mode 
)
inline

◆ showProgressBar()

void showProgressBar ( int  progress,
int  total 
)

◆ suffix_array_construction()

std::vector< int > suffix_array_construction ( const std::string &  s)

◆ writeCharMatrix()

void writeCharMatrix ( const std::vector< std::vector< char > > &  char_matrix,
const std::string &  filename 
)

◆ writeDPMatrix()

void writeDPMatrix ( const std::vector< std::vector< int > > &  dp_matrix,
const std::string &  filename 
)

◆ writeRawCharMatrix()

void writeRawCharMatrix ( const std::vector< std::vector< char > > &  char_matrix,
const std::string &  filename 
)

◆ writeRawDPMatrix()

void writeRawDPMatrix ( const std::vector< std::vector< int > > &  dp_matrix,
const std::string &  filename 
)

Variable Documentation

◆ binary

bool binary = false

◆ EBLOSUM62_matrix

int EBLOSUM62_matrix[EBLOSUM62_SIZE][EBLOSUM62_SIZE]

◆ EBLOSUM62_SIZE

const int EBLOSUM62_SIZE = 24

◆ EDNAFULL_matrix

int EDNAFULL_matrix[EDNAFULL_SIZE][EDNAFULL_SIZE]
Initial value:
= {
{5, -4, -4, -4, 1, -4, 1, 1, -4, 1, -4, 1, 1, 1, -2},
{-4, 5, -4, -4, -4, 1, 1, -4, 1, -4, 1, 1, -4, 1, -2},
{-4, -4, 5, -4, 1, 1, -4, -4, 1, -4, 1, -4, 1, 1, -2},
{-4, -4, -4, 5, -4, 1, -4, 1, 1, -4, 1, 1, -4, 1, -2},
{1, -4, 1, -4, -1, -4, -2, -2, -2, -2, -3, -2, -2, -2, -1},
{-4, 1, 1, 1, -4, -1, -2, -2, -2, -2, -2, -3, -2, -2, -1},
{1, 1, -4, -4, -2, -2, -1, -4, -2, -4, -2, -2, -2, -2, -1},
{1, -4, -4, 1, -2, -2, -4, -1, -4, -2, -2, -2, -2, -2, -1},
{-4, 1, 1, 1, -2, -2, -2, -4, -1, -4, -2, -2, -2, -2, -1},
{1, -4, 1, -4, -2, -4, -2, -2, -4, -1, -2, -2, -2, -2, -1},
{-4, 1, 1, 1, -3, -2, -2, -2, -2, -2, -1, -2, -3, -3, -1},
{1, 1, -4, 1, -2, -3, -2, -2, -2, -2, -2, -1, -3, -3, -1},
{1, 1, 1, -4, -2, -2, -2, -2, -2, -2, -3, -3, -1, -3, -1},
{1, 1, 1, 1, -2, -2, -2, -2, -2, -2, -3, -3, -3, -1, -1},
{-2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}

◆ EDNAFULL_SIZE

const int EDNAFULL_SIZE = 15

◆ GAP_EXTEND

double GAP_EXTEND = -1.0

◆ GAP_OPEN

double GAP_OPEN = -5.0

◆ rank_val

int rank_val

◆ txt

bool txt = false

◆ verbose

bool verbose = false