Bsoft 2.1.4
Bernard's software package
seq_util.cpp File Reference

Sequence utility functions. More...

#include "rwgencode.h"
#include "seq_analysis.h"
#include "seq_util.h"
#include "linked_list.h"
#include "utilities.h"
#include <map>

Functions

int seq_from_residues (Bmolgroup *molgroup)
 Obtain sequences from residues. More...
 
int seq_show (Bmolgroup *molgroup)
 Shows all molecular sequences. More...
 
int seq_mass (Bmolgroup *molgroup)
 Shows the masses of all molecular sequences. More...
 
vector< double > seq_elements (Bmolgroup *molgroup, Bstring &paramfile)
 Shows the elemental composition of all molecular sequences. More...
 
int seq_complement_all (Bmolgroup *molgroup)
 Complements all nucleotide sequences. More...
 
int seq_translate_all (Bmolgroup *molgroup, int frame, Bstring &gcfile)
 Translates all nucleotide sequences to protein sequences. More...
 
long seq_find_dna (Bmolgroup *molgroup, Bstring &seq)
 Finds a nucleotide sequence. More...
 
long seq_find_protein (Bmolgroup *molgroup, Bstring &seq)
 Finds an amino acid sequence. More...
 
Bstring seq_find_protein_in_dna (Bmolgroup *molgroup, Bstring &seq, int seqlenmin, int seqlenmax, int side1, int side2, double threshold, Bstring &gcfile)
 Finds the coding region for an amino acid sequence. More...
 
int getcode3 (char c, char *cod)
 Converts a one-letter amino acid designation to the three-letter equivalent. More...
 
char getcode1 (char *acode)
 Converts a three-letter amino acid designation to the one-letter equivalent. More...
 
int complement_sequence (Bstring &nucseq)
 Complements a nucleotide sequence in place. More...
 
char get_complement (char nuc)
 Get the Watson-Crick complement of a nucleotide base. More...
 
Bstring sequence_translate (Bstring &nucseq, long frame, Bstring &gencode)
 Translates a nucleotide sequence to a protein sequence. More...
 

Variables

int verbose
 
const map< char, string > res_code
 

Detailed Description

Sequence utility functions.

Author
Bernard Heymann
Date
Created: 20001029
Modified: 20220713

Function Documentation

◆ complement_sequence()

int complement_sequence ( Bstring & nucseq)

Complements a nucleotide sequence in place.

Parameters
&nucseq: nucleotide sequence to be complemented.
Returns
int 0.

◆ get_complement()

char get_complement ( char  nuc)

Get the Watson-Crick complement of a nucleotide base.

Parameters
nuc: nucleotide.
Returns
char complementing nucleotide.

◆ getcode1()

char getcode1 ( char *  acode)

Converts a three-letter amino acid designation to the one-letter equivalent.

Parameters
*acode: the desired amino acid three-letter code
Returns
char the corresponding one-letter code
Search through a list of 3-1 mappings for the desired three-letter code.

◆ getcode3()

int getcode3 ( char  c,
char *  cod 
)

Converts a one-letter amino acid designation to the three-letter equivalent.

Parameters
c: the desired amino acid code letter
*cod: the corresponding three-letter code
Returns
int 0.
Search through a list of 1-3 mappings for the desired letter.

◆ seq_complement_all()

int seq_complement_all ( Bmolgroup * molgroup)

Complements all nucleotide sequences.

Parameters
*molgroup: the molecule group.
Returns
int 0.
Each nucleotide sequence in the molecule group is complemented.

◆ seq_elements()

vector< double > seq_elements ( Bmolgroup * molgroup,
Bstring & paramfile 
)

Shows the elemental composition of all molecular sequences.

Parameters
*molgroup: set of sequences.
&paramfile: file of residue parameters.
Returns
vector<double> array of element numbers: HCNOS

◆ seq_find_dna()

long seq_find_dna ( Bmolgroup * molgroup,
Bstring & seq 
)

Finds a nucleotide sequence.

Parameters
*molgroup: the molecule group.
&seq: sequence to find.
Returns
long position.

◆ seq_find_protein()

long seq_find_protein ( Bmolgroup * molgroup,
Bstring & seq 
)

Finds an amino acid sequence.

Parameters
*molgroup: the molecule group.
&seq: sequence to find.
Returns
long position.

◆ seq_find_protein_in_dna()

Bstring seq_find_protein_in_dna ( Bmolgroup * molgroup,
Bstring & seq,
int  seqlenmin,
int  seqlenmax,
int  side1,
int  side2,
double  threshold,
Bstring & gcfile 
)

Finds the coding region for an amino acid sequence.

Parameters
*molgroup: the molecule group.
&seq: sequence to find.
seqlenmin: sequence length minimum.
seqlenmax: sequence length maximum.
side1: preceding sequence length to include.
side2: succeeding sequence length to include.
threshold: threshold for reporting possible hits.
&gcfile: file with genetic code.
Returns
Bstring coding sequence.
All molecules in the group are searched in all 6 possible frames.

◆ seq_from_residues()

int seq_from_residues ( Bmolgroup * molgroup)

Obtain sequences from residues.

Parameters
*molgroup: set of molecules.
Returns
int 0

◆ seq_mass()

int seq_mass ( Bmolgroup * molgroup)

Shows the masses of all molecular sequences.

Parameters
*molgroup: set of sequences.
Returns
int 0

◆ seq_show()

int seq_show ( Bmolgroup * molgroup)

Shows all molecular sequences.

Parameters
*molgroup: set of sequences.
Returns
int 0

◆ seq_translate_all()

int seq_translate_all ( Bmolgroup * molgroup,
int  frame,
Bstring & gcfile 
)

Translates all nucleotide sequences to protein sequences.

Parameters
*molgroup: the molecule group.
frame: the frame for translation.
&gcfile: file with genetic code.
Returns
int 0.
Each nucleic acid sequence in the molecule group is translated to the
protein sequence.

◆ sequence_translate()

Bstring sequence_translate ( Bstring & nucseq,
long  frame,
Bstring & gencode 
)

Translates a nucleotide sequence to a protein sequence.

Parameters
&nucseq: nucleotide sequence to be translated.
frame: coding frame.
&gencode: genetic code: array of amino acids.
Returns
Bstring translated protein sequence.

Variable Documentation

◆ res_code

const map<char, string> res_code
Initial value:
= {
{'-', "GAP"},
{'*', "UNK"},
{'A', "ALA"},
{'B', "ASX"},
{'C', "CYS"},
{'D', "ASP"},
{'E', "GLU"},
{'F', "PHE"},
{'G', "GLY"},
{'H', "HIS"},
{'I', "ILE"},
{'K', "LYS"},
{'L', "LEU"},
{'M', "MET"},
{'N', "ASN"},
{'P', "PRO"},
{'Q', "GLN"},
{'R', "ARG"},
{'S', "SER"},
{'T', "THR"},
{'V', "VAL"},
{'W', "TRP"},
{'Y', "TYR"},
{'Z', "GLX"},
{'X', "UNK"}
}

◆ verbose

int verbose
extern