Bsoft 2.1.4
Bernard's software package
rwmolecule.cpp File Reference

Library routines to read and write molecule files, including sequences and coordinates. More...

#include "rwmolecule.h"
#include "rwatomprop.h"
#include "mol_transform.h"
#include "mol_util.h"
#include "seq_util.h"
#include "linked_list.h"
#include "utilities.h"
#include "rwmol_star.h"
#include "rwmol_text.h"
#include "rwClustal.h"
#include "rwEMBL.h"
#include "rwFASTA.h"
#include "rwGenBank.h"
#include "rwGROMACS.h"
#include "rwPDB.h"
#include "rwPhylip.h"
#include "rwPIR.h"
#include "rwWAH.h"

Functions

int molgroup_sequence_check (Bmolgroup *molgroup)
 
Bmolgroup * molgroup_init ()
 Initializes and allocates a new molecule group. More...
 
Bmolecule * molecule_add (Bmolecule **mol, char *name)
 Adds a molecule to a linked list. More...
 
Bmolecule * molecule_add (Bmolecule **mol, Bstring &name)
 
Bresidue * residue_add (Bresidue **res, const char *type)
 Adds a residue to a linked list. More...
 
Bresidue * residue_add (Bresidue **res, Bstring &type)
 
Batom * atom_add (Batom **atom, const char *type)
 Adds an atom to a linked list. More...
 
Batom * atom_add (Batom **atom, Bstring &type)
 
Batom * atom_copy (Batom *atom)
 
long residue_count (Bmolgroup *molgroup)
 Counts the number of residues in a molecule group. More...
 
long atom_count (Bmolgroup *molgroup)
 Counts the number of atoms in a molecule group. More...
 
int atom_clean_type (Batom *atom, const char *type)
 Cleans up the type string and assigns an element code to an atom. More...
 
Bbond * bond_add (Bbond **bond, Batom *atom1, Batom *atom2, double l, double k)
 Adds a bond to a linked list. More...
 
Bangle * angle_add (Bangle **angle, Batom *atom1, Batom *atom2, Batom *atom3, double a, double k)
 Adds an angle to a linked list. More...
 
int molgroup_list_kill (Bmolgroup *molgroup)
 Destroys a molecule group linked list. More...
 
int molgroup_kill (Bmolgroup *molgroup)
 Destroys a molecule group. More...
 
int molecule_kill (Bmolecule *mol)
 Destroys a molecule. More...
 
int residue_kill (Bresidue *res)
 Destroys a residue. More...
 
int bond_kill (Bbond *bond)
 Deallocates a list of bonds. More...
 
int angle_kill (Bangle *angle)
 Deallocates a list of angles. More...
 
Bmolgroup * molgroup_list_copy (Bmolgroup *molgroup)
 Copies a molecule group list. More...
 
Bmolgroup * molgroup_copy (Bmolgroup *molgroup)
 Copies a molecule group. More...
 
Bmolecule * molecule_copy (Bmolecule *mol)
 Copies a molecule. More...
 
Bmolecule * mol_copy_and_add_to_molgroup (Bmolgroup *molgroup, Bmolecule *mol)
 Copies a molecule and assigns it to a new pointer in the molgroup. More...
 
Bbond * molgroup_bond_list_copy (Bmolgroup *molgroup, Bmolgroup *molgroupcopy)
 Copies a bond list. More...
 
int molgroup_from_molgroup_list (Bmolgroup *molgroup)
 Converts a molecule group list to a single molecule group. More...
 
Bmolgroup * read_molecule (const char *filename, const char *atom_select, const char *paramfile)
 The generalized function for reading molecular files. More...
 
Bmolgroup * read_molecule (Bstring &filename, Bstring &atom_select, Bstring &paramfile)
 
Bmolgroup * read_molecule (Bstring *file_list, int set_pbc, Vector3< double > box, Bstring atom_select, Bstring paramfile)
 Reads and catenates multiple molecule files. More...
 
int write_molecule (char *filename, Bmolgroup *molgroup)
 Writes a molecule group. More...
 
int write_molecule (Bstring &filename, Bmolgroup *molgroup)
 
int molgroup_list_write (Bstring &filename, Bmolgroup *molgroup)
 Writes a molecule group list. More...
 
long molgroup_count_molecules (Bmolgroup *molgroup)
 Counts the total number of molecules in a molecule group. More...
 
long molgroup_count_residues (Bmolgroup *molgroup)
 Counts the total number of residues in a molecule group. More...
 
long mol_count_residues (Bmolecule *mol)
 Counts the total number of residues in a molecule. More...
 
long molgroup_count_atoms (Bmolgroup *molgroup)
 Counts the total number of atoms in a molecule group. More...
 
long mol_count_atoms (Bmolecule *mol)
 Counts the total number of atoms in a molecule. More...
 
int molgroup_consolidate_gaps (Bmolgroup *molgroup)
 Removes redundant gaps from an alignment. More...
 
long molgroup_stats (Bmolgroup *molgroup, int show)
 Calculates the statistics of a molecule group. More...
 
long molgroup_stats (Bmolgroup *molgroup)
 
long mol_stats (Bmolecule *mol, int show)
 Calculates the statistics of a molecule. More...
 
long mol_stats (Bmolecule *mol)
 
int molecule_update_comment (Bmolgroup *molgroup, int n, char **strings)
 Puts a set of strings and time in the main comment of a molecule group. More...
 
int molecule_get_masses (Bmolgroup *molgroup, Bstring &paramfile)
 Gets atomic masses from a parameter file. More...
 
int bond_exists (Bbond *bondlist, Batom *atom1, Batom *atom2)
 
Bbond * molgroup_bond_list_generate (Bmolgroup *molgroup, double maxlength, int wrap)
 Generates a bond list based on atom separation. More...
 
Bbond * mol_bond_list_generate (Bmolgroup *molgroup, double bondlength, int wrap)
 Generates an intramolecular distance-based bond list. More...
 
int molecules_to_molgroups (Bmolgroup *molgroup)
 Converts molecules in a molecule group to individual molecule groups. More...
 

Variables

int verbose
 

Detailed Description

Library routines to read and write molecule files, including sequences and coordinates.

Author
Bernard Heymann
Date
Created: 19980822
Modified: 20220427

Function Documentation

◆ angle_add()

Bangle * angle_add ( Bangle **  angle,
Batom *  atom1,
Batom *  atom2,
Batom *  atom3,
double  a,
double  k 
)

Adds an angle to a linked list.

Parameters
**angle: pointer to any angle in the list.
*atom1: atom1 of angle.
*atom2: atom2 of angle (central atom).
*atom3: atom3 of angle.
a: reference angle.
k: angle strength.
Returns
Bangle* new angle.
The function allocates memory for a new angle structure.
If the content of the pointer is null, the new structure is
the first in the list. Otherwise, the end of the list is found
and the new structure added to it.

◆ angle_kill()

int angle_kill ( Bangle *  angle)

Deallocates a list of angles.

Parameters
*angle: first angle in the list.
Returns
int 0.
All angles downstream are deallocated.

◆ atom_add() [1/2]

Batom * atom_add ( Batom **  atom,
Bstring &  type 
)

◆ atom_add() [2/2]

Batom * atom_add ( Batom **  atom,
const char *  type 
)

Adds an atom to a linked list.

Parameters
**atom: pointer to any atom in the list.
*type: atom type.
Returns
Batom* new atom.
The function allocates memory for a new atom structure.
If the content of the pointer is null, the new structure is
the first in the list. Otherwise, the end of the list is found
and the new structure added to it.

◆ atom_clean_type()

int atom_clean_type ( Batom *  atom,
const char *  type 
)

Cleans up the type string and assigns an element code to an atom.

Parameters
*atom: atom.
*type: atom type.
Returns
int 0.
The first two alphanumeric characters of the type string are used to
determine the element.

◆ atom_copy()

Batom * atom_copy ( Batom *  atom)

◆ atom_count()

long atom_count ( Bmolgroup *  molgroup)

Counts the number of atoms in a molecule group.

Parameters
*molgroup: the molecule group.
Returns
long number of atoms.

◆ bond_add()

Bbond * bond_add ( Bbond **  bond,
Batom *  atom1,
Batom *  atom2,
double  l,
double  k 
)

Adds a bond to a linked list.

Parameters
**bond: pointer to any bond in the list.
*atom1: atom1 of bond.
*atom2: atom2 of bond.
l: reference bond length.
k: bond strength.
Returns
Bbond* new bond.
The function allocates memory for a new bond structure.
If the content of the pointer is null, the new structure is
the first in the list. Otherwise, the end of the list is found
and the new structure added to it.

◆ bond_exists()

int bond_exists ( Bbond *  bondlist,
Batom *  atom1,
Batom *  atom2 
)

◆ bond_kill()

int bond_kill ( Bbond *  bond)

Deallocates a list of bonds.

Parameters
*bond: first bond in the list.
Returns
int 0.
All bonds downstream are deallocated.

◆ mol_bond_list_generate()

Bbond * mol_bond_list_generate ( Bmolgroup *  molgroup,
double  bondlength,
int  wrap 
)

Generates an intramolecular distance-based bond list.

This function defines bonds on distance and within molecules.
If the molecule group already has a bond list, no new bonds are generated. 
Parameters
*molgroup: molecule group structure.
bondlength: maximum bond length.
wrap: wrap around periodic boundaries if !=0.
Returns
Bbond* new bond list.

◆ mol_copy_and_add_to_molgroup()

Bmolecule * mol_copy_and_add_to_molgroup ( Bmolgroup *  molgroup,
Bmolecule *  mol 
)

Copies a molecule and assigns it to a new pointer in the molgroup.

Parameters
*molgroup: the molecule group.
*mol: the molecule to be copied.
Returns
Bmolecule* the new molecule, NULL if copy failed.
Adds a new molecule to the molecule group identical to the given
molecule and returns a pointer to the new molecule.

◆ mol_count_atoms()

long mol_count_atoms ( Bmolecule *  mol)

Counts the total number of atoms in a molecule.

Parameters
*mol: the molecule.
Returns
long number of atoms.

◆ mol_count_residues()

long mol_count_residues ( Bmolecule *  mol)

Counts the total number of residues in a molecule.

Parameters
*mol: the molecule.
Returns
long number of residues.

◆ mol_stats() [1/2]

long mol_stats ( Bmolecule *  mol)

◆ mol_stats() [2/2]

long mol_stats ( Bmolecule *  mol,
int  show 
)

Calculates the statistics of a molecule.

Parameters
*mol: the molecule.
show: flag to show statistics.
Returns
long number of atoms (<0 if writing failed).

◆ molecule_add() [1/2]

Bmolecule * molecule_add ( Bmolecule **  mol,
Bstring &  name 
)

◆ molecule_add() [2/2]

Bmolecule * molecule_add ( Bmolecule **  mol,
char *  name 
)

Adds a molecule to a linked list.

Parameters
**mol: pointer to any molecule in the list.
*name: molecule name.
Returns
Bmolecule* new molecule.
The function allocates memory for a new molecule structure.
If the content of the pointer is null, the new structure is
the first in the list. Otherwise, the end of the list is found
and the new structure added to it.

◆ molecule_copy()

Bmolecule * molecule_copy ( Bmolecule *  mol)

Copies a molecule.

Parameters
*mol: the molecule to be copied.
Returns
Bmolecule* the new molecule, NULL if copy failed.
Generates a new molecule with the same structure as the given molecule.

◆ molecule_get_masses()

int molecule_get_masses ( Bmolgroup *  molgroup,
Bstring &  paramfile 
)

Gets atomic masses from a parameter file.

Parameters
*molgroup: the molecule group.
&paramfile: parameter file name.
Returns
int 0.

◆ molecule_kill()

int molecule_kill ( Bmolecule *  mol)

Destroys a molecule.

Parameters
*mol: the molecule.
Returns
int 0.

◆ molecule_update_comment()

int molecule_update_comment ( Bmolgroup *  molgroup,
int  n,
char **  strings 
)

Puts a set of strings and time in the main comment of a molecule group.

This is designed to pack the command line into a string followed by
a second string for the time.
Parameters
*molgroup: the molecule group.
n: the number of strings.
**strings: an array of strings.
Returns
int string length of the new comment.

◆ molecules_to_molgroups()

int molecules_to_molgroups ( Bmolgroup *  molgroup)

Converts molecules in a molecule group to individual molecule groups.

A new linked list of molecule groups is created and the links to the
individual molecules set. 
Parameters
*molgroup: molecule group structure (modified).
Returns
int 0.

◆ molgroup_bond_list_copy()

Bbond * molgroup_bond_list_copy ( Bmolgroup *  molgroup,
Bmolgroup *  molgroupcopy 
)

Copies a bond list.

Parameters
*molgroup: molecule group structure.
*molgroupcopy: molecule group structure to copy bonds to.
Returns
Bbond* new bond list.
A copy of the molecule group bond list is generated and returned. 

◆ molgroup_bond_list_generate()

Bbond * molgroup_bond_list_generate ( Bmolgroup *  molgroup,
double  maxlength,
int  wrap 
)

Generates a bond list based on atom separation.

Parameters
*molgroup: the molecule group.
maxlength: maximum bond length.
wrap: wrap around periodic boundaries if !=0.
Returns
Bbond* new bond list.

◆ molgroup_consolidate_gaps()

int molgroup_consolidate_gaps ( Bmolgroup *  molgroup)

Removes redundant gaps from an alignment.

All positions in an alignment with only gaps are removed.
Parameters
*molgroup: the molecule group.
Returns
int 0.

◆ molgroup_copy()

Bmolgroup * molgroup_copy ( Bmolgroup *  molgroup)

Copies a molecule group.

Parameters
*molgroup: the molecule group.
Returns
Bmolgroup* new molecule group.
All parts of a molecule group are copied to a completely new structure
hierarchy, except sequence flag array.

◆ molgroup_count_atoms()

long molgroup_count_atoms ( Bmolgroup *  molgroup)

Counts the total number of atoms in a molecule group.

Parameters
*molgroup: the molecule group.
Returns
long number of atoms.

◆ molgroup_count_molecules()

long molgroup_count_molecules ( Bmolgroup *  molgroup)

Counts the total number of molecules in a molecule group.

Parameters
*molgroup: the molecule group.
Returns
long number of molecules.

◆ molgroup_count_residues()

long molgroup_count_residues ( Bmolgroup *  molgroup)

Counts the total number of residues in a molecule group.

Parameters
*molgroup: the molecule group.
Returns
long number of residues.

◆ molgroup_from_molgroup_list()

int molgroup_from_molgroup_list ( Bmolgroup *  molgroup)

Converts a molecule group list to a single molecule group.

Parameters
*molgroup: molecule group list.
Returns
int 0.
The input molecule group list is replaced by a single molecule group. 

◆ molgroup_init()

Bmolgroup * molgroup_init ( )

Initializes and allocates a new molecule group.

Returns
Bmolgroup* the new molecule group, NULL if initialization failed.
The selection string is set to "all".
The spacegroup is set to 1, the space group string to "P 1".
The point group is set to "C1".
The unit cell is set to 1,1,1,90,90,90.

◆ molgroup_kill()

int molgroup_kill ( Bmolgroup *  molgroup)

Destroys a molecule group.

Parameters
*molgroup: the molecule group.
Returns
int 0.

◆ molgroup_list_copy()

Bmolgroup * molgroup_list_copy ( Bmolgroup *  molgroup)

Copies a molecule group list.

Parameters
*molgroup: the molecule group list.
Returns
Bmolgroup* new molecule group list.
All molecule groups are copied to a completely new list.

◆ molgroup_list_kill()

int molgroup_list_kill ( Bmolgroup *  molgroup)

Destroys a molecule group linked list.

Parameters
*molgroup: the molecule group linked list.
Returns
int 0.

◆ molgroup_list_write()

int molgroup_list_write ( Bstring &  filename,
Bmolgroup *  molgroup 
)

Writes a molecule group list.

The output files are numbered if the list contains more than one molecule group.
Parameters
&filename: the file name.
*molgroup: the molecule group.
Returns
int number of molecules written (<0 if writing failed).

◆ molgroup_sequence_check()

int molgroup_sequence_check ( Bmolgroup *  molgroup)

◆ molgroup_stats() [1/2]

long molgroup_stats ( Bmolgroup *  molgroup)

◆ molgroup_stats() [2/2]

long molgroup_stats ( Bmolgroup *  molgroup,
int  show 
)

Calculates the statistics of a molecule group.

Parameters
*molgroup: molecule group.
show: flag to show statistics.
Returns
long number of molecules (<0 if writing failed).

◆ read_molecule() [1/3]

Bmolgroup * read_molecule ( Bstring &  filename,
Bstring &  atom_select,
Bstring &  paramfile 
)

◆ read_molecule() [2/3]

Bmolgroup * read_molecule ( Bstring *  file_list,
int  set_pbc,
Vector3< double >  box,
Bstring  atom_select,
Bstring  paramfile 
)

Reads and catenates multiple molecule files.

Parameters
file_list: list of file names.
set_pbc: flag to fit within periodic boundaries.
box: periodic boundary box.
atom_select: atomic selection.
paramfile: atomic parameters.
Returns
Bmolgroup* new molecule group.

◆ read_molecule() [3/3]

Bmolgroup * read_molecule ( const char *  filename,
const char *  atom_select,
const char *  paramfile 
)

The generalized function for reading molecular files.

Parameters
*filename: the file name.
*atom_select: a selection string.
*paramfile: parameter file name.
Returns
Bmolgroup* new molecule group, NULL if reading failed.
All sequence and atomic coordinate information is read from a file into
an internal hierarchy of structures in linked lists:
    Bmolgroup   molecule group or collection of molecules
    Bmolecule   linked list of molecules in the group
    Bresidue    linked list of residues in a molecule
    Batom       linked list of atoms in a residue
    Bbond       linked list of bonds in the molecule group
The selection string is used to select for specific atom types:
    CA          C-alpha atoms only
The parameter file is used to load atomic properties, such as mass
    and charge. The default file is bsoft/parameters/atom_prop.star.
The input format is based on the file name extension.

◆ residue_add() [1/2]

Bresidue * residue_add ( Bresidue **  res,
Bstring &  type 
)

◆ residue_add() [2/2]

Bresidue * residue_add ( Bresidue **  res,
const char *  type 
)

Adds a residue to a linked list.

Parameters
**res: pointer to any residue in the list.
*type: residue type.
Returns
Bresidue* new residue.
The function allocates memory for a new residue structure.
If the content of the pointer is null, the new structure is
the first in the list. Otherwise, the end of the list is found
and the new structure added to it.

◆ residue_count()

long residue_count ( Bmolgroup *  molgroup)

Counts the number of residues in a molecule group.

Parameters
*molgroup: the molecule group.
Returns
long number of residues.

◆ residue_kill()

int residue_kill ( Bresidue *  res)

Destroys a residue.

Parameters
*res: the residue.
Returns
int 0.

◆ write_molecule() [1/2]

int write_molecule ( Bstring &  filename,
Bmolgroup *  molgroup 
)

◆ write_molecule() [2/2]

int write_molecule ( char *  filename,
Bmolgroup *  molgroup 
)

Writes a molecule group.

The output format is based on the file name extension.
Parameters
*filename: the file name.
*molgroup: the molecule group.
Returns
int number of molecules written (<0 if writing failed).

Variable Documentation

◆ verbose

int verbose
extern