#include <molecule.h>
Inheritance diagram for Molecule:
Public Member Functions | |
Molecule construction functions | |
Molecule () | |
virtual | ~Molecule () |
Molecule (Molecule &aMolecule, bool bool_resetMorganIndex=true) | |
Molecule (Molecule &molecule1, Molecule &molecule2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *)) | |
Molecule (Molecule &m1, Molecule &m2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(float, float, float), float edgeKernelParameter) | |
Molecule & | operator= (const Molecule &aMolecule) |
virtual Atom * | addAtom (string aSymbol, bool resetSSSR=true) throw ( CError ) |
Atom * | addAtom (Atom *, bool resetSSSR=true, bool resetMorganIndex=true) throw ( CError) |
Bond * | linkAtoms (Atom *aSource, Atom *aTarget, int aBondLabel, int aBondStereo=0, int aBondNotUsed=0, int aBondTopology=0, int aBondReactionCenter=0, bool resetSSSR=true) |
Bond * | linkAtoms (string aSource, string aTarget, int aBondLabel, int aBondStereo=0, int aBondNotUsed=0, int aBondTopology=0, int aBondReactionCenter=0, bool resetSSSR=true) throw ( CError ) |
Bond * | linkAtoms (int firstAtom, int secondAtom, int aBondLabel, int aBondStereo=0, int aBondNotUsed=0, int aBondTopology=0, int aBondReactionCenter=0, bool resetSSSR=true) throw ( CError ) |
void | readMOL (string aFilename, bool genericAtomType=false) throw (CError) |
void | readMOLOld (string aFilename) throw (CError) |
void | erase () |
void | eraseHiddenAtoms () |
void | eraseRings () |
void | eraseAdjacency () |
void | eraseWalks () |
Accessor functions | |
int | getId () const |
string | getIdString () |
string | getName () |
void | setName (string aName) |
void | setActivity (float aNumber) |
void | setActivity (string aNumber) |
float | getActivity (bool silentMode=false) throw ( CError ) |
float | hasActivity () |
void | unsetActivity () |
vector< Atom * > & | getAtoms () |
Atom * | getAtom (int anId) throw ( CError ) |
Atom * | getAtomByIndex (int ind) |
bool | atomExists (Atom *anAtom) |
int | numAtoms () |
int | numHiddenAtoms () |
int | numBonds () |
int | numHiddenBonds () |
long | bondSum () |
void | unsetBondFlags () |
void | unsetBondFlagsOriginal () |
bool | isChiral () |
void | select () |
void | unSelect () |
bool | isSelected () |
void | setSortDescriptor (string aName, int aType) |
int | getSortDescriptorType () |
string | getSortDescriptorName () |
Ring * | getRingWithID (int anID, bool createIfMissing) throw ( CError ) |
bool | hasRing (Ring *newRing, bool detectingRing=false) throw ( CError ) |
bool | hasRing () throw ( CError ) |
int | numRings () throw ( CError ) |
bool | hasSSSRDetected () |
float | getMW (bool silentError=false) throw ( CError ) |
int | getNumAtoms () |
string | getLocation () |
int | getOriginalFormat () |
void | setOriginalFormat (int a) |
Molecule manipulation functions | |
int | hideAtomsByIntDescriptor (string aDescriptorName, int aValue, bool refreshBonds=true) |
void | hideAtom (vector< Atom * >::iterator anAtomI) |
void | hideAtomAndToFromBonds (vector< Atom * >::iterator anAtomI) |
void | hideAtomAndToFromBonds (Atom *anAtom) |
int | hideHydrogens () |
bool | isHiddenAtom (Atom *anAtom) |
int | restoreHiddenAtoms (bool flagRestoreBonds=true) |
int | restoreHiddenBonds () |
int | refreshBonds () |
void | eraseAtom (Atom *anAtom) throw ( CError ) |
void | deleteBonds () |
void | deleteHiddenAtoms () |
void | setMorganLabels (int anOrder) |
void | setPerretLabels () |
int | setUniqueMorganIndices () |
void | resetMorganIndex () |
int | getNumberOfDistinctMorganIndices (int anOrder) |
int | getMaxMorganIteration () |
int | getNumCarbonsOfComponent (string aDescriptorName, int aValue) |
int | getNumNitrogensOfComponent (string aDescriptorName, int aValue) |
int | numAtomsNonCSkeleton () |
float | atomicDistance (Atom *atom1, Atom *atom2) |
void | compute () |
void | binClassifyFromDescriptor (string descriptorName, float value, bool smallerOrEqual=true) |
void | atomsLabelsListing (vector< string > *) |
void | atomsSymbolsListing (vector< string > *) |
void | bondsListing (vector< int > *) |
void | noTottersTransform () |
void | threeDtransform (int nBins, double distMin, double distMax) |
void | readPartialCharges (string charges) |
void | setMorganChargesLabels (double threshold) |
int | hideSalts (stringstream *out) |
int | markFragments () |
void | unmarkFragments () |
void | hideAllFragmentsBut (int aFragmentNumber) |
void | writeFragments (ofstream *outStream) |
int | DFS (Atom *startAtom, string intDescriptorName, int markValue) |
int | detectSSSR () |
void | resetSSSR () |
void | setHasSSSR () |
Bond * | checkEdges (Atom *) |
Iterators | |
vector< Atom * >::iterator | beginAtom () |
vector< Atom * >::iterator | endAtom () |
map< Atom *, Bond * >::iterator | beginBond (Atom *anAtom) |
map< Atom *, Bond * >::iterator | endBond (Atom *anAtom) |
map< Atom *, Bond * >::iterator | beginBond (int anId) |
map< Atom *, Bond * >::iterator | endBond (int anId) |
map< int, int >::iterator | beginComponentSizes () |
map< int, int >::iterator | endComponentSizes () |
Marginalized and general kernel related functions | |
void | setKashimaKernelProb (double aPq, bool skipSkeleton=false) |
double | sumPT () |
double | sumProbabilities () |
double | sumProbabilitiesFast () |
double | sumPQPS () |
double | sumPQPSFast () |
void | raisePowerFast () |
double | computeKernel (Molecule *anotherMolecule, double(*pt2GraphKernel)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int parameter1, int parameter2), double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int parameter1, int parameter2=1) |
double | getSelfKernel (double(*pt2GraphKernel)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int), double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int parameter1, int parameter2=1) |
double | calculateSelfKernel (double(*pt2GraphKernel)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int), double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int paramter1, int parameter2) |
double | getSelfKernel () throw ( CError ) |
void | setSelfKernel (double value) |
void | addToSelfKernel (double) |
void | substractToSelfKernel (double) |
void | resetSelfKernel () |
Matrix utilities functions related to the 3D pharmacophore kernel (Mahe 2006) | |
void | setAdjacency (int i, int j, double value) |
double | getAdjacency (int i, int j) |
void | setWalks (int i, int j, double value) |
double | getWalks (int i, int j) |
void | raisePowerAdjacency () |
double | traceWalks () |
double | traceDiagWalks () |
Output functions | |
string | toString () |
string | toStringShort () |
string | toStringLong () |
void | describe () |
void | describeShort () |
void | describeLong () |
void | describeEachAtom () |
void | writeMOL (string aFileName) |
void | writeDOT (string aFilename, bool perretLabels=false) |
void | writeSD (string aFileName) |
Protected Member Functions | |
void | moleculeChanged (bool resetSSSR=true, bool resetMorganIndex=true) |
Protected Attributes | |
vector< Atom * > | atoms |
vector< Atom * > | hiddenAtoms |
vector< Ring * > | sssr |
bool | flagHasSSSRDetected |
int | id |
bool | selectedFlag |
double | selfKernel |
bool | selfKernelCalculated |
bool | chiral |
bool | flagActivity |
int | sortDescriptorType |
string | sortDescriptorName |
int | maxMorganIteration |
Static Protected Attributes | |
static int | counter |
Private Member Functions | |
Bond * | linkAtomsNoReturn (Atom *aSource, Atom *aTarget, int aBondLabel, int aBondStereo=0, int aBondNotUsed=0, int aBondTopology=0, int aBondReactionCenter=0) |
void | linkAtomsNoReturn (int firstAtom, int secondAtom, int aBondLabel) throw ( CError ) |
Private Attributes | |
map< Atom *, map< Atom *, double > * > * | fastPT |
map< Atom *, map< Atom *, double > * > * | fastPTNext |
map< Atom *, map< Atom *, double > * > * | fastPTSave |
map< Atom *, double > | fastPQ |
map< Atom *, double > | fastPS |
map< int, int > | componentSizes |
float | activity |
string | location |
int | originalFormat |
vector< vector< double > > * | adjacency |
vector< vector< double > > * | walks |
FOR: SNSF SPONSORED PROJECT
PURPOSE: This class implements the notion of molecule. Molecules have
Properties are implemented by deriving the Molecule from the DataContainer class which takes care of memory allocation for these descriptors. Therefore descriptors can be added and removed at runtime using the DataContainer functions.
The Molecule class can be seen as a graph containing a list of vertex (Atom), each vertex containing a list of edges (Bond) to the neighbour vertex (Atom). Edges are directed and have a label. Chemical bonds are not directed so bonds are always double, one in each direction. So a Molecule can be seen as a directed labeled graph with bidirectional edges. ... many other changes
|
class constructor. |
|
class destructor. deletes all atoms in the molecule. |
|
copy constructor. |
|
product graph constructor. this constructor constructs a fusion molecule from two molecules using a product graph operation. two atoms which are connected in both molecules have an edge connecting them in the fused graph. |
|
"3D" constructor. Note : BondKernel prototype is different of those used on the "product graph constructor". |
|
adds an Atom to the atoms vector. Returns a pointer to the created Atom. WARNING this atom will be deleted when the molecule is deleted so don't use this pointer! if resetSSSR = true, then this action invalidates previous sssr computations. |
|
adds an Atom with aSymbol chemical symbol to the molecule. aSymbol can be in any case. The first letter of aSymbol will be automatically changed to capital and the second letter to normal caps. Returns a pointer to the created Atom. WARNING this atom will be deleted when the molecule is deleted so don't use this pointer! if resetSSSR = true, then this action invalidates previous sssr computations.
|
|
adds a value to the self kernel. |
|
checks if an atom is part of the molecule. returns true if anAtom is included in the atom vector, false otherwise. |
|
computes the Euclidian distance between two atoms. |
|
fills in a vector of String with the distinct labels of the atoms of the molecule. |
|
fills in vector of String with the distinct symbols of the atoms of the molecule. |
|
returns an iterator to the first Atom. |
|
returns an iterator to the first Bond of an Atom designated by its id. |
|
|
NOT DOCUMENTED |
|
sets the activity according to the value of a descriptor and a value. If true is given as third argument (default) then descriptorName <= value are considered positive. If false is given as third argument then molecules with descriptorName >= value are considered positive. if a descriptor is missing then the molecule is left without activity. |
|
fills in vector of Int with the distinct types of bonds of the molecule. |
|
returns the sum of the bond types. |
|
calculates the molecule's self Kashima kernel and stores the result in the private selfKernel variable. |
|
helper function for detectSSSR. selects an optimum edge for elimination in structures without N2 nodes. |
|
this function should be called everytime a molecule is created / modified. automatically called at the end of Molecule::readMol() and MoleculeUtils::readMDLCtabBlock(). |
|
compares this molecule with anotherMolecule and returns the value defined by the function pt2GraphKernel, using the function pt2AtomKernel to compare atoms and pt2BondKernel to compare bonds. |
|
deletes all bonds in all atoms. |
|
deletes all hidden atoms. |
|
prints a description of the molecule to stdout using toString() and DataContainer describe(). Reimplemented from DataContainer. |
|
prints a description of each atom (using describe()) and each bond (using describe())to stdout. |
|
prints a description of the molecule to stdout using the molecule toString(), all atoms toStringShort(), and all bonds toStringShort().
|
|
prints a description of the molecule to stdout using toString(). Reimplemented from DataContainer. |
|
detects the smallest set of smallest rings and returns the number of rings. this procedure is automatically called when a molecule is loaded from a mol or sd file through the function compute. call compute() explicitly after modifying atom or bond composition. this function also sets the ring membership of atoms and bonds. according to: Ring Perception Using Breadth-First Search, John Figueras 399 Baker s Pond Road, Orleans, Massachusetts 02653, J. Chem. Inf. Comput. Sci. 1996, 36, 986-991. |
|
performs a DFS search on the graph starting at startAtom, marking atoms with markValue in descriptor intDescriptorName, and returning the number of nodes in the connected component. |
|
returns an iterator to the last Atom. |
|
returns an iterator to the first Bond of an Atom designated by its id. |
|
|
NOT DOCUMENTED |
|
erases all atoms in the current molecule. returns the molecule in the state of construction by the default constructor. |
|
erases the adjacency matrix of the molecule. |
|
erases anAtom from the list. Does not delete the atom though! |
|
erases hidden atoms of the molecule. |
|
erases the rings of the molecule. |
|
erases the 'walks' matrix of the molecule (used to compute the pharmacophore kernel, see (Mahe, 2006)). |
|
returns the activity of the molecule. |
|
returns the value of an entry of the adjacency matrix of the molecule. |
|
returns a pointer to an atom designated by its unique Id. raises a CError exception if no atom with this Id is present in the molecule. |
|
returns a pointer to the atom's vector. |
|
returns the unique id of the molecule. |
|
returns the unique id of the molecule as a string. |
|
returns the location where the molecule is stored. |
|
returns the iteration of the Morgan index at which the different connectivity values have reached a maximum. |
|
returns the molecular weight. |
|
returns the molecule name, i.e., the name descriptor value (Nb :not necessarily unique). |
|
returns the number of atoms in the molecule (ignoring hidden atoms). |
|
returns the number of distinct morgan indices (of order anOrder) present in the molecule. |
|
returns the number of carbons in a connected component graph defined with an Int descriptor named aDescriptorName having value aValue. |
|
returns the number of nitrogens in a connected component graph defined with an Int descriptor named aDescriptorName having value aValue. |
|
returns the format of the original file. one of MDL2000, MDL3000, MDL2000JLP. |
|
returns the ring of the corresponding Id. |
|
emits an error if the kernel was not calculated before. |
|
returns the kernel value for the molecule with itself. |
|
returns the name of the descriptor used to sort molecules. |
|
returns the type of the descriptor used to sort molecules. |
|
returns the value of an entry of the walks matrix of the molecule. |
|
returns true if the activity of the molecule was initialized. |
|
returns true if the molecule has a ring. If sssr was not detected before this call, detectSSSR() is called. |
|
returns true if the molecule already has newRing in sssr. If sssr was not detected before this call, detectSSSR() is called. |
|
returns the value of the 'hasSSRDetected' flag. |
|
NOT DOCUMENTED |
|
hides an atom (but not the bonds to this atom). call refreshBonds() after hiding all atoms. |
|
hides anAtom and all to / from bonds for this atom. |
|
hides anAtom and all to / from bonds for this atom. |
|
hides all atoms (and associated edges) with intDescriptor aDescriptorName == aValue. returns the number of hidden atoms |
|
moves the hydrogen atoms to the hiddenAtoms container, so that further algorithms will run without hydrogens. returns the number of hydrogens hidden. hidden hydrogens can be restored using restoreHiddenAtoms(). |
|
hides all but the biggest (in atom numbers) connected atoms in the molecule. returns the number of components removed. WARNING: does not test if two compounds have identical maximum size. in that case, it will hide all but the first one. |
|
returns true if the molecule has chiral information. |
|
returns true if an atom is hidden. |
|
returns selectedFlag. |
|
links two atoms using their position in the atom container. if firstAtom or secondAtom are not between 0 and numAtoms()-1 a CError error is thrown. returns a pointer to the forward bond just created (the return bond can be retrived with forwardBond->getReverse()). |
|
links two atoms using their name. If no atom with name aSource or aTarget are found a CError error is thrown. returns a pointer to the forward bond just created (the return bond can be retrived with forwardBond->getReverse()). |
|
links two atoms poited by aSource and aTarget using aBondLabel bond type. If the two atoms are not in the atoms vector they are first added to the molecule. returns a pointer to the forward bond just created (the return bond can be retrived with forwardBond->getReverse()).
|
|
links a pair of atoms in a single direction only : firstAtom -> secondAtom. |
|
links two atoms poited by aSource and aTarget using aBondLabel bond type. If the two atoms are not in the atoms vector they are first added to the molecule WARNING this function adds a single directed bond, only to be used with in Molecule::raisePower(). |
|
NOT DOCUMENTED |
|
this function is called internally every time a molecule is changed (atom or bonding), reset flags. |
|
transforms the molecule into a graph preventing 'totters' (see (Mahe 2004)). |
|
returns the number of atoms of the molecule. |
|
returns the number of terminal atoms of the molecule. |
|
returns the number of bonds of the molecule. |
|
returns the number of hidden atoms of the molecule. |
|
returns the number of hidden bonds of the molecule. |
|
returns the number of rings present in the molecule. |
|
assignment operator. |
|
multiplies the walks matrix by the adjacency matrix. |
|
function used to compute the graph kernel using the fused graph + sum of the powers of the transition matrix approach. see MoleculeUtils::powerKernel(). same as raisePower() but instead of using the Bond class the transition probabilities are read from fastPt and fastPTSave. |
|
reads the molecule description from a Mol file. MUST STILL BE COMPLETED |
|
OLD version of readMOL(). |
|
reads partial charges of the atoms of the molecule from a ';' separated string. |
|
hides all bonds to and from hidden atoms. call this function after hiding all atoms in a molecule. returns the number of edges hidden. |
|
resets the calculations of the morgan indices. CALL THIS FUNCTION EACH TIME THE MOLECULE IS MODIFIED! |
|
resets the self kernel. |
|
sets the flagHasSSSRDetected to false. |
|
restores the hidden atoms (for example hydrogens). |
|
restores the hidden bonds among atoms (does not restore bonds in hidden atoms). |
|
sets selectedFlag. |
|
sets the activity of the molecule from a string activity. provided for convenience. |
|
sets the activity of the molecule (a descriptor in float native format for faster retrieval). |
|
sets an entry of the adjacency matrix of the molecule to a given value. |
|
sets the flagHasSSSRDetected to true. |
|
sets the start, stop and transition probabilities for the calculation of the Kashima kernel as published in (Kashima et al. 2003). the start probability is set to an identical value of 1/numAtoms() for all atoms. the stop probability can be choosen between 0 and 1. the transition probability of each bond of an Atom is set to (1.0 - stop probability) / numbonds. |
|
introduces the sign of the partial charge into the label of the atoms of the molecule. |
|
sets the morganLabel of atoms to the concatenation of the atomic symbol and the Morgan index of anOrder iteration. |
|
sets the molecule name.
|
|
sets the molecule's original format. one of MDL2000, MDL3000, MDL2000JLP. |
|
sets the perretLabel of atoms: the atomic symbol. Only carbons get an additional J if they have more than two aromatic bonds (aromatic rings joining carbons). |
|
sets the set kernel to the given value. |
|
sets the descriptor type and name used by the < operator to compare molecules. if reverse == true then the sorting will be in descending order. |
|
sets the uniqueMorganIndex of each atom to the Morgan index having the maximum of different connectivity values for the molecule. returns the iteration of the smallest Morgan Index with the maximum number of connectivity values. |
|
sets an entry of the 'walks' matrix (i.e., a weighted adjacency matrix) of the molecule to a given value. |
|
substracts a value to the self kernel. |
|
function used to compute the graph kernel using the fused graph + sum of the powers of the transition matrix approach. see MoleculeUtils::powerKernel() |
|
fast version of sumPQPS(). |
|
function used to compute the graph kernel using the fused graph + sum the powers of the transition matrix approach. see MoleculeUtils::powerKernel(). |
|
fast version of sumProbabilities(). |
|
returns the sum of transition probabilities. should be used with fused graph. |
|
transforms the molecule into a 3D graph (see (Mahe 2006)). |
|
returns a string description of the molecule (name, unique Id, memory Adress, number of atoms). |
|
returns a string description of the molecule (name, unique Id, memory Adress, number of atoms and atom descriptions). |
|
returns a short string description of the molecule. (name and unique Id). |
|
computes the diagonal elements of the walk matrix multiplied by the adjacency matrix, and sums to get the trace. |
|
computes the trace of the walks matrix. |
|
NOT DOCUMENTED |
|
unset selectedFlag. |
|
turns off the activity flag. |
|
unsets all bonds flags. |
|
unsets all bonds flags original. |
|
writes a dot file representing the molecule. the molecule can be converted to graphic representation using Graphviz's dot program. |
|
NOT DOCUMENTED |
|
write the molecule definition in a MDL MOL file. |
|
write a MDL structure data (SD) file with a single molecule. |
|
activity value of the molecule. |
|
adjacency matrix of the molecule. used to compute the 3D pharmacophore kernel. |
|
container for atoms. |
|
true if the molecule has chiral information. |
|
NO DOCUMENTATION |
|
counter of the number of molecules which have been instanciated. |
|
hash used in the fast computations of the powers of the product graph adjacency matrix. |
|
hash used in the fast computations of the powers of the product graph adjacency matrix. |
|
hash used in the fast computations of the powers of the product graph adjacency matrix. |
|
hash used in the fast computations of the powers of the product graph adjacency matrix. |
|
hash used in the fast computations of the powers of the product graph adjacency matrix. |
|
true if the molecule has activity information. |
|
flag indicating if smallest set of smallest rings was detected. |
|
container for hidden atoms (for example the removed hydrogens are moved to this container). |
|
molecule unique Id. |
|
location where the molecule is stored. automatically set by readMOL. |
|
contains the iteration number at which the maximum number of distinct connectivity values is reached. |
|
original format of the molecule. |
|
flag to select subsets of molecules. |
|
molecule's self Kernel. |
|
true if selfKernel was calculated, false otherwise. |
|
name of the sorting descriptor. |
|
type and name of descriptor to be used to sort molecules. use setSortDescriptor( aType, aName ) getSortDescriptorType(), setSortDescriptorName() to access. |
|
container for rings (smallest set of smallest rings). |
|
'walks' matrix of the molecule. used to compute the 3D pharmacophore kernel. |