MoleculeSet Class Reference

#include <moleculeset.h>

List of all members.

Public Member Functions

MoleculeSet construction functions
 MoleculeSet ()
 MoleculeSet (const MoleculeSet &aSet)
 ~MoleculeSet ()
int add (MoleculeSet *aSet)
void addMolecule (Molecule *aMolecule)
Molecule * addMoleculeCopy (Molecule *aMolecule)
void deleteAll ()
Input functions
int addSD (string aFileName, bool genericAtomType=false, long beginMolecule=-1, long endMolecule=-1)
int addKCF (string aFileName, long beginMolecule=-1, long endMolecule=-1)
Molecule * addSingleMOL (string aMolFile, bool genericAtomType=false)
Molecule * addSingleKCF (string aMolFile)
void readMolDirectory (string aPath, bool genericAtomType=false, long beginMolecule=-1, long endMolecule=-1)
void readKcfDirectory (string dataDir, long beginMolecule=-1, long endMolecule=-1)
void addMutag (string aFileName, string rFileName="", uint numMolToRead=500)
void readActivityFile (string aFileName)
void readDescriptorFile (string aFileName, string separator=";")
void readGistClassifyFile (string aFileName)
void readGistActivityFile (string aFileName, string aDescriptor)
void readGram (string aFileName, vector< vector< double > > *gram)
void readGramNormal (string aFileName)
void readGramRaw (string aFileName)
void readPartialCharges (string fileName)
Accessor functions
uint numMolecules ()
Molecule * operator[] (string aName) throw ( CError )
Molecule * getMolByName (string aName) throw ( CError )
Molecule * operator[] (int anInd) throw ( CError )
Molecule * getMolByIndex (int anInd) throw ( CError )
long getPossibleValuesInIntDescriptor (string aDescriptorName, vector< int > *p)
double getPq ()
int getConvergenceCondition ()
bool nameExists (string aName)
bool hasActivity ()
void setIntDescriptor (string aName, int aValue)
void setUniqueMorganIndices ()
void setMorganLabels (int anOrder)
void setIntDescriptor (string aLabel, string aMolecule, int aValue)
void setActivity (string aMolecule, float aValue)
void setKashimaKernelParam (double aPq, int aConvergenceCondition, bool skipSkeleton=false)
void setComparisonSet (MoleculeSet *)
void setMorganChargesLabels (double threshold)
MoleculeSet manipulation functions
void selectAll ()
void unSelectAll ()
int select (vector< string > *aSubset)
int unSelect (vector< string > *aSubset)
long selectByFloatDescriptor (string aName, float aValue)
long selectByIntDescriptor (string aName, int aValue)
long selectByActivity (float aValue)
long selectHasActivity ()
int selectByMW (float minmw, float maxmw=-1, bool addMolecularDescriptor=false)
int selectByNumAtoms (float minNumAtoms, float maxNumAtoms=-1, bool addMolecularDescriptor=false)
void sortByDescriptor (string aDescriptorName, int aDescriptorType, bool reverse=false)
void sortByDescriptor (string aDescriptorName, bool reverse=false)
void sortByMW ()
void sortByNumAtoms ()
void binClassifyFromDescriptor (string descriptorName, float value, bool smallerOrEqual=true)
Molecule * findFirstMoleculeWithName (string aName) throw ( CError )
void removeDuplicates ()
void deleteHiddenAtoms ()
void hideHydrogens ()
void hideSalts (string aReportFileName="")
void restoreHiddenAtoms ()
void addFragmentsToSet (Molecule *aMol, int minAtoms=1)
void pushFragments (Molecule *aMol, int minAtoms=1)
double diversityBaryMean ()
vector< string > atomsLabelsListing ()
vector< string > atomsSymbolsListing ()
vector< int > bondsListing ()
void noTottersTransform ()
void threeDtransform (int nBins, double distMin, double distMax)
void minMaxDistances (double *distMin, double *distMax)
Kernels and Gram matrices related functions
void gramCompute (double aPq, double(*pt2GraphKernel)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int), double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int aParameter=1000, string aReportFileName="", int nbThreadsWanted=1, bool silentMode=false, bool filterTotters=false)
void gramCompute (MoleculeSet *anotherSet, double aPq, double(*pt2GraphKernel)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int), double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int aParameter=1000, string aReportFileName="", int nbThreadsWanted=1, bool silentMode=false, bool filterTotters=false)
void gramCompute (double aPq, double(*pt2GraphKernel)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int), double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int parameter1=1000, int parameter2=1, string aReportFileName="", int nbThreadsWanted=1, bool silentMode=false, bool filterTotters=false)
void gramCompute (MoleculeSet *anotherSet, double aPq, double(*pt2GraphKernel)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int), double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int parameter1=1000, int parameter2=1, string aReportFileName="", int nbThreadsWanted=1, bool silentMode=false, bool filterTotters=false)
void gramCompute3D (double(*pt2GraphKernel)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(float, float, float), float), double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(float, float, float), float edgeKernelparameter, bool silentMode)
void gramCompute3D (MoleculeSet *anotherSet, double(*pt2GraphKernel)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(float, float, float), float), double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(float, float, float), float edgeKernelparameter, bool silentMode)
void kernelCompute (Molecule *aMol, double(*pt2GraphKernel)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int), double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), vector< double > *resultsRaw, vector< double > *resultsNormal, int convergenceCondition=1000, int parameter2=1, bool silentMode=false)
void resetGramMatrix ()
void resetSelfKernels ()
void initializeGram (double value)
void initializeSelfKernel (double value)
void normalizeGram ()
void normalizeGram_raw ()
void normalizeTanimoto ()
void normalizeTanimoto_raw ()
void normalizeTanimotoMinMax ()
void addToGram (int row, int col, double value)
void addToGramNormal (int row, int col, double value)
void substractToGram (int row, int col, double value)
double getGramValue (int row, int col)
Output functions
void writeActivityFile (string aFilename, bool addActivityExtension=true, string activityDescriptor=ACTIVITY)
void writeGramMatrix (string aFileName, bool normal=false, bool self=false, bool silentMode=false)
void writeSelfKernelList (string aFilename, bool silentMode=false)
void writeSD (string aFileName, bool selectedOnly=false)
void writeSubsetSD (string aFileName, vector< string > *anOrder)
void writeSubsetKCF (string aFileName, vector< string > *anOrder)
void writeKCF (string aFileName, bool selectedOnly=false)
string toString (bool selectedOnly=false)
string toStringShort ()
string toStringLong ()
void describe (bool selectedOnly=false)
void describeShort ()
void describeLong ()
void writeDescriptors (string aFileName, bool selectedOnly=false) throw ( CError )
long writeMolToDir (string aDirName, bool selectedOnly=false)
long writeDotsToDir (string aDirectory, bool selectedOnly=false, bool perretLabels=false)

Protected Member Functions

void setSortDescriptor (string aName, int aType, bool reverse=false) throw ( CError )
string getSortDescriptorName ()

Protected Attributes

MoleculeSet * comparisonSet
vector< vector< double > > * gram
vector< vector< double > > * gramNormal
bool gramCalculated
double pq
int convergenceCondition
int subsetStart
int subsetSize
bool activitySet


Detailed Description

Set of molecules on which virtual experiments may be performed

Author:
Dr Jean-Luc Perret (luc@kuicr.kyoto-u.ac.jp), Kyoto University, Japan
Version:
0.3
Date:
17 Jan 2004
CLASS NAME: MoleculeSet. FOR: SNSF SPONSORED PROJECT. PURPOSE: This class implements the notion of molecule set.

It is a set in the mathematical sense, in that no two molecules should have the same name. However, a MoleculeSet can contain two identical graphs.

WARNING: No check is made, and no error is thrown, when two molecules have the same name, except when calling the [] operator.

Examples:

moleculeset_example.cpp.


Constructor & Destructor Documentation

MoleculeSet::MoleculeSet  ) 
 

class constructor.

MoleculeSet::~MoleculeSet  ) 
 

destructor for the MoleculeSet. deletes gram and gramNormal.


Member Function Documentation

int MoleculeSet::add MoleculeSet aSet  ) 
 

adds all molecules in aSet to the current set.

void MoleculeSet::addFragmentsToSet Molecule aMol,
int  minAtoms = 1
 

NOT DOCUMENTED

int MoleculeSet::addKCF string  aFileName,
long  beginMolecule = -1,
long  endMolecule = -1
 

reads a KCF file and returns the number of created molecules.

void MoleculeSet::addMolecule Molecule aMolecule  ) 
 

adds a molecule to the set. molecules added to the molecule set are not deleted when the set is deleted. use the deleteAll() function to do so (CAUTION if you reference these molecules from elsewhere).

Molecule* MoleculeSet::addMoleculeCopy Molecule aMolecule  ) 
 

adds a copy of the molecule in argument. used by add() to merge two datasets.

void MoleculeSet::addMutag string  aFileName,
string  rFileName = "",
uint  numMolToRead = 500
 

reads the Mutag dataset. Atoms and bonds are read from aFileName while the biological activity of the molecules is read from file rFileName. numMolToRead specifies how many molecules to read, counting from the first one.

Examples:
moleculeset_example.cpp.

int MoleculeSet::addSD string  aFileName,
bool  genericAtomType = false,
long  beginMolecule = -1,
long  endMolecule = -1
 

adds an sd file content and returns the number of created molecules. if genericAtomType is true, then the atoms are not read from the periodic table, but are created based only on the label provided.

beginMolecule and endMolecule specify the index of the first and last molecule to include in the training set (starting count from 0). a value of -1 (default) means no limit.

Molecule* MoleculeSet::addSingleKCF string  aMolFile  ) 
 

creates a new molecule in the dataset and reads its definition from a Kcf file.

Molecule* MoleculeSet::addSingleMOL string  aMolFile,
bool  genericAtomType = false
 

creates a new molecule in the dataset and reads its definition from a MDL MOL file.

void MoleculeSet::addToGram int  row,
int  col,
double  value
 

adds a value to a Gram matrix entry.

void MoleculeSet::addToGramNormal int  row,
int  col,
double  value
 

adds a value to a Gram normal matrix entry.

vector<string> MoleculeSet::atomsLabelsListing  ) 
 

lists the different kinds of atoms present in the set based on their Morgan labels.

vector<string> MoleculeSet::atomsSymbolsListing  ) 
 

lists the different kinds of atoms present in the set based on their symbols.

void MoleculeSet::binClassifyFromDescriptor string  descriptorName,
float  value,
bool  smallerOrEqual = true
 

sets the activity of molecules in the set according to the value of a descriptor and a value. if true is passed as third argument (default) then molecules with descriptorName <= value are considered positive. if false is passed as third argument then molecules with descriptorName >= value are considered positive. if a descriptor is missing then the molecule is left without activity.

vector<int> MoleculeSet::bondsListing  ) 
 

lists the different kinds of bonds present in the set.

void MoleculeSet::deleteAll  ) 
 

deletes all molecules in dataset (memory deallocation) and clears the vector containing the pointers.

void MoleculeSet::deleteHiddenAtoms  ) 
 

deletes all hidden atoms in all molecules of the set.

void MoleculeSet::describe bool  selectedOnly = false  ) 
 

writes a description of the moleculeSet to cout.

void MoleculeSet::describeLong  ) 
 

writes a long description of the moleculeSet to cout.

void MoleculeSet::describeShort  ) 
 

writes a short description of the moleculeSet to cout.

double MoleculeSet::diversityBaryMean  ) 
 

returns the mean distance of all molecules to the moleculeSet barycenter.

Molecule* MoleculeSet::findFirstMoleculeWithName string  aName  )  throw ( CError )
 

returns a pointer to the molecule with name aName in the MoleculeSet.

int MoleculeSet::getConvergenceCondition  )  [inline]
 

returns the convergence condition currently in use in the MoleculeSet for the calculation of random walk graph Kernel.

double MoleculeSet::getGramValue int  row,
int  col
 

returns a Gram matrix entry.

Molecule* MoleculeSet::getMolByIndex int  anInd  )  throw ( CError )
 

returns a pointer to the molecule of index anInd in the set.

Molecule* MoleculeSet::getMolByName string  aName  )  throw ( CError )
 

returns a pointer to the first molecule with name aName in the set. Throws a CError exception if no molecule with that name exists in the set. WARNING: if more than one molecule has the same name then a CError is also thrown.

long MoleculeSet::getPossibleValuesInIntDescriptor string  aDescriptorName,
vector< int > *  p
 

fills a vector of int with the values an int descriptor can take among all molecules and returns the number of such values.

double MoleculeSet::getPq  )  [inline]
 

returns the stop probability currently in use in the MoleculeSet for the calculation of random walk graph Kernel.

string MoleculeSet::getSortDescriptorName  )  [protected]
 

returns the name of the sorting descriptor.

void MoleculeSet::gramCompute MoleculeSet anotherSet,
double  aPq,
double(*)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int)  pt2GraphKernel,
double(*)(Atom *, Atom *)  pt2AtomKernel,
double(*)(Bond *, Bond *)  pt2BondKernel,
int  parameter1 = 1000,
int  parameter2 = 1,
string  aReportFileName = "",
int  nbThreadsWanted = 1,
bool  silentMode = false,
bool  filterTotters = false
 

"TRUE" gramCompute() function, i.e., the one called by EVERY OTHER GRAMCOMPUTE FUNCTION.

void MoleculeSet::gramCompute double  aPq,
double(*)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int)  pt2GraphKernel,
double(*)(Atom *, Atom *)  pt2AtomKernel,
double(*)(Bond *, Bond *)  pt2BondKernel,
int  parameter1 = 1000,
int  parameter2 = 1,
string  aReportFileName = "",
int  nbThreadsWanted = 1,
bool  silentMode = false,
bool  filterTotters = false
 

calculates the gram matrix of similarity using the marginalized graph kernel for all molecules in the MoleculeSet using a specified graph, atom and bond kernel. if aReportFileName is specified a report is saved in that file.

void MoleculeSet::gramCompute MoleculeSet anotherSet,
double  aPq,
double(*)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int)  pt2GraphKernel,
double(*)(Atom *, Atom *)  pt2AtomKernel,
double(*)(Bond *, Bond *)  pt2BondKernel,
int  aParameter = 1000,
string  aReportFileName = "",
int  nbThreadsWanted = 1,
bool  silentMode = false,
bool  filterTotters = false
 

calculates the gram matrix of similarity using the marginalized graph kernel for all molecules in the MoleculeSet using a specified graph, atom and bond kernel. if aReportFileName is specified a report is saved in that file.

void MoleculeSet::gramCompute double  aPq,
double(*)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int)  pt2GraphKernel,
double(*)(Atom *, Atom *)  pt2AtomKernel,
double(*)(Bond *, Bond *)  pt2BondKernel,
int  aParameter = 1000,
string  aReportFileName = "",
int  nbThreadsWanted = 1,
bool  silentMode = false,
bool  filterTotters = false
 

calculates the gram matrix of similarity using the marginalized graph kernel for all molecules in the MoleculeSet using a specified graph, atom and bond kernel. if aReportFileName is specified a report is saved in that file.

void MoleculeSet::gramCompute3D MoleculeSet anotherSet,
double(*)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(float, float, float), float)  pt2GraphKernel,
double(*)(Atom *, Atom *)  pt2AtomKernel,
double(*)(float, float, float)  pt2BondKernel,
float  edgeKernelparameter,
bool  silentMode
 

"TRUE" gramCompute3D function, i.e., the one called by EVERY OTHER GRAMCOMPUTE3D FUNCTION.

void MoleculeSet::gramCompute3D double(*)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(float, float, float), float)  pt2GraphKernel,
double(*)(Atom *, Atom *)  pt2AtomKernel,
double(*)(float, float, float)  pt2BondKernel,
float  edgeKernelparameter,
bool  silentMode
 

3D kernel computation. NOTE: the AtomKernel/BondKernel prototypes are different from those of gramCompute.

bool MoleculeSet::hasActivity  )  [inline]
 

returns true if the activity of molecules in the set was set using readActivityFile. WARNING: no check is made to verify if all molecules were set.

void MoleculeSet::hideHydrogens  ) 
 

hides all hydrogens in all molecules of the set.

void MoleculeSet::hideSalts string  aReportFileName = ""  ) 
 

hides all but the largest connected graph in all molecules of the set.

void MoleculeSet::initializeGram double  value  ) 
 

initializes every element of the gram matrix to the given value.

void MoleculeSet::initializeSelfKernel double  value  ) 
 

initializes the self kernels to the given value.

void MoleculeSet::kernelCompute Molecule aMol,
double(*)(Molecule *mol1, Molecule *mol2, double(*pt2AtomKernel)(Atom *, Atom *), double(*pt2BondKernel)(Bond *, Bond *), int, int)  pt2GraphKernel,
double(*)(Atom *, Atom *)  pt2AtomKernel,
double(*)(Bond *, Bond *)  pt2BondKernel,
vector< double > *  resultsRaw,
vector< double > *  resultsNormal,
int  convergenceCondition = 1000,
int  parameter2 = 1,
bool  silentMode = false
 

computes all kernel values between a molecule aMol and all compounds in the set.

void MoleculeSet::minMaxDistances double *  distMin,
double *  distMax
 

computes the Euclidean distance between the XYZ coordinates of two atoms.

bool MoleculeSet::nameExists string  aName  ) 
 

returns true if a molecule with name aName exists in the set, false otherwise.

void MoleculeSet::normalizeGram  ) 
 

normalizes the gram matrix, i.e. compute gramNormal.

void MoleculeSet::normalizeGram_raw  ) 
 

normalizes the gram matrix, i.e. compute gramNormal. WARNING: NORMALIZATION BASED ON THE RAW GRAM MATRIX (instead of self-kernels)

void MoleculeSet::normalizeTanimoto  ) 
 

normalizes the Gram matrix according to the Tanimoto kernel definition (Ralaivola et al. 2005).

void MoleculeSet::normalizeTanimoto_raw  ) 
 

normalizes the Gram matrix according to the Tanimoto kernel definition, BASED ON THE RAW GRAM MATRIX

void MoleculeSet::normalizeTanimotoMinMax  ) 
 

normalizes the Gram matrix according to the 'min-max' Tanimoto kernel definition (Ralaivola et al. 2005).

void MoleculeSet::noTottersTransform  ) 
 

transforms the molecular graphs into graph preventing tottering paths (see (Mahe et al., 2004)).

uint MoleculeSet::numMolecules  )  [inline]
 

returns the number of molecules in the MoleculeSet.

Molecule* MoleculeSet::operator[] int  anInd  )  throw ( CError )
 

returns a pointer to the molecule of index anInd in the set.

Molecule* MoleculeSet::operator[] string  aName  )  throw ( CError )
 

returns a pointer to the first molecule with name aName in the set. Throws a CError exception if no molecule with that name exists in the set. WARNING: if more than one molecule has the same name then a CError is also thrown.

void MoleculeSet::pushFragments Molecule aMol,
int  minAtoms = 1
 

NOT DOCUMENTED

void MoleculeSet::readActivityFile string  aFileName  ) 
 

reads the activity of molecules from an activity file. expects a file containing, one entry per line: "label [tab] class"; "molname [tab] 1" for active molecules; "molname [tab] -1" for inactive molecules. molname is the fileName of the Mol file.

void MoleculeSet::readDescriptorFile string  aFileName,
string  separator = ";"
 

reads a descriptor file. the first line in the file indicates the Descriptor comment text. the second line in the file indicates the Descriptor units. the third line in the file indicates the Descriptor types. the fourth line indicates the Descriptor names. data start at the fifth line.

the first column should contain the same molecule names as those obtained by getName() for the molecules in the dataset.

example:

Full name;Log octanol partition coefficient;boiling point;special comment
NA;NA;K;NA
string;float;float;string
name;logP;Bp;acomment
ethane;10;200;junk data

WARNING: does not check for duplicate name entries in the descriptor file. WARNING: does not add anything to the molecules not contained in the descriptor file.

void MoleculeSet::readGistActivityFile string  aFileName,
string  aDescriptor
 

reads the content of a gist activity file and adds the information to descriptor aDescriptor for each molecule matching molecule names.

void MoleculeSet::readGistClassifyFile string  aFileName  ) 
 

reads the content of a file produced by gist-classify and adds the information matching molecule names in the set as descriptors gistname, gistclass, and gistdiscr.

void MoleculeSet::readGram string  aFileName,
vector< vector< double > > *  gram
 

reads a gram matrix matching the dataset. emits an error if the dimension of the gram matrix read does not match the number of compounds in the dataset.

void MoleculeSet::readGramNormal string  aFileName  ) 
 

reads a normalized gram matrix matching the dataset.

void MoleculeSet::readGramRaw string  aFileName  ) 
 

reads a raw gram matrix matching the dataset.

void MoleculeSet::readKcfDirectory string  dataDir,
long  beginMolecule = -1,
long  endMolecule = -1
 

loads all .kcf files in a directory and adds the molecules to the set.

void MoleculeSet::readMolDirectory string  aPath,
bool  genericAtomType = false,
long  beginMolecule = -1,
long  endMolecule = -1
 

loads all .mol files in a directory and adds the molecules to the set.

void MoleculeSet::readPartialCharges string  fileName  ) 
 

reads the partial charges associated to the molecule set from an input file. NOTE : the input file has one line per molecule of the molecule set, and within each line, the values of the partial charges are separated by ';'.

void MoleculeSet::removeDuplicates  ) 
 

removes duplicate entries in the set.

void MoleculeSet::resetGramMatrix  ) 
 

erases the Gram Matrix and sets the gramCalculated flag to false.

void MoleculeSet::resetSelfKernels  ) 
 

resets all calculated selfkernels for the molecules in the dataset.

void MoleculeSet::restoreHiddenAtoms  ) 
 

restores the hidden atoms for all molecules.

int MoleculeSet::select vector< string > *  aSubset  ) 
 

selects the molecules with the names provided as arguments in a vector of string. unselect all others. returns the number of selected molecules.

void MoleculeSet::selectAll  ) 
 

selects all molecules in the dataset. WARNING the selection status is stored in the Molecule class. Therefore molecules in other datasets will be selected too...

long MoleculeSet::selectByActivity float  aValue  ) 
 

selects all molecules having activity equal to aValue.

long MoleculeSet::selectByFloatDescriptor string  aName,
float  aValue
 

selects all molecules having float descriptor aName with value aValue, unselects all others.

long MoleculeSet::selectByIntDescriptor string  aName,
int  aValue
 

selects all molecules having int descriptor aName with value aValue, unselects all others.

int MoleculeSet::selectByMW float  minmw,
float  maxmw = -1,
bool  addMolecularDescriptor = false
 

selects all molecules in the dataset with mw >= minmw and <= maxmw. if maxmw = -1 then there is no maximum limit. if addMolecularDescriptor is true then a floatDescriptor with label mw is added to the molecule.

int MoleculeSet::selectByNumAtoms float  minNumAtoms,
float  maxNumAtoms = -1,
bool  addMolecularDescriptor = false
 

selects all molecules in the dataset with number of atoms >= minNumAtoms and <= maxNumAtoms. if maxNumAtoms = -1 then there is no maximum limit. if addMolecularDescriptor is true then an intDescriptor with label numAtoms is added to the molecule.

long MoleculeSet::selectHasActivity  ) 
 

selects all molecules which have a defined activity status.

void MoleculeSet::setActivity string  aMolecule,
float  aValue
 

sets the activity of aMolecule to aValue.

void MoleculeSet::setComparisonSet MoleculeSet  ) 
 

sets the comparison set of the moleculeSet.

void MoleculeSet::setIntDescriptor string  aLabel,
string  aMolecule,
int  aValue
 

sets the value of intDescriptor aLabel of molecule aMolecule to aValue.

void MoleculeSet::setIntDescriptor string  aName,
int  aValue
 

sets integer aName to aValue for all compounds in the dataset.

void MoleculeSet::setKashimaKernelParam double  aPq,
int  aConvergenceCondition,
bool  skipSkeleton = false
 

sets the start, stop (aPq) and transition probabilities. sets the start, stop (aPq) and transition probabilities for all molecules according to the article by Kashima et al. using the setKashimaKernelProb(aFloat) function of the Molecule class. WARNING: a call to this function erases the gram Matrix and it will be recalculated.

void MoleculeSet::setMorganChargesLabels double  threshold  ) 
 

sets the 'morgan charges' labels of the atoms, i.e., the concatenation of the Morgan labels of the atoms and the (+/-) sign of their partial charges.

void MoleculeSet::setMorganLabels int  anOrder  ) 
 

sets the morganLabels of each molecule to the anOrder iteration of the Morgan index calculation process.

void MoleculeSet::setSortDescriptor string  aName,
int  aType,
bool  reverse = false
throw ( CError ) [protected]
 

sets the type and name of descriptor to be used when sorting molecules. if reverse == true the sorting will be in descending order.

void MoleculeSet::setUniqueMorganIndices  ) 
 

sets the uniqueMorganIndex of each atom to the Morgan index having the maximum of different connectivity values for the molecule.

void MoleculeSet::sortByDescriptor string  aDescriptorName,
bool  reverse = false
 

sorts the molecule collection according to the molecule Descriptor aDescriptorName. descriptor type will be read from the descriptor name if the name is of the form *.integer or *.float, and set to string otherwise. if reverse = true then the sorting is in decreasing order.

void MoleculeSet::sortByDescriptor string  aDescriptorName,
int  aDescriptorType,
bool  reverse = false
 

sorts the molecule collection according to the molecule Descriptor descriptorName of type descriptorType. if reverse = true then the sorting is in decreasing order.

void MoleculeSet::sortByMW  ) 
 

sorts all compounds in the set by Molecular weight.

void MoleculeSet::sortByNumAtoms  ) 
 

sorts all compounds in the set by their number of atoms.

void MoleculeSet::substractToGram int  row,
int  col,
double  value
 

subtracts a value from a Gram matrix entry.

void MoleculeSet::threeDtransform int  nBins,
double  distMin,
double  distMax
 

transforms the molecular graphs into '3D complete graphs' with edges labeled by inter atomic distances in order to compute the pharmacophore kernel (see (Mahe et al., 2006)).

string MoleculeSet::toString bool  selectedOnly = false  ) 
 

returns a string description of the MoleculeSet.

string MoleculeSet::toStringLong  ) 
 

returns a long string description of the MoleculeSet.

string MoleculeSet::toStringShort  ) 
 

returns a short string description of the MoleculeSet (number of molecules in the set and short description of each molecule).

int MoleculeSet::unSelect vector< string > *  aSubset  ) 
 

unselects the molecules with the names provided as arguments in a vector of string. returns the number of selected molecules.

void MoleculeSet::unSelectAll  ) 
 

unselects all molecules in the dataset. WARNING the selection status is stored in the Molecule class. Therefore molecules in other datasets will be unselected too...

void MoleculeSet::writeActivityFile string  aFilename,
bool  addActivityExtension = true,
string  activityDescriptor = ACTIVITY
 

writes a file with the biological activity of molecules in a format compatible with GIST.

void MoleculeSet::writeDescriptors string  aFileName,
bool  selectedOnly = false
throw ( CError )
 

writes a ';' separated file containing all descriptors for all molecules.

long MoleculeSet::writeDotsToDir string  aDirectory,
bool  selectedOnly = false,
bool  perretLabels = false
 

writes a dot file for each molecule in the set to aDirectory.

void MoleculeSet::writeGramMatrix string  aFileName,
bool  normal = false,
bool  self = false,
bool  silentMode = false
 

writes the gram matrix.

Examples:
moleculeset_example.cpp.

void MoleculeSet::writeKCF string  aFileName,
bool  selectedOnly = false
 

writes a KCF file with the whole moleculeSet.

long MoleculeSet::writeMolToDir string  aDirName,
bool  selectedOnly = false
 

writes a mol file for each molecule in the set to aDirName. if selectedOnly == true then only selected molecules are written.

void MoleculeSet::writeSD string  aFileName,
bool  selectedOnly = false
 

writes a MDL structure data (SD) file with the molecules in the moleculeSet. setting selectedOnly to true outputs only the selected compounds. a pointer to a vector of strings containing the molecule names for ordered output can be specified.

void MoleculeSet::writeSelfKernelList string  aFilename,
bool  silentMode = false
 

writes all self kernel values in a file.

void MoleculeSet::writeSubsetKCF string  aFileName,
vector< string > *  anOrder
 

writes a KCF file with the molecules in the moleculeSet matching the names given as argument.

void MoleculeSet::writeSubsetSD string  aFileName,
vector< string > *  anOrder
 

writes a MDL structure data (SD) file with the molecules in the moleculeSet matching the names given as argument.


Member Data Documentation

bool MoleculeSet::activitySet [protected]
 

stores if the activity of the molecules in the set was specified.

MoleculeSet* MoleculeSet::comparisonSet [protected]
 

comparison set of the molecule set. In test set mode, the comparison set is another set of compounds. In self set mode, the comparison set is the molecule set itself.

int MoleculeSet::convergenceCondition [protected]
 

convergence condition used in the calculation of the Kashima Kernel.

vector< vector<double> >* MoleculeSet::gram [protected]
 

Gram matrix.

bool MoleculeSet::gramCalculated [protected]
 

contains true if the gram matrix was evaluated for the current MoleculeSet, false otherwise. note that setKashimaKernelProb sets this flag to false. {Addition of a new molecule to the set when the flag is set to true induces the calculation of a new line to the gram matrix DEPRECATED}

vector< vector<double> >* MoleculeSet::gramNormal [protected]
 

normal Gram matrix.

double MoleculeSet::pq [protected]
 

kashima Stop probability for the moleculeSet used to set kashimaProb in added molecules.

int MoleculeSet::subsetSize [protected]
 

subset size.

int MoleculeSet::subsetStart [protected]
 

subset start.


The documentation for this class was generated from the following file:
Generated on Wed Nov 28 12:12:52 2007 for ChemCpp by  doxygen 1.4.6