molecule.h

00001 /****************************************************************************************
00002                                           molecule.h 
00003                                         --------------
00004     copyright            : (C) 2006 Jean-Luc Perret - Pierre Mahé
00005     email                : jean-luc.perret@unine.ch - pierre.mahe@ensmp.fr
00006  ***************************************************************************************/
00007 
00008 /****************************************************************************************
00009  *                                                                                      *
00010  *      This program is free software; you can redistribute it and/or                   *
00011  *      modify it under the terms of the GNU Lesser General Public                      *
00012  *      License as published by the Free Software Foundation; either                    *
00013  *      version 2.1 of the License, or (at your option) any later version.              *
00014  *                                                                                      *
00015  *      This program is distributed in the hope that it will be useful,                 *
00016  *      but WITHOUT ANY WARRANTY; without even the implied warranty of                  *
00017  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU               *
00018  *      Lesser General Public License for more details.                                 *
00019  *                                                                                      *
00020  *      You should have received a copy of the GNU Lesser General Public                *
00021  *      License along with this library; if not, write to the Free Software             *
00022  *      Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA      *
00023  *                                                                                      *
00024  ****************************************************************************************/
00025 
00026 
00027 
00028 #ifndef MOLECULE_H
00029 #define MOLECULE_H
00030 
00031 #include <vector>
00032 #include <map>
00033 #include <sstream>
00034 #include <locale>
00035 #include <string>
00036 #include <algorithm>
00037 
00038 
00039 #include <datacontainer.h>
00040 #include <atom.h>
00041 #include <elements.h>
00042 #include <constant.h>
00043 
00044 //#include <kashimathreadstruct.h>
00045 
00046 extern Elements elements; // declaration only: the Elements object itself is defined in another translation unit
00047 
00048 
00076 class Molecule : public DataContainer  {
              // Molecule: a DataContainer that additionally holds a list of Atom pointers
              // (plus a separate list of "hidden" atoms), a list of Ring pointers (sssr),
              // an activity value guarded by a "set" flag, a cached self-kernel value and
              // two matrices (adjacency, walks) held by pointer.
              //
              // NOTE(review): the comments below were reconstructed from the signatures and
              // inline bodies visible in this header only. Anything marked NOTE(review) is
              // an assumption that must be confirmed against the implementation file.
00077
00082 public:
00083
00085
00086
              /** Default constructor. */
00089         Molecule();
00090
              /** Virtual destructor. */
00093         virtual ~Molecule();
00094
              /**
               * Copy-style constructor.
               * NOTE(review): takes a NON-const reference, so it cannot bind to const
               * molecules or temporaries. The effect of bool_resetMorganIndex is defined
               * in the implementation, not here.
               */
00097         Molecule( Molecule& aMolecule , bool bool_resetMorganIndex = true );
00098
              /**
               * Builds a molecule from two molecules and atom/bond kernel callbacks
               * (the bond kernel compares two Bond objects).
               * NOTE(review): presumably a product graph used for kernel computation - confirm.
               */
00103         Molecule(
00104                                 Molecule& molecule1,
00105                                 Molecule& molecule2,
00106                                 double (*pt2AtomKernel)( Atom*, Atom* ),
00107                                 double (*pt2BondKernel)( Bond*, Bond* )
00108                                 );
00109
              /**
               * Same as above, but the bond kernel takes three floats and an extra
               * edgeKernelParameter is supplied.
               * NOTE(review): meaning of the three float arguments is not visible here.
               */
00113         Molecule(
00114                         Molecule& m1,
00115                         Molecule& m2,
00116                         double (*pt2AtomKernel)( Atom*, Atom*),
00117                         double (*pt2BondKernel)( float, float, float ),
00118                         float edgeKernelParameter
00119                         );
00120
00121
              /** Assignment operator; returns a reference to a Molecule. */
00124         Molecule& operator=( const Molecule& aMolecule );
00125
              /**
               * Adds an atom identified by a symbol string and returns a pointer to it.
               * Declared to throw CError.
               * NOTE(review): presumably looks the symbol up in the global `elements` - confirm.
               */
00133         virtual Atom* addAtom( string aSymbol, bool resetSSSR = true ) throw( CError );
00134
              /** Adds an already constructed Atom and returns an Atom pointer. Declared to throw CError. */
00140         Atom* addAtom( Atom*, bool resetSSSR = true, bool resetMorganIndex = true ) throw( CError);
00141
00142
              /**
               * linkAtoms overloads: create a bond between two atoms given as pointers,
               * as strings, or as ints, and return the Bond pointer. Only aBondLabel is
               * mandatory; the other bond attributes default to 0.
               * NOTE(review): what the string / int arguments identify (name, label, id,
               * index) is not visible in this header.
               */
00147         Bond* linkAtoms( Atom* aSource, Atom* aTarget, int aBondLabel, int aBondStereo = 0,  int aBondNotUsed = 0, int aBondTopology = 0, int aBondReactionCenter = 0, bool resetSSSR = true );
00148
00153         Bond* linkAtoms( string aSource, string aTarget, int aBondLabel, int aBondStereo = 0,  int aBondNotUsed = 0, int aBondTopology = 0, int aBondReactionCenter = 0, bool resetSSSR = true ) throw( CError );
00154
00159         Bond* linkAtoms( int firstAtom, int secondAtom, int aBondLabel, int aBondStereo = 0,  int aBondNotUsed = 0, int aBondTopology = 0, int aBondReactionCenter = 0, bool resetSSSR = true ) throw( CError );
00160
00161
              /**
               * Reads the molecule from the file named aFilename. Declared to throw CError.
               * NOTE(review): presumably MDL MOL format; effect of genericAtomType not visible here.
               */
00171         void readMOL( string aFilename, bool genericAtomType = false ) throw(CError);
00172
              /** NOTE(review): presumably an older MOL reader kept for compatibility - confirm. */
00175         void readMOLOld( string aFilename ) throw(CError);
00176
              /**
               * Clean-up helpers. Implementations are outside this header.
               * NOTE(review): the names suggest which part of the state each one clears
               * (everything, hidden atoms, rings, adjacency matrix, walks matrix).
               */
00180         void erase();
00181
00184         void eraseHiddenAtoms();
00185
00188         void eraseRings();
00189
00192         void eraseAdjacency();
00193
00196         void eraseWalks();
00197
00199
00200
00201
00203
00204
              /** Returns the value of the `id` member. */
00206         int getId() const { return( id ); }
00207
              /** NOTE(review): presumably the id rendered as a string - confirm. */
00210         string getIdString();
00211
              /** Returns the value of the string descriptor called "name". */
00214         string getName(){ return( getStringDescriptor( "name" )->getValue() ); }
00215
              /** Stores aName in the string descriptor called "name" via setStringDescriptor(). */
00218         void setName( string aName ) { setStringDescriptor( "name", aName, "", "", true, true ); }
00219
              /** Stores the activity value and marks the activity as set. */
00222         void setActivity( float aNumber ){ activity = aNumber; flagActivity = true; }
00223
              /**
               * Stores the activity parsed from text and marks the activity as set.
               * Parsing uses atof(), which reports no error: text that does not start
               * with a number yields 0.0 and is still recorded as a valid activity.
               */
00226         void setActivity( string aNumber ){ activity = atof( aNumber.c_str() ); flagActivity = true; }
00227
              /** Returns the activity. Declared to throw CError. NOTE(review): role of silentMode not visible here. */
00230         float getActivity( bool silentMode = false ) throw( CError );
00231
              /**
               * Tells whether an activity value is currently set (value of flagActivity).
               * Returns bool: the result is a yes/no flag, and bool converts implicitly
               * to 0/1 in any caller that stored the result in a numeric variable.
               */
00234         bool hasActivity(){ return( flagActivity ); }
00235
              /** Marks the activity as not set; the stored activity value itself is left untouched. */
00238         void unsetActivity(){ flagActivity = false; }
00239
00240
              /** Returns a mutable reference to the internal vector of atom pointers. */
00243         vector<Atom*>& getAtoms(){ return( atoms ); }
00244
              /** Returns the atom for anId. Declared to throw CError. */
00248         Atom* getAtom(int anId) throw( CError );
00249
              /** NOTE(review): presumably returns the atom at position ind in the atom list - confirm. */
00250         Atom* getAtomByIndex(int ind);
00251
00255         bool atomExists(Atom* anAtom);
00256
              /** Number of entries in the atom list (hidden atoms are kept in a separate list). */
00259         int numAtoms(){ return( atoms.size() ); }
00260
              /** Number of entries in the hidden-atom list. */
00263         int numHiddenAtoms(){ return( hiddenAtoms.size() ); }
00264
00265
00268         int numBonds();
00271         int numHiddenBonds();
00272
00275         long bondSum();
00276
00279         void unsetBondFlags();
00282         void unsetBondFlagsOriginal();
00283
              /** Returns the `chiral` flag. */
00286         bool isChiral(){ return( chiral ); }
00287
              /** Selection flag handling: set, clear and query selectedFlag. */
00290         void select(){ selectedFlag = true; }
00293         void unSelect(){ selectedFlag = false; }
00296         bool isSelected(){ return( selectedFlag ); }
00297
00301         void setSortDescriptor( string aName, int aType );
00302
00303
              /** Returns the stored sort-descriptor type. */
00306         int getSortDescriptorType(){ return( sortDescriptorType ); }
00307
              /** Returns the stored sort-descriptor name. */
00310         string getSortDescriptorName(){ return( sortDescriptorName ); }
00311
00314         Ring* getRingWithID( int anID, bool createIfMissing) throw( CError );
00315
00319         bool hasRing( Ring* newRing, bool detectingRing = false ) throw( CError );
00320
00324         bool hasRing() throw( CError );
00325
              /**
               * Returns the number of rings stored in sssr.
               * Throws a CError with code SSSRNOTDETECTED when hasSSSRDetected() is false;
               * ring detection is NOT triggered from here (the call is commented out).
               */
00328         int numRings() throw( CError ){
00329                 if( !hasSSSRDetected() ){
00330                         //detectSSSR();
00331                         CError e( SSSRNOTDETECTED, "Smallest Set of Smallest Rings was not detected before calling Molecule::numRings()" );
00332                         //e.describe();
00333                         throw(e);
00334                 }
00335                 return( sssr.size() );
00336         }
00337
00338
              /** Returns flagHasSSSRDetected. */
00341         bool hasSSSRDetected(){ return( flagHasSSSRDetected ); }
00342
              /** NOTE(review): presumably the molecular weight; declared to throw CError. */
00345         float getMW( bool silentError = false ) throw( CError );
00346
              /** Same result as numAtoms(): the size of the atom list. */
00349         int getNumAtoms(){
00350                 return( atoms.size() );
00351         }
00352
              /** Returns the `location` string. */
00355         string getLocation(){ return( location ); }
00356
              /** Returns the `originalFormat` code. NOTE(review): meaning of the codes is not defined in this header. */
00360         int getOriginalFormat(){ return( originalFormat ); }
00361
              /** Stores the `originalFormat` code. */
00365         void setOriginalFormat( int a ){ originalFormat = a; }
00366
00368
00369
00370
00371
00372
00373
00375
00376
              /**
               * Atom hiding / restoring and structural editing. Declarations only.
               * NOTE(review): the int return values are presumably counts of affected
               * atoms or bonds - confirm against the implementation.
               */
00380         int hideAtomsByIntDescriptor( string aDescriptorName, int aValue, bool refreshBonds = true );
00381
00385         void hideAtom( vector< Atom* >::iterator anAtomI );
00386
00389         void hideAtomAndToFromBonds( vector< Atom* >::iterator anAtomI );
00390
00393         void hideAtomAndToFromBonds( Atom* anAtom );
00394
00399         int hideHydrogens();
00400
00403         bool isHiddenAtom( Atom* anAtom );
00404
00407         int restoreHiddenAtoms( bool flagRestoreBonds = true );
00408
00411         int restoreHiddenBonds();
00412
00417         int refreshBonds();
00418
00421         void eraseAtom( Atom* anAtom ) throw( CError );
00422
00425         void deleteBonds();
00426
00429         void deleteHiddenAtoms();
00430
              /** Atom labelling (Morgan / Perret labels). Declarations only. */
00434         void setMorganLabels( int anOrder );
00435
00439         void setPerretLabels();
00440
00446         int setUniqueMorganIndices();
00447
00450         void resetMorganIndex();
00451
00454         int getNumberOfDistinctMorganIndices( int anOrder );
00455
00459         int getMaxMorganIteration();
00460
00461
00465         int getNumCarbonsOfComponent( string aDescriptorName, int aValue );
00466
00470         int getNumNitrogensOfComponent( string aDescriptorName, int aValue );
00471
00474         int numAtomsNonCSkeleton();
00475
00478         float atomicDistance(Atom* atom1, Atom* atom2);
00479
00484         void compute();
00485
00491         void binClassifyFromDescriptor( string descriptorName, float value, bool smallerOrEqual = true );
00492
00493
              /** Listing helpers: each takes a pointer to a caller-provided vector. NOTE(review): presumably filled in place. */
00496         void atomsLabelsListing( vector<string>* );
00497
00500         void atomsSymbolsListing( vector<string>* );
00501
00504         void bondsListing( vector<int>* );
00505
00508         void noTottersTransform();
00509
00512         void threeDtransform(int nBins, double distMin, double distMax);
00513
00516         void readPartialCharges(string charges);
00517
00520         void setMorganChargesLabels(double threshold);
00521
              /** Fragment / connected-component handling. Declarations only. */
00527         int hideSalts( stringstream* out );
00528
00531         int markFragments();
00532
00535         void unmarkFragments();
00536
00539         void hideAllFragmentsBut( int aFragmentNumber );
00540
00543         void writeFragments( ofstream* outStream );
00544
              /** NOTE(review): presumably a depth-first search from startAtom that writes markValue into the named int descriptor. */
00549         int DFS( Atom* startAtom, string intDescriptorName, int markValue );
00550
00557         int detectSSSR();
00558
              /** Marks the ring set as not detected; the content of sssr is not touched here. */
00561         void resetSSSR(){ flagHasSSSRDetected = false; }
00562
              /** Marks the ring set as detected. */
00565         void setHasSSSR(){ flagHasSSSRDetected = true; }
00566
00570         Bond* checkEdges( Atom* );
00571
00572
00574
00575
00576
00577
00578
00580
00581
              /** Iterators over the atom list. */
00584         vector<Atom*>::iterator beginAtom(){ return( atoms.begin() ); }
00587         vector<Atom*>::iterator endAtom(){ return( atoms.end() ); }
00588
              /** Iterators over the bonds of an atom; these forward to the atom's own beginBond()/endBond(). */
00591         map<Atom*, Bond*>::iterator beginBond( Atom* anAtom ){ return( anAtom->beginBond() ); }
00594         map<Atom*, Bond*>::iterator endBond( Atom* anAtom ){ return( anAtom->endBond() ); }
00595
              /** Same, with the atom obtained through getAtom( anId ) (which is declared to throw CError). */
00598         map<Atom*, Bond*>::iterator beginBond( int anId ){ return( getAtom( anId )->beginBond() ); }
00601         map<Atom*, Bond*>::iterator endBond( int anId ){ return( getAtom( anId )->endBond() ); }
00602
              /** Iterators over the componentSizes map. */
00605         map<int, int>::iterator beginComponentSizes(){
00606                 return( componentSizes.begin() );
00607         }
00608
00611         map<int, int>::iterator endComponentSizes(){
00612                 return( componentSizes.end() );
00613         }
00614
00616
00617
00618
00620
00621
              /** Kernel-related computations. Declarations only. */
00630         void setKashimaKernelProb( double aPq, bool skipSkeleton = false );
00631
00634         double sumPT();
00635
00640         double sumProbabilities();
00641
00644         double sumProbabilitiesFast();
00645
00650         double sumPQPS();
00651
00654         double sumPQPSFast();
00655
00656
00663         void raisePowerFast();
00664
              /**
               * Computes a kernel value against anotherMolecule using the supplied graph
               * kernel callback, which itself receives the atom and bond kernel callbacks
               * and two int parameters (parameter2 defaults to 1).
               */
00669         double computeKernel(
00670                                 Molecule* anotherMolecule,
00671                                 double (*pt2GraphKernel)(
00672                                         Molecule* mol1,
00673                                         Molecule* mol2,
00674                                         double (*pt2AtomKernel) ( Atom*, Atom* ),
00675                                         double (*pt2BondKernel)( Bond*, Bond* ),
00676                                         int parameter1, int parameter2
00677                                 ),
00678                                 double (*pt2AtomKernel)( Atom*, Atom* ),
00679                                 double (*pt2BondKernel)( Bond*, Bond* ),
00680                                 int parameter1, int parameter2 = 1 );
00681
00682
              /**
               * Returns the cached self kernel, computing it first when
               * selfKernelCalculated is false.
               * The value returned is always the selfKernel member: the return value of
               * calculateSelfKernel() is ignored, so this relies on that call storing
               * its result in selfKernel.
               * NOTE(review): calculateSelfKernel() is presumably also what sets
               * selfKernelCalculated - confirm, otherwise the value is recomputed on
               * every call.
               */
00685         double getSelfKernel( double(*pt2GraphKernel)( Molecule* mol1,
00686                 Molecule* mol2, double(*pt2AtomKernel)(Atom*, Atom*),
00687                 double(*pt2BondKernel)( Bond*, Bond* ), int, int ),
00688                 double(*pt2AtomKernel)( Atom*, Atom* ),
00689                 double(*pt2BondKernel)( Bond*, Bond* ) ,
00690                 int parameter1, int parameter2 = 1
00691                 ){
00692                         #ifdef DEBUG
00693                                 cout << "Molecule::getSelfKernel(...) " << endl;
00694                         #endif
00695
00696                         if(selfKernelCalculated == false){
00697                                 #ifdef DEBUG
00698                                         cout << "  sk not computed, computing" << endl;
00699                                 #endif
00700                                 calculateSelfKernel( pt2GraphKernel, pt2AtomKernel, pt2BondKernel, parameter1, parameter2 );
00701                         }
00702                         #ifdef DEBUG
00703                                 cout << "  returning " << selfKernel << endl;
00704                         #endif
00705                         return( selfKernel );
00706                 }
00707
00708
              /** Computes the self kernel with the given callbacks and parameters (no default for parameter2 here). */
00712         double calculateSelfKernel
00713         (
00714                                 double(*pt2GraphKernel)
00715                                 (
00716                                                 Molecule* mol1,
00717                                                 Molecule* mol2,
00718                                                 double(*pt2AtomKernel)(Atom*, Atom*),
00719                                                 double(*pt2BondKernel)(Bond*, Bond*),
00720                                                 int, int
00721                                 ),
00722                                 double(*pt2AtomKernel)( Atom*, Atom* ),
00723                                 double(*pt2BondKernel)( Bond*, Bond* ),
00724                                 int parameter1, int parameter2
00725         );
00726
00727
              /** Parameterless getter; declared to throw CError. NOTE(review): presumably throws when no self kernel has been computed. */
00730         double getSelfKernel() throw( CError );
00731
00734         void setSelfKernel(double value);
00735
00738         void addToSelfKernel( double );
00739
00742         void substractToSelfKernel( double );
00743
              /** Invalidates the cached self kernel so the next getSelfKernel(...) call recomputes it. */
00746         void resetSelfKernel(){ selfKernelCalculated = false; }
00747
00749
00750
00751
00753
00754
              /** Element access and operations on the adjacency and walks matrices. Declarations only. */
00756         void setAdjacency(int i, int j, double value);
00757
00760         double getAdjacency(int i, int j);
00761
00764         void setWalks(int i, int j, double value);
00765
00768         double getWalks(int i, int j);
00769
00772         void raisePowerAdjacency();
00773
00776         double traceWalks();
00777
00780         double traceDiagWalks();
00781
00782
00784
00785
00786
00788
00789
              /** Text rendering and file output. Declarations only. */
00793         string toString();
00794
00798         string toStringShort();
00799
00803         string toStringLong();
00804
00805
00809         void describe();
00810
00814         void describeShort();
00815
00820         void describeLong();
00821
00825         void describeEachAtom();
00826
00829         void writeMOL( string aFileName );
00830
00834         void writeDOT( string aFilename, bool perretLabels = false );
00835
00838         void writeSD( string aFileName );
00839
00841
00842
00843 // ******************************* //
00844 // **** DEPRECATED FUNCTIONS **** //
00845 // ****************************** //
00846
00847
00850         //void saveAllBonds();
00851
00852
00853
00854
00855
00867         //virtual float moleculeKernel( Molecule* anotherMolecule, float (*pt2AtomKernel)( Atom*, Atom* ), float (*pt2BondKernel)( Bond*, Bond* ), int convergenceCondition = 1000 );
00868
00869
00875         //virtual float powerKernelUntilN( Molecule* anotherMolecule, float (*pt2AtomKernel)( Atom*, Atom* ), int maxPower = 4 );
00876
00883         //virtual float powerKernelConverge( Molecule* anotherMolecule, float (*pt2AtomKernel)( Atom*, Atom* ), int converg = 1000 );
00884
00889         //virtual float powerKernelOrderN( Molecule* anotherMolecule, float (*pt2AtomKernel)( Atom*, Atom* ), int length );
00890
00891
00892
00893
00923         //  void* kashimaKernelThread(void* arg);
00924
00925
00926
00927
00931         //void raisePower();
00932
00933
00934
00935
00936
00941         //void exportFragments( string sdFileName, int minAtoms = 1 );
00942
00943
00944
00945 protected:
00946
              /** NOTE(review): presumably called after structural edits to invalidate cached SSSR / Morgan data - confirm. */
00949         void moleculeChanged( bool resetSSSR = true, bool resetMorganIndex = true );
00950
00951
              /** Atom list exposed by getAtoms(), beginAtom()/endAtom() and counted by numAtoms(). */
00954         vector<Atom*> atoms;
00955
              /** Separate atom list counted by numHiddenAtoms(). */
00959         vector<Atom*> hiddenAtoms;
00960
00961
              /** Ring list; numRings() returns its size. */
00964         vector<Ring*> sssr;
00965
              /** True once the ring set is flagged as detected (see setHasSSSR()/resetSSSR()). */
00968         bool flagHasSSSRDetected;
00969
              /** Class-wide counter. NOTE(review): presumably used to hand out `id` values - confirm. */
00972         static int counter;
00973
              /** Value returned by getId(). */
00976         int id;
00977
              /** Selection state, see select()/unSelect()/isSelected(). */
00980         bool selectedFlag;
00981
              /** Cached self-kernel value returned by getSelfKernel(...). */
00984         double selfKernel;
00985
              /** False when selfKernel must be (re)computed, see resetSelfKernel(). */
00988         bool selfKernelCalculated;
00989
              /** Value returned by isChiral(). */
00992         bool chiral;
00993
              /** True while an activity value is set, see setActivity()/unsetActivity(). */
00996         bool flagActivity;
00997
              /** Value returned by getSortDescriptorType(). */
01003         int sortDescriptorType;
01004
              /** Value returned by getSortDescriptorName(). */
01007         string sortDescriptorName;
01008
01012         int maxMorganIteration;
01013
01014
01015
01016 private: // Private methods
01017
01021         //void hideAllBonds();
01022
              /** NOTE(review): despite the name, this overload is declared to return a Bond pointer. */
01029         Bond* linkAtomsNoReturn( Atom* aSource, Atom* aTarget, int aBondLabel, int aBondStereo = 0,  int aBondNotUsed = 0, int aBondTopology = 0, int aBondReactionCenter = 0 );
01030
01033         void linkAtomsNoReturn( int firstAtom, int secondAtom, int aBondLabel ) throw( CError );
01034
01035
              /**
               * Per-atom-pair and per-atom tables of doubles, the first three held by pointer.
               * NOTE(review): presumably working storage for the *Fast kernel routines
               * (sumProbabilitiesFast, sumPQPSFast, raisePowerFast) - confirm.
               */
01038         map<Atom*, map<Atom*, double>* >* fastPT;
01041         map<Atom*, map<Atom*, double>* >* fastPTNext;
01044         map<Atom*, map<Atom*, double>* >* fastPTSave;
01047         map<Atom*, double> fastPQ;
01050         map<Atom*, double> fastPS;
01051
01052
              /** Map iterated by beginComponentSizes()/endComponentSizes(). */
01055         map< int, int > componentSizes;
01056
              /** Activity value; only meaningful while flagActivity is true. */
01059         float activity;
01060
              /** Value returned by getLocation(). */
01063         string location;
01064
              /** Value returned by getOriginalFormat(). */
01067         int originalFormat;
01068
              /** Matrix held by pointer; accessed through setAdjacency()/getAdjacency(). */
01071         vector< vector<double> >* adjacency;
01072
              /** Matrix held by pointer; accessed through setWalks()/getWalks(). */
01075         vector< vector<double> >* walks;
01076
01077 };
01078 
01079 #endif

Generated on Wed Nov 28 12:12:51 2007 for ChemCpp by  doxygen 1.4.6