Upload New File

fe1a622d · CitrangoloDestro Fabiola · b68ee131 · fe1a622d
Commit fe1a622d authored 1 year ago by CitrangoloDestro Fabiola
--- a/correctionfactor.py
+++ b/correctionfactor.py
+ # -*- coding: utf-8 -*-
+"""
+Created on Fri Mar 15 11:06:06 2024
+
+"""
+
+from rdkit import Chem
+from rdkit.Chem import rdmolfiles
+from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions,GetStereoisomerCount
+from rdkit.Chem.rdMolDescriptors import CalcNumAtomStereoCenters
+from rdkit.Chem import AllChem
+from _kinetic import iso_alkyl_C3,iso_alkyl_C4,iso_alkyl_C5,iso_alkyl_C6
+import subprocess as sub
+import copy
+from anytree import Node, RenderTree, PreOrderIter
+import ast,copy
+import os
+
+def matchreaction(Rsmiles,Psmiles):
+    """
+    Parameters
+    ----------
+    Rsmiles : TYPE str
+        DESCRIPTION. Smiles of the reactant species, it can be canonical or not
+    Psmiles : TYPE str
+        DESCRIPTION. Smiles of the product species, it can be canonical or not
+
+    Returns
+    -------
+    Arrhenius : TYPE list
+        DESCRIPTION. list of dictionaries containing the reaction TMTS model with the 
+        three Arrhenius paramters for the reaction and the statistical factor,
+        as the reaction may happen by different pathways
+        /!\ if no pathway is identified, then a None will be returned
+    """
+    pathways,cs= cyclesize(Rsmiles,Psmiles)
+    #check if it was possible to find a pathway and a cyclesize:
+    if len(cs)==0 or len(pathways)==0:
+        return None
+    Arrhenius=[]
+    for i in range(0,len(cs)):
+        cycle=cs[i]
+       # print(cycle)
+        path=pathways[i]
+        TMTSmodel,match= searchmatch(Rsmiles,Psmiles,cycle)
+        #print(TMTSmodel)
+        if match==1:
+            Arrhenius.append(TMTSmodel)
+        else:
+            #here, we need to call the code for calculating the correction factor
+            #for the statistical factor
+            Arrhenius.append(refiningarrhenius(TMTSmodel,Rsmiles,Psmiles,path,cycle))
+        
+    return Arrhenius
+
+
+def refiningarrhenius(TMTSmodel,Rsmiles,Psmiles,path,cycle):
+    """
+
+    Parameters
+    ----------
+    TMTSmodel : TYPE dic
+        DESCRIPTION. dictionary cointaining the reaction description as key 
+        (format "Rmiles>>Psmiles") and the list of Arrhenius parameters as 
+        descriptions [A (s-1),n, Ea(cal/mol) and Lc]
+    Rsmiles : TYPE str
+        DESCRIPTION. Smiles of the reactant species, it can be canonical or not
+    Psmiles : TYPE str
+        DESCRIPTION.Smiles of the reactant species, it can be canonical or not
+
+    Returns
+    -------
+    TMTSmodel : TYPE. dic
+    DESCRIPTION. corrected with a list of Arrhenius parameters after the correction factor is applied
+
+    """
+    print(TMTSmodel )
+    for model in TMTSmodel:
+        Lcmodel=TMTSmodel[model][-1]
+        fc=1/float(Lcmodel)
+        Rsmiles=Rsmiles.replace("@","")
+        Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles)))
+        nisoR=CalcNumAtomStereoCenters(Rmol)+1
+        opts = StereoEnumerationOptions(unique=True)
+        isomers = tuple(EnumerateStereoisomers(Rmol, options=opts))
+        isomerssmi=[]
+        for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers):
+            isomerssmi.append(smi)
+        isomerssmi=cleanisomers(isomerssmi)
+        NR=len(isomerssmi)
+        isomersextsym=[]
+        for isomer in isomerssmi:
+            moliso=Chem.MolFromSmiles(isomer)
+            m3= Chem.AddHs(moliso)
+            AllChem.EmbedMolecule(m3) 
+            AllChem.MMFFOptimizeMolecule(m3)
+            rdmolfiles.MolToPDBFile(m3,"C:/pwt/sym.pdb",flavor=16)
+            file = open("C:/pwt/sym.pdb","w+")
+            textinput = file.read()
+            textinputcorrige="COMPND    C:/pwt/base.xyz\nAUTHOR    GENERATED BY OPEN BABEL 3.1.0\n"+textinput
+            file.write(textinputcorrige)
+            callplatonsym()
+            extsymiso=tradsym(foundsym("C:/pwt/sym.lis"))
+            isomersextsym.append(copy.deepcopy(extsymiso))
+        if len(isomersextsym)==1:
+            sym=isomersextsym[0]
+            LR=[sym/nisoR]
+        else:
+            LR=[sym/2 for sym in isomersextsym]
+        # now we will create a dictionary of TS based on the structures:
+        TS=TSdic(path,Rmol)
+        subsTS=[TS[i] for i in TS]
+        uniquecombofsubs=createTS(subsTS)
+        NTS=len(uniquecombofsubs)
+        LTSs=[]
+        #print(uniquecombofsubs)
+        for ts in uniquecombofsubs:
+            nisoTS=2
+            symTS=1
+            #verifyif nisoTS=1
+            if cycle <=4 or cycle ==6:
+                if samesubextremes(ts)==True:
+                    nisoTS=1
+            invinvTS=middleCinversion(planinversion(ts))
+            if invinvTS==ts:
+                symTS=2
+            LTS=nisoTS/symTS
+            LTSs.append(copy.deepcopy(LTS))
+        print(LR)
+        print(LTSs)
+        Lmodel=[]
+        i=NR
+        for LTS in LTSs:
+            j=i%NR
+            Lmodeli=LTS*LR[j]
+            i+=1
+            Lmodel.append(copy.deepcopy(Lmodeli))
+        Lmodelfinal=sum(Lmodel)/NR
+        fc=fc*Lmodelfinal
+        print(fc)
+        TMTSmodel[model]=[str(fc*float(TMTSmodel[model][0])),TMTSmodel[model][1],TMTSmodel[model][2],str(Lmodelfinal)]
+        
+    return TMTSmodel
+def symfromsmi(smiles):
+    """
+    
+
+    Parameters
+    ----------
+    smiles : TYPE str
+        DESCRIPTION. smiles of the molecule
+
+    Returns
+    -------
+    sym: TYPE float
+        DESCRIPTION. integer with the symmetry value
+
+    """
+    moliso=Chem.MolFromSmiles(smiles)
+    m3= Chem.AddHs(m2)
+    AllChem.EmbedMolecule(m3) 
+    AllChem.MMFFOptimizeMolecule(m3)
+    rdmolfiles.MolToPDBFile(m3,"C:/pwt/sym.pdb",flavor=16)
+    file = open("C:/pwt/sym.pdb","r")
+    textinput = file.read()
+    file.close()
+    file2=open("C:/pwt/sym.pdb","w")
+    textinputcorrige="COMPND    C:/pwt/base.xyz\nAUTHOR    GENERATED BY OPEN BABEL 3.1.0\n"+textinput
+    textinputcorrige=textinputcorrige.replace(" H "," F ")
+    file2.write(textinputcorrige)
+    file2.close()
+    callplatonsym()
+    symiso=tradsym(foundsym("C:/pwt/sym.lis"))
+    
+def callplatonsym():
+    newpath = "C:\pwt"
+    os.chdir(newpath)
+    proc = sub.Popen(["platon", "-o", "sym.pdb\n"],
+                     stdin=sub.PIPE, stdout=sub.PIPE, shell=True, text=True)
+    output2, errors = proc.communicate(input="NONSYM\nexit\n")           
+
+def foundsym(filename):
+    symmetry = "C1"
+    symfile = open(filename, "r")
+    text = symfile.read()
+    lines = text.splitlines()
+    for i in range(0, len(lines)):
+        if lines[i][0:6] == "Resd #":
+            data = lines[i+2].split()
+            tol = float(data[9])
+            if tol == 0.1:
+                symmetry = str(data[6])
+    symfile.close()
+    return symmetry
+
+def tradsym(symmetry):
+    extsym = 1
+    if symmetry == "C1" or symmetry == "Cs":
+        extsym = 1
+    elif symmetry == "C2" or symmetry == "C2v":
+        extsym = 2
+    elif symmetry == "C3v":
+        extsym = 3
+    elif symmetry == "D2h":
+        extsym = 4
+    elif symmetry == "D3h" or symmetry == "D3d":
+        extsym = 6
+    elif symmetry == "D5h":
+        extsym = 10
+    elif symmetry == "Td":
+        extsym = 12
+    elif symmetry == "Oh":
+        extsym = 24
+    return extsym
+        
+def cleanisomers(listofiso):
+    """
+    
+
+    Parameters
+    ----------
+    listofiso : TYPE list
+        DESCRIPTION. list of smiles for isomers, chiral centers indicated with @ or @@
+
+    Returns
+    -------
+    listofisoclean : TYPE list
+        DESCRIPTION. list of smiles for isomers, chiral centers indicated with @ or @@, no
+                    double structure
+    """
+    already=[]
+    cleaniso=[]
+    for iso in listofiso:
+        if iso not in already:
+            cleaniso.append(iso)
+            already.append(iso)
+            already.append(inverseiso(iso))
+    return cleaniso
+
+def inverseiso(isomer):
+    """
+    
+
+    Parameters
+    ----------
+    isomer : TYPE str
+        DESCRIPTION.
+
+    Returns
+    -------
+    isomer : TYPE str
+        DESCRIPTION. @ and @@ inverted
+
+    """
+    isomer=isomer.replace("@@","XXX")
+    isomer=isomer.replace("@","XO")
+    isomer=isomer.replace("XXX","@")
+    isomer=isomer.replace("XO","@@")
+    
+    return isomer
+
+def searchmatch(Rsmi,Psmi,C):
+    """
+    
+    Parameters
+    ----------
+    Rsmi : TYPE str
+        DESCRIPTION. SMILES of reactant
+    Psmi : TYPE str
+        DESCRIPTION. SMILES of the product
+    C : TYPE integer
+        DESCRIPTION. size of the TScycle
+
+    Returns
+    -------
+     TMTSmodel : TYPE dic
+         DESCRIPTION. dictionary cointaining the reaction description as key 
+         (format "Rmiles>>Psmiles") and the list of Arrhenius parameters as 
+         descriptions [A (s-1),n, Ea(cal/mol) and Lc]
+         /!\ Return None if the reaction is not covered by ouyr database
+    """
+    Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmi)))
+    natomR=Rmol.GetNumAtoms()
+    Pmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Psmi)))
+    natomP=Pmol.GetNumAtoms()
+    if C==3:
+        kinetic=iso_alkyl_C3
+    elif C==4:
+        kinetic=iso_alkyl_C4
+    elif C==5:
+        kinetic=iso_alkyl_C5
+    elif C==6:
+        kinetic=iso_alkyl_C6
+    else:
+        return None,0
+    nmatch=0
+    TMTSmodel={}
+    wincase=""
+    match=0
+    for case in kinetic:
+        [Rmodel,Pmodel]=case.split(">>")
+        RmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rmodel)))
+        PmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Pmodel)))
+        ntestR=len(Rmol.GetSubstructMatch(RmodelMol))
+        ntestP=len(Pmol.GetSubstructMatch(PmodelMol))
+        if ntestR==ntestP:
+            if ntestR>nmatch:
+                match=ntestR/natomR
+                nmatch=ntestR
+                wincase=case
+    if wincase not in TMTSmodel:
+        TMTSmodel[wincase]=kinetic[wincase]
+        return TMTSmodel,match
+    else:
+        return None,0
+
+def cyclesize(Rsmiles,Psmiles):
+    """
+    Parameters
+    ----------
+    Rsmiles : TYPE str
+        DESCRIPTION. Smiles of the reactant species, it can be canonical or not
+    Psmiles : TYPE str
+        DESCRIPTION. Smiles of the product species, it can be canonical or not
+
+    Returns
+    -------
+    path : TYPE tuple
+        DESCRIPTION. tuple containing the indexes of atoms being part of the TS cycle
+    cyclesize : TYPE integer
+        DESCRIPTION. size of the TS cycle
+
+    """
+    Rsmilescanonical=Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles))
+    Psmilescanonical=Chem.MolToSmiles(Chem.MolFromSmiles(Psmiles))
+    RMol=Chem.MolFromSmiles(Rsmilescanonical)
+    PMol=Chem.MolFromSmiles(Psmilescanonical)
+    alkane=Chem.MolFromSmiles(equivalentalkane(Rsmilescanonical))
+    ridxR=radicalatomindex(Rsmilescanonical)
+    ridxP=radicalatomindex(Psmilescanonical)
+    path=[]
+    cyclesize=[]
+    rpositionsR,dicR=equivalencelist(Rsmilescanonical)
+    rpositionsP,dicP=equivalencelist(Psmilescanonical)
+    for atomR in rpositionsR:
+        for atomP in rpositionsP:
+            if atomP!=atomR:
+                path.append(Chem.rdmolops.GetShortestPath(alkane,atomR,atomP))
+    path=cleanpath(path,dicR)
+    cyclesize=[len(path[i])+1 for i in range(0,len(path))]
+    return path,cyclesize
+
+def cleanpath(path,dicR):
+    """
+    
+    Parameters
+    ----------
+    path : TYPE list
+        DESCRIPTION. list of tuples containing the atoms indexes of a isomerisation TS cycle
+    dicR : TYPE dictionary
+        DESCRIPTION. tom indexes (keys) with their respective cannonical classification
+
+    Returns
+    -------
+    path: TYPE list of tuples
+        DESCRIPTION. list of tuples containing the atoms indexes of a isomerisation TS cycle,
+        but this times cleaned from equivalent ones
+
+    """
+    cleanpath=[]
+    aux=[]
+    for p in path:
+        equivalent=[dicR[p[i]] for i in range(0,len(p))]
+        if equivalent not in aux:
+            cleanpath.append(p)
+            aux.append(equivalent)
+    return cleanpath
+
+def equivalencelist(radsmiles):
+    """
+
+    Parameters
+    ----------
+    radsmiles : str
+        DESCRIPTION. smiles of a radical alkyl
+
+    Returns
+    -------
+    alleq : list 
+        DESCRIPTION. atom indexes of the equivalent positions for the radical center
+    dic: dictionary
+        DESCRIPTION. atom indexes (keys) with their respective cannonical classification
+    """
+    
+    nonradsmiles=equivalentalkane(radsmiles)
+    Mol=Chem.MolFromSmiles(nonradsmiles)
+    radMol=Chem.MolFromSmiles(radsmiles)
+    listofequivalents=list(Chem.CanonicalRankAtoms(Mol, breakTies=False))
+    radicalidx=radicalatomindex(radsmiles)
+    canradical=listofequivalents[radicalidx]
+    alleq=[i for i in range(0,len(listofequivalents)) if listofequivalents[i]==canradical]
+    dic={}
+    for i in range(0,len(listofequivalents)):
+        dic[i]=listofequivalents[i] 
+    return alleq,dic 
+
+def radicalatomindex(smiles):
+    """
+
+    Parameters
+    ----------
+    smiles : string
+        DESCRIPTION. simplified molecular input line entry specification
+        it can be canonical or not
+        /!\ it must contain only ONE radical site, otherise the function will 
+            return None
+    Returns
+    -------
+    radical : integer
+        DESCRIPTION. index of the atom that contains the radical
+        if the species contain moe than 1 radicla site, it will return None
+    """
+    mol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(smiles)))
+    r=Chem.MolToSmiles(mol)
+    radical=[]
+    for atom in mol.GetAtoms():
+        n=atom.GetNumRadicalElectrons()
+        if n>0 :
+            radical.append(atom.GetIdx())
+    if len(radical)==1:
+        return radical[0]
+    else:
+        radical=None
+        return radical
+    
+def equivalentalkane(radicalsmiles):
+    """
+    
+    Parameters
+    ----------
+    radicalsmiles : str
+        DESCRIPTION.simplified molecular input line entry specification of a radical
+        species, it can be canonical or not
+
+    Returns
+    -------
+    newsmiles2 : str
+        DESCRIPTION. simplified molecular input line entry specification
+        of the equivalent non-radical species
+    
+    #important observation: as the Mol in RDkits are C++ objects,
+    # it is hard to create a copy of them in python,
+    # so I decide to create functions that take smiles all the time 
+    # it can be done with xyz coordinates or others, but smiles are 
+    # simpler to me
+    """
+    radicalmol=Chem.MolFromSmiles(radicalsmiles)
+    for atom in radicalmol.GetAtoms():
+        if atom.GetNumRadicalElectrons()!=0:
+            atom.SetNumRadicalElectrons(0) 
+    newsmiles=Chem.MolToSmiles(radicalmol)
+    return newsmiles
+
+def getnonradicalfrag(smiles):
+    """
+    
+
+    Parameters
+    ----------
+    smiles : TYPE str
+        DESCRIPTION. smiles containning a fragmented radical
+
+    Returns
+    -------
+    smi : TYPE str
+        DESCRIPTION. smiles of the non radical fragment
+
+    """
+    
+    listofsmiles=smiles.split(".")
+    for smi in listofsmiles:
+        if "[" not in smi and "]" not in smi:
+            return smi
+
+def TSdic(path,R1):
+    """
+    
+    Parameters
+    ----------
+    path : TYPE  tuple
+        DESCRIPTION. indexes of the atoms in the TS cycle
+    R1 : TYPE Mol object, from RDkit
+        DESCRIPTION. radical reactant
+
+    Returns
+    -------
+    TSdic : TYPE dic
+        DESCRIPTION. dictionary containing the substituents of each atom on the cycle, follow the format:
+            TSDic={ind:[0,0],id:["CCC",0]}..
+
+    """
+    TSdic={}
+    for a in path: 
+        bonds=[]
+        fragsa=[]
+        atom=R1.GetAtomWithIdx(a)
+        #print("atom ",a)
+        for x in atom.GetNeighbors():
+            if x.GetIdx() not in path:
+                #print("bonded to",x.GetIdx(),"with bond number", R1.GetBondBetweenAtoms(atom.GetIdx(),x.GetIdx()).GetIdx())
+                bonds.append(R1.GetBondBetweenAtoms(atom.GetIdx(),x.GetIdx()).GetIdx())
+        if len(bonds)>0:
+            frag=Chem.FragmentOnSomeBonds(R1,tuple(bonds),addDummies=False)
+            for chem in frag:
+                chemfrag=Chem.MolToSmiles(chem)
+                newchemfrag=getnonradicalfrag(chemfrag)
+                fragsa.append(copy.deepcopy(newchemfrag))
+        else:
+            fragsa=[0,0]
+        if len(fragsa)==2:
+            TSdic[a]=fragsa
+        else:
+            fragsa.append(0)
+            TSdic[a]=fragsa
+    return TSdic
+    
+def mol_with_atom_index(mol):
+    for atom in mol.GetAtoms():
+        atom.SetAtomMapNum(atom.GetIdx())
+    # for bond in mol.GetBonds():
+    #     bond.SetBondMapNum(bond.GetIdx())
+    return mol
+
+
+def creer_solutions(liste,pivot,arbre,longueur):
+    if pivot < longueur:
+        filsg = Node(liste[pivot],arbre)
+        filsd = Node(list(reversed(liste[pivot])),arbre,)
+        pivot += 1
+        creer_solutions(liste,pivot,filsg,longueur)
+        creer_solutions(liste,pivot,filsd,longueur)
+
+def planinversion(subslist):
+    newlist=[list(reversed(i)) for i in subslist]
+    return newlist
+
+def middleCinversion(sublist):
+    return list(reversed(sublist))
+
+def createTS(liste):
+    liste.insert(0,0)
+    pivot=1
+    root = Node(liste[0])
+    longueur=len(liste)
+    creer_solutions(liste, pivot, root, longueur)
+    cnodes = list(PreOrderIter(root, filter_=lambda node: node.is_leaf))
+    combinaisons = [str(i)[7:-2].split(i.separator) for i in cnodes]
+    combinaisons=[comb[1:] for comb in combinaisons]
+    c=[]
+    for comb in combinaisons:
+       comb=[ast.literal_eval(i) for i in comb]
+       c.append(copy.deepcopy(comb))
+    uniquesubs=[]
+    for comb in c:
+        if len(comb)<=3:
+            combinvplan=planinversion(comb)
+            combinvC=middleCinversion(comb)
+            doubleinv=planinversion(combinvC)
+            if (combinvplan not in uniquesubs) and (combinvC not in uniquesubs) and (comb not in uniquesubs) and (doubleinv not in uniquesubs):
+                uniquesubs.append(copy.deepcopy(comb))
+        elif len(comb)>=4:
+            combinvC=middleCinversion(comb)
+            if combinvC not in uniquesubs and (comb not in uniquesubs):
+                uniquesubs.append(copy.deepcopy(comb))
+    return uniquesubs
+
+def samesubextremes(subsTS):
+    """
+    
+
+    Parameters
+    ----------
+    subsTS : TYPE list
+        DESCRIPTION.
+
+    Returns
+    -------
+    same type Boolean
+    
+
+    """
+    p1=subsTS[0]
+    p2=subsTS[-1]
+    if p1==list(reversed(p1)) and p2==list(reversed(p2)):
+        return True
+    else:
+        return False
\ No newline at end of file