diff --git a/correctionfactor.py b/correctionfactor.py new file mode 100644 index 0000000000000000000000000000000000000000..0361e02437a4307fca5216f2642a00a1042dc43d --- /dev/null +++ b/correctionfactor.py @@ -0,0 +1,597 @@ + # -*- coding: utf-8 -*- +""" +Created on Fri Mar 15 11:06:06 2024 + +""" + +from rdkit import Chem +from rdkit.Chem import rdmolfiles +from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions,GetStereoisomerCount +from rdkit.Chem.rdMolDescriptors import CalcNumAtomStereoCenters +from rdkit.Chem import AllChem +from _kinetic import iso_alkyl_C3,iso_alkyl_C4,iso_alkyl_C5,iso_alkyl_C6 +import subprocess as sub +import copy +from anytree import Node, RenderTree, PreOrderIter +import ast,copy +import os + +def matchreaction(Rsmiles,Psmiles): + """ + Parameters + ---------- + Rsmiles : TYPE str + DESCRIPTION. Smiles of the reactant species, it can be canonical or not + Psmiles : TYPE str + DESCRIPTION. Smiles of the product species, it can be canonical or not + + Returns + ------- + Arrhenius : TYPE list + DESCRIPTION. list of dictionaries containing the reaction TMTS model with the + three Arrhenius paramters for the reaction and the statistical factor, + as the reaction may happen by different pathways + /!\ if no pathway is identified, then a None will be returned + """ + pathways,cs= cyclesize(Rsmiles,Psmiles) + #check if it was possible to find a pathway and a cyclesize: + if len(cs)==0 or len(pathways)==0: + return None + Arrhenius=[] + for i in range(0,len(cs)): + cycle=cs[i] + # print(cycle) + path=pathways[i] + TMTSmodel,match= searchmatch(Rsmiles,Psmiles,cycle) + #print(TMTSmodel) + if match==1: + Arrhenius.append(TMTSmodel) + else: + #here, we need to call the code for calculating the correction factor + #for the statistical factor + Arrhenius.append(refiningarrhenius(TMTSmodel,Rsmiles,Psmiles,path,cycle)) + + return Arrhenius + + +def refiningarrhenius(TMTSmodel,Rsmiles,Psmiles,path,cycle): + """ + + Parameters + ---------- + TMTSmodel : TYPE dic + DESCRIPTION. dictionary cointaining the reaction description as key + (format "Rmiles>>Psmiles") and the list of Arrhenius parameters as + descriptions [A (s-1),n, Ea(cal/mol) and Lc] + Rsmiles : TYPE str + DESCRIPTION. Smiles of the reactant species, it can be canonical or not + Psmiles : TYPE str + DESCRIPTION.Smiles of the reactant species, it can be canonical or not + + Returns + ------- + TMTSmodel : TYPE. dic + DESCRIPTION. corrected with a list of Arrhenius parameters after the correction factor is applied + + """ + print(TMTSmodel ) + for model in TMTSmodel: + Lcmodel=TMTSmodel[model][-1] + fc=1/float(Lcmodel) + Rsmiles=Rsmiles.replace("@","") + Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles))) + nisoR=CalcNumAtomStereoCenters(Rmol)+1 + opts = StereoEnumerationOptions(unique=True) + isomers = tuple(EnumerateStereoisomers(Rmol, options=opts)) + isomerssmi=[] + for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers): + isomerssmi.append(smi) + isomerssmi=cleanisomers(isomerssmi) + NR=len(isomerssmi) + isomersextsym=[] + for isomer in isomerssmi: + moliso=Chem.MolFromSmiles(isomer) + m3= Chem.AddHs(moliso) + AllChem.EmbedMolecule(m3) + AllChem.MMFFOptimizeMolecule(m3) + rdmolfiles.MolToPDBFile(m3,"C:/pwt/sym.pdb",flavor=16) + file = open("C:/pwt/sym.pdb","w+") + textinput = file.read() + textinputcorrige="COMPND C:/pwt/base.xyz\nAUTHOR GENERATED BY OPEN BABEL 3.1.0\n"+textinput + file.write(textinputcorrige) + callplatonsym() + extsymiso=tradsym(foundsym("C:/pwt/sym.lis")) + isomersextsym.append(copy.deepcopy(extsymiso)) + if len(isomersextsym)==1: + sym=isomersextsym[0] + LR=[sym/nisoR] + else: + LR=[sym/2 for sym in isomersextsym] + # now we will create a dictionary of TS based on the structures: + TS=TSdic(path,Rmol) + subsTS=[TS[i] for i in TS] + uniquecombofsubs=createTS(subsTS) + NTS=len(uniquecombofsubs) + LTSs=[] + #print(uniquecombofsubs) + for ts in uniquecombofsubs: + nisoTS=2 + symTS=1 + #verifyif nisoTS=1 + if cycle <=4 or cycle ==6: + if samesubextremes(ts)==True: + nisoTS=1 + invinvTS=middleCinversion(planinversion(ts)) + if invinvTS==ts: + symTS=2 + LTS=nisoTS/symTS + LTSs.append(copy.deepcopy(LTS)) + print(LR) + print(LTSs) + Lmodel=[] + i=NR + for LTS in LTSs: + j=i%NR + Lmodeli=LTS*LR[j] + i+=1 + Lmodel.append(copy.deepcopy(Lmodeli)) + Lmodelfinal=sum(Lmodel)/NR + fc=fc*Lmodelfinal + print(fc) + TMTSmodel[model]=[str(fc*float(TMTSmodel[model][0])),TMTSmodel[model][1],TMTSmodel[model][2],str(Lmodelfinal)] + + return TMTSmodel +def symfromsmi(smiles): + """ + + + Parameters + ---------- + smiles : TYPE str + DESCRIPTION. smiles of the molecule + + Returns + ------- + sym: TYPE float + DESCRIPTION. integer with the symmetry value + + """ + moliso=Chem.MolFromSmiles(smiles) + m3= Chem.AddHs(m2) + AllChem.EmbedMolecule(m3) + AllChem.MMFFOptimizeMolecule(m3) + rdmolfiles.MolToPDBFile(m3,"C:/pwt/sym.pdb",flavor=16) + file = open("C:/pwt/sym.pdb","r") + textinput = file.read() + file.close() + file2=open("C:/pwt/sym.pdb","w") + textinputcorrige="COMPND C:/pwt/base.xyz\nAUTHOR GENERATED BY OPEN BABEL 3.1.0\n"+textinput + textinputcorrige=textinputcorrige.replace(" H "," F ") + file2.write(textinputcorrige) + file2.close() + callplatonsym() + symiso=tradsym(foundsym("C:/pwt/sym.lis")) + +def callplatonsym(): + newpath = "C:\pwt" + os.chdir(newpath) + proc = sub.Popen(["platon", "-o", "sym.pdb\n"], + stdin=sub.PIPE, stdout=sub.PIPE, shell=True, text=True) + output2, errors = proc.communicate(input="NONSYM\nexit\n") + +def foundsym(filename): + symmetry = "C1" + symfile = open(filename, "r") + text = symfile.read() + lines = text.splitlines() + for i in range(0, len(lines)): + if lines[i][0:6] == "Resd #": + data = lines[i+2].split() + tol = float(data[9]) + if tol == 0.1: + symmetry = str(data[6]) + symfile.close() + return symmetry + +def tradsym(symmetry): + extsym = 1 + if symmetry == "C1" or symmetry == "Cs": + extsym = 1 + elif symmetry == "C2" or symmetry == "C2v": + extsym = 2 + elif symmetry == "C3v": + extsym = 3 + elif symmetry == "D2h": + extsym = 4 + elif symmetry == "D3h" or symmetry == "D3d": + extsym = 6 + elif symmetry == "D5h": + extsym = 10 + elif symmetry == "Td": + extsym = 12 + elif symmetry == "Oh": + extsym = 24 + return extsym + +def cleanisomers(listofiso): + """ + + + Parameters + ---------- + listofiso : TYPE list + DESCRIPTION. list of smiles for isomers, chiral centers indicated with @ or @@ + + Returns + ------- + listofisoclean : TYPE list + DESCRIPTION. list of smiles for isomers, chiral centers indicated with @ or @@, no + double structure + """ + already=[] + cleaniso=[] + for iso in listofiso: + if iso not in already: + cleaniso.append(iso) + already.append(iso) + already.append(inverseiso(iso)) + return cleaniso + +def inverseiso(isomer): + """ + + + Parameters + ---------- + isomer : TYPE str + DESCRIPTION. + + Returns + ------- + isomer : TYPE str + DESCRIPTION. @ and @@ inverted + + """ + isomer=isomer.replace("@@","XXX") + isomer=isomer.replace("@","XO") + isomer=isomer.replace("XXX","@") + isomer=isomer.replace("XO","@@") + + return isomer + +def searchmatch(Rsmi,Psmi,C): + """ + + Parameters + ---------- + Rsmi : TYPE str + DESCRIPTION. SMILES of reactant + Psmi : TYPE str + DESCRIPTION. SMILES of the product + C : TYPE integer + DESCRIPTION. size of the TScycle + + Returns + ------- + TMTSmodel : TYPE dic + DESCRIPTION. dictionary cointaining the reaction description as key + (format "Rmiles>>Psmiles") and the list of Arrhenius parameters as + descriptions [A (s-1),n, Ea(cal/mol) and Lc] + /!\ Return None if the reaction is not covered by ouyr database + """ + Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmi))) + natomR=Rmol.GetNumAtoms() + Pmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Psmi))) + natomP=Pmol.GetNumAtoms() + if C==3: + kinetic=iso_alkyl_C3 + elif C==4: + kinetic=iso_alkyl_C4 + elif C==5: + kinetic=iso_alkyl_C5 + elif C==6: + kinetic=iso_alkyl_C6 + else: + return None,0 + nmatch=0 + TMTSmodel={} + wincase="" + match=0 + for case in kinetic: + [Rmodel,Pmodel]=case.split(">>") + RmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rmodel))) + PmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Pmodel))) + ntestR=len(Rmol.GetSubstructMatch(RmodelMol)) + ntestP=len(Pmol.GetSubstructMatch(PmodelMol)) + if ntestR==ntestP: + if ntestR>nmatch: + match=ntestR/natomR + nmatch=ntestR + wincase=case + if wincase not in TMTSmodel: + TMTSmodel[wincase]=kinetic[wincase] + return TMTSmodel,match + else: + return None,0 + +def cyclesize(Rsmiles,Psmiles): + """ + Parameters + ---------- + Rsmiles : TYPE str + DESCRIPTION. Smiles of the reactant species, it can be canonical or not + Psmiles : TYPE str + DESCRIPTION. Smiles of the product species, it can be canonical or not + + Returns + ------- + path : TYPE tuple + DESCRIPTION. tuple containing the indexes of atoms being part of the TS cycle + cyclesize : TYPE integer + DESCRIPTION. size of the TS cycle + + """ + Rsmilescanonical=Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles)) + Psmilescanonical=Chem.MolToSmiles(Chem.MolFromSmiles(Psmiles)) + RMol=Chem.MolFromSmiles(Rsmilescanonical) + PMol=Chem.MolFromSmiles(Psmilescanonical) + alkane=Chem.MolFromSmiles(equivalentalkane(Rsmilescanonical)) + ridxR=radicalatomindex(Rsmilescanonical) + ridxP=radicalatomindex(Psmilescanonical) + path=[] + cyclesize=[] + rpositionsR,dicR=equivalencelist(Rsmilescanonical) + rpositionsP,dicP=equivalencelist(Psmilescanonical) + for atomR in rpositionsR: + for atomP in rpositionsP: + if atomP!=atomR: + path.append(Chem.rdmolops.GetShortestPath(alkane,atomR,atomP)) + path=cleanpath(path,dicR) + cyclesize=[len(path[i])+1 for i in range(0,len(path))] + return path,cyclesize + +def cleanpath(path,dicR): + """ + + Parameters + ---------- + path : TYPE list + DESCRIPTION. list of tuples containing the atoms indexes of a isomerisation TS cycle + dicR : TYPE dictionary + DESCRIPTION. tom indexes (keys) with their respective cannonical classification + + Returns + ------- + path: TYPE list of tuples + DESCRIPTION. list of tuples containing the atoms indexes of a isomerisation TS cycle, + but this times cleaned from equivalent ones + + """ + cleanpath=[] + aux=[] + for p in path: + equivalent=[dicR[p[i]] for i in range(0,len(p))] + if equivalent not in aux: + cleanpath.append(p) + aux.append(equivalent) + return cleanpath + +def equivalencelist(radsmiles): + """ + + Parameters + ---------- + radsmiles : str + DESCRIPTION. smiles of a radical alkyl + + Returns + ------- + alleq : list + DESCRIPTION. atom indexes of the equivalent positions for the radical center + dic: dictionary + DESCRIPTION. atom indexes (keys) with their respective cannonical classification + """ + + nonradsmiles=equivalentalkane(radsmiles) + Mol=Chem.MolFromSmiles(nonradsmiles) + radMol=Chem.MolFromSmiles(radsmiles) + listofequivalents=list(Chem.CanonicalRankAtoms(Mol, breakTies=False)) + radicalidx=radicalatomindex(radsmiles) + canradical=listofequivalents[radicalidx] + alleq=[i for i in range(0,len(listofequivalents)) if listofequivalents[i]==canradical] + dic={} + for i in range(0,len(listofequivalents)): + dic[i]=listofequivalents[i] + return alleq,dic + +def radicalatomindex(smiles): + """ + + Parameters + ---------- + smiles : string + DESCRIPTION. simplified molecular input line entry specification + it can be canonical or not + /!\ it must contain only ONE radical site, otherise the function will + return None + Returns + ------- + radical : integer + DESCRIPTION. index of the atom that contains the radical + if the species contain moe than 1 radicla site, it will return None + """ + mol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(smiles))) + r=Chem.MolToSmiles(mol) + radical=[] + for atom in mol.GetAtoms(): + n=atom.GetNumRadicalElectrons() + if n>0 : + radical.append(atom.GetIdx()) + if len(radical)==1: + return radical[0] + else: + radical=None + return radical + +def equivalentalkane(radicalsmiles): + """ + + Parameters + ---------- + radicalsmiles : str + DESCRIPTION.simplified molecular input line entry specification of a radical + species, it can be canonical or not + + Returns + ------- + newsmiles2 : str + DESCRIPTION. simplified molecular input line entry specification + of the equivalent non-radical species + + #important observation: as the Mol in RDkits are C++ objects, + # it is hard to create a copy of them in python, + # so I decide to create functions that take smiles all the time + # it can be done with xyz coordinates or others, but smiles are + # simpler to me + """ + radicalmol=Chem.MolFromSmiles(radicalsmiles) + for atom in radicalmol.GetAtoms(): + if atom.GetNumRadicalElectrons()!=0: + atom.SetNumRadicalElectrons(0) + newsmiles=Chem.MolToSmiles(radicalmol) + return newsmiles + +def getnonradicalfrag(smiles): + """ + + + Parameters + ---------- + smiles : TYPE str + DESCRIPTION. smiles containning a fragmented radical + + Returns + ------- + smi : TYPE str + DESCRIPTION. smiles of the non radical fragment + + """ + + listofsmiles=smiles.split(".") + for smi in listofsmiles: + if "[" not in smi and "]" not in smi: + return smi + +def TSdic(path,R1): + """ + + Parameters + ---------- + path : TYPE tuple + DESCRIPTION. indexes of the atoms in the TS cycle + R1 : TYPE Mol object, from RDkit + DESCRIPTION. radical reactant + + Returns + ------- + TSdic : TYPE dic + DESCRIPTION. dictionary containing the substituents of each atom on the cycle, follow the format: + TSDic={ind:[0,0],id:["CCC",0]}.. + + """ + TSdic={} + for a in path: + bonds=[] + fragsa=[] + atom=R1.GetAtomWithIdx(a) + #print("atom ",a) + for x in atom.GetNeighbors(): + if x.GetIdx() not in path: + #print("bonded to",x.GetIdx(),"with bond number", R1.GetBondBetweenAtoms(atom.GetIdx(),x.GetIdx()).GetIdx()) + bonds.append(R1.GetBondBetweenAtoms(atom.GetIdx(),x.GetIdx()).GetIdx()) + if len(bonds)>0: + frag=Chem.FragmentOnSomeBonds(R1,tuple(bonds),addDummies=False) + for chem in frag: + chemfrag=Chem.MolToSmiles(chem) + newchemfrag=getnonradicalfrag(chemfrag) + fragsa.append(copy.deepcopy(newchemfrag)) + else: + fragsa=[0,0] + if len(fragsa)==2: + TSdic[a]=fragsa + else: + fragsa.append(0) + TSdic[a]=fragsa + return TSdic + +def mol_with_atom_index(mol): + for atom in mol.GetAtoms(): + atom.SetAtomMapNum(atom.GetIdx()) + # for bond in mol.GetBonds(): + # bond.SetBondMapNum(bond.GetIdx()) + return mol + + +def creer_solutions(liste,pivot,arbre,longueur): + if pivot < longueur: + filsg = Node(liste[pivot],arbre) + filsd = Node(list(reversed(liste[pivot])),arbre,) + pivot += 1 + creer_solutions(liste,pivot,filsg,longueur) + creer_solutions(liste,pivot,filsd,longueur) + +def planinversion(subslist): + newlist=[list(reversed(i)) for i in subslist] + return newlist + +def middleCinversion(sublist): + return list(reversed(sublist)) + +def createTS(liste): + liste.insert(0,0) + pivot=1 + root = Node(liste[0]) + longueur=len(liste) + creer_solutions(liste, pivot, root, longueur) + cnodes = list(PreOrderIter(root, filter_=lambda node: node.is_leaf)) + combinaisons = [str(i)[7:-2].split(i.separator) for i in cnodes] + combinaisons=[comb[1:] for comb in combinaisons] + c=[] + for comb in combinaisons: + comb=[ast.literal_eval(i) for i in comb] + c.append(copy.deepcopy(comb)) + uniquesubs=[] + for comb in c: + if len(comb)<=3: + combinvplan=planinversion(comb) + combinvC=middleCinversion(comb) + doubleinv=planinversion(combinvC) + if (combinvplan not in uniquesubs) and (combinvC not in uniquesubs) and (comb not in uniquesubs) and (doubleinv not in uniquesubs): + uniquesubs.append(copy.deepcopy(comb)) + elif len(comb)>=4: + combinvC=middleCinversion(comb) + if combinvC not in uniquesubs and (comb not in uniquesubs): + uniquesubs.append(copy.deepcopy(comb)) + return uniquesubs + +def samesubextremes(subsTS): + """ + + + Parameters + ---------- + subsTS : TYPE list + DESCRIPTION. + + Returns + ------- + same type Boolean + + + """ + p1=subsTS[0] + p2=subsTS[-1] + if p1==list(reversed(p1)) and p2==list(reversed(p2)): + return True + else: + return False \ No newline at end of file