diff --git a/correctionfactor.py b/correctionfactor.py index 0361e02437a4307fca5216f2642a00a1042dc43d..ae44e8b5f2b553c06b17b1dfe42f8e280e1cd0d2 100644 --- a/correctionfactor.py +++ b/correctionfactor.py @@ -1,597 +1,1009 @@ - # -*- coding: utf-8 -*- -""" -Created on Fri Mar 15 11:06:06 2024 - -""" - -from rdkit import Chem -from rdkit.Chem import rdmolfiles -from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions,GetStereoisomerCount -from rdkit.Chem.rdMolDescriptors import CalcNumAtomStereoCenters -from rdkit.Chem import AllChem -from _kinetic import iso_alkyl_C3,iso_alkyl_C4,iso_alkyl_C5,iso_alkyl_C6 -import subprocess as sub -import copy -from anytree import Node, RenderTree, PreOrderIter -import ast,copy -import os - -def matchreaction(Rsmiles,Psmiles): - """ - Parameters - ---------- - Rsmiles : TYPE str - DESCRIPTION. Smiles of the reactant species, it can be canonical or not - Psmiles : TYPE str - DESCRIPTION. Smiles of the product species, it can be canonical or not - - Returns - ------- - Arrhenius : TYPE list - DESCRIPTION. list of dictionaries containing the reaction TMTS model with the - three Arrhenius paramters for the reaction and the statistical factor, - as the reaction may happen by different pathways - /!\ if no pathway is identified, then a None will be returned - """ - pathways,cs= cyclesize(Rsmiles,Psmiles) - #check if it was possible to find a pathway and a cyclesize: - if len(cs)==0 or len(pathways)==0: - return None - Arrhenius=[] - for i in range(0,len(cs)): - cycle=cs[i] - # print(cycle) - path=pathways[i] - TMTSmodel,match= searchmatch(Rsmiles,Psmiles,cycle) - #print(TMTSmodel) - if match==1: - Arrhenius.append(TMTSmodel) - else: - #here, we need to call the code for calculating the correction factor - #for the statistical factor - Arrhenius.append(refiningarrhenius(TMTSmodel,Rsmiles,Psmiles,path,cycle)) - - return Arrhenius - - -def refiningarrhenius(TMTSmodel,Rsmiles,Psmiles,path,cycle): - """ - - Parameters - ---------- - TMTSmodel : TYPE dic - DESCRIPTION. dictionary cointaining the reaction description as key - (format "Rmiles>>Psmiles") and the list of Arrhenius parameters as - descriptions [A (s-1),n, Ea(cal/mol) and Lc] - Rsmiles : TYPE str - DESCRIPTION. Smiles of the reactant species, it can be canonical or not - Psmiles : TYPE str - DESCRIPTION.Smiles of the reactant species, it can be canonical or not - - Returns - ------- - TMTSmodel : TYPE. dic - DESCRIPTION. corrected with a list of Arrhenius parameters after the correction factor is applied - - """ - print(TMTSmodel ) - for model in TMTSmodel: - Lcmodel=TMTSmodel[model][-1] - fc=1/float(Lcmodel) - Rsmiles=Rsmiles.replace("@","") - Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles))) - nisoR=CalcNumAtomStereoCenters(Rmol)+1 - opts = StereoEnumerationOptions(unique=True) - isomers = tuple(EnumerateStereoisomers(Rmol, options=opts)) - isomerssmi=[] - for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers): - isomerssmi.append(smi) - isomerssmi=cleanisomers(isomerssmi) - NR=len(isomerssmi) - isomersextsym=[] - for isomer in isomerssmi: - moliso=Chem.MolFromSmiles(isomer) - m3= Chem.AddHs(moliso) - AllChem.EmbedMolecule(m3) - AllChem.MMFFOptimizeMolecule(m3) - rdmolfiles.MolToPDBFile(m3,"C:/pwt/sym.pdb",flavor=16) - file = open("C:/pwt/sym.pdb","w+") - textinput = file.read() - textinputcorrige="COMPND C:/pwt/base.xyz\nAUTHOR GENERATED BY OPEN BABEL 3.1.0\n"+textinput - file.write(textinputcorrige) - callplatonsym() - extsymiso=tradsym(foundsym("C:/pwt/sym.lis")) - isomersextsym.append(copy.deepcopy(extsymiso)) - if len(isomersextsym)==1: - sym=isomersextsym[0] - LR=[sym/nisoR] - else: - LR=[sym/2 for sym in isomersextsym] - # now we will create a dictionary of TS based on the structures: - TS=TSdic(path,Rmol) - subsTS=[TS[i] for i in TS] - uniquecombofsubs=createTS(subsTS) - NTS=len(uniquecombofsubs) - LTSs=[] - #print(uniquecombofsubs) - for ts in uniquecombofsubs: - nisoTS=2 - symTS=1 - #verifyif nisoTS=1 - if cycle <=4 or cycle ==6: - if samesubextremes(ts)==True: - nisoTS=1 - invinvTS=middleCinversion(planinversion(ts)) - if invinvTS==ts: - symTS=2 - LTS=nisoTS/symTS - LTSs.append(copy.deepcopy(LTS)) - print(LR) - print(LTSs) - Lmodel=[] - i=NR - for LTS in LTSs: - j=i%NR - Lmodeli=LTS*LR[j] - i+=1 - Lmodel.append(copy.deepcopy(Lmodeli)) - Lmodelfinal=sum(Lmodel)/NR - fc=fc*Lmodelfinal - print(fc) - TMTSmodel[model]=[str(fc*float(TMTSmodel[model][0])),TMTSmodel[model][1],TMTSmodel[model][2],str(Lmodelfinal)] - - return TMTSmodel -def symfromsmi(smiles): - """ - - - Parameters - ---------- - smiles : TYPE str - DESCRIPTION. smiles of the molecule - - Returns - ------- - sym: TYPE float - DESCRIPTION. integer with the symmetry value - - """ - moliso=Chem.MolFromSmiles(smiles) - m3= Chem.AddHs(m2) - AllChem.EmbedMolecule(m3) - AllChem.MMFFOptimizeMolecule(m3) - rdmolfiles.MolToPDBFile(m3,"C:/pwt/sym.pdb",flavor=16) - file = open("C:/pwt/sym.pdb","r") - textinput = file.read() - file.close() - file2=open("C:/pwt/sym.pdb","w") - textinputcorrige="COMPND C:/pwt/base.xyz\nAUTHOR GENERATED BY OPEN BABEL 3.1.0\n"+textinput - textinputcorrige=textinputcorrige.replace(" H "," F ") - file2.write(textinputcorrige) - file2.close() - callplatonsym() - symiso=tradsym(foundsym("C:/pwt/sym.lis")) - -def callplatonsym(): - newpath = "C:\pwt" - os.chdir(newpath) - proc = sub.Popen(["platon", "-o", "sym.pdb\n"], - stdin=sub.PIPE, stdout=sub.PIPE, shell=True, text=True) - output2, errors = proc.communicate(input="NONSYM\nexit\n") - -def foundsym(filename): - symmetry = "C1" - symfile = open(filename, "r") - text = symfile.read() - lines = text.splitlines() - for i in range(0, len(lines)): - if lines[i][0:6] == "Resd #": - data = lines[i+2].split() - tol = float(data[9]) - if tol == 0.1: - symmetry = str(data[6]) - symfile.close() - return symmetry - -def tradsym(symmetry): - extsym = 1 - if symmetry == "C1" or symmetry == "Cs": - extsym = 1 - elif symmetry == "C2" or symmetry == "C2v": - extsym = 2 - elif symmetry == "C3v": - extsym = 3 - elif symmetry == "D2h": - extsym = 4 - elif symmetry == "D3h" or symmetry == "D3d": - extsym = 6 - elif symmetry == "D5h": - extsym = 10 - elif symmetry == "Td": - extsym = 12 - elif symmetry == "Oh": - extsym = 24 - return extsym - -def cleanisomers(listofiso): - """ - - - Parameters - ---------- - listofiso : TYPE list - DESCRIPTION. list of smiles for isomers, chiral centers indicated with @ or @@ - - Returns - ------- - listofisoclean : TYPE list - DESCRIPTION. list of smiles for isomers, chiral centers indicated with @ or @@, no - double structure - """ - already=[] - cleaniso=[] - for iso in listofiso: - if iso not in already: - cleaniso.append(iso) - already.append(iso) - already.append(inverseiso(iso)) - return cleaniso - -def inverseiso(isomer): - """ - - - Parameters - ---------- - isomer : TYPE str - DESCRIPTION. - - Returns - ------- - isomer : TYPE str - DESCRIPTION. @ and @@ inverted - - """ - isomer=isomer.replace("@@","XXX") - isomer=isomer.replace("@","XO") - isomer=isomer.replace("XXX","@") - isomer=isomer.replace("XO","@@") - - return isomer - -def searchmatch(Rsmi,Psmi,C): - """ - - Parameters - ---------- - Rsmi : TYPE str - DESCRIPTION. SMILES of reactant - Psmi : TYPE str - DESCRIPTION. SMILES of the product - C : TYPE integer - DESCRIPTION. size of the TScycle - - Returns - ------- - TMTSmodel : TYPE dic - DESCRIPTION. dictionary cointaining the reaction description as key - (format "Rmiles>>Psmiles") and the list of Arrhenius parameters as - descriptions [A (s-1),n, Ea(cal/mol) and Lc] - /!\ Return None if the reaction is not covered by ouyr database - """ - Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmi))) - natomR=Rmol.GetNumAtoms() - Pmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Psmi))) - natomP=Pmol.GetNumAtoms() - if C==3: - kinetic=iso_alkyl_C3 - elif C==4: - kinetic=iso_alkyl_C4 - elif C==5: - kinetic=iso_alkyl_C5 - elif C==6: - kinetic=iso_alkyl_C6 - else: - return None,0 - nmatch=0 - TMTSmodel={} - wincase="" - match=0 - for case in kinetic: - [Rmodel,Pmodel]=case.split(">>") - RmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rmodel))) - PmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Pmodel))) - ntestR=len(Rmol.GetSubstructMatch(RmodelMol)) - ntestP=len(Pmol.GetSubstructMatch(PmodelMol)) - if ntestR==ntestP: - if ntestR>nmatch: - match=ntestR/natomR - nmatch=ntestR - wincase=case - if wincase not in TMTSmodel: - TMTSmodel[wincase]=kinetic[wincase] - return TMTSmodel,match - else: - return None,0 - -def cyclesize(Rsmiles,Psmiles): - """ - Parameters - ---------- - Rsmiles : TYPE str - DESCRIPTION. Smiles of the reactant species, it can be canonical or not - Psmiles : TYPE str - DESCRIPTION. Smiles of the product species, it can be canonical or not - - Returns - ------- - path : TYPE tuple - DESCRIPTION. tuple containing the indexes of atoms being part of the TS cycle - cyclesize : TYPE integer - DESCRIPTION. size of the TS cycle - - """ - Rsmilescanonical=Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles)) - Psmilescanonical=Chem.MolToSmiles(Chem.MolFromSmiles(Psmiles)) - RMol=Chem.MolFromSmiles(Rsmilescanonical) - PMol=Chem.MolFromSmiles(Psmilescanonical) - alkane=Chem.MolFromSmiles(equivalentalkane(Rsmilescanonical)) - ridxR=radicalatomindex(Rsmilescanonical) - ridxP=radicalatomindex(Psmilescanonical) - path=[] - cyclesize=[] - rpositionsR,dicR=equivalencelist(Rsmilescanonical) - rpositionsP,dicP=equivalencelist(Psmilescanonical) - for atomR in rpositionsR: - for atomP in rpositionsP: - if atomP!=atomR: - path.append(Chem.rdmolops.GetShortestPath(alkane,atomR,atomP)) - path=cleanpath(path,dicR) - cyclesize=[len(path[i])+1 for i in range(0,len(path))] - return path,cyclesize - -def cleanpath(path,dicR): - """ - - Parameters - ---------- - path : TYPE list - DESCRIPTION. list of tuples containing the atoms indexes of a isomerisation TS cycle - dicR : TYPE dictionary - DESCRIPTION. tom indexes (keys) with their respective cannonical classification - - Returns - ------- - path: TYPE list of tuples - DESCRIPTION. list of tuples containing the atoms indexes of a isomerisation TS cycle, - but this times cleaned from equivalent ones - - """ - cleanpath=[] - aux=[] - for p in path: - equivalent=[dicR[p[i]] for i in range(0,len(p))] - if equivalent not in aux: - cleanpath.append(p) - aux.append(equivalent) - return cleanpath - -def equivalencelist(radsmiles): - """ - - Parameters - ---------- - radsmiles : str - DESCRIPTION. smiles of a radical alkyl - - Returns - ------- - alleq : list - DESCRIPTION. atom indexes of the equivalent positions for the radical center - dic: dictionary - DESCRIPTION. atom indexes (keys) with their respective cannonical classification - """ - - nonradsmiles=equivalentalkane(radsmiles) - Mol=Chem.MolFromSmiles(nonradsmiles) - radMol=Chem.MolFromSmiles(radsmiles) - listofequivalents=list(Chem.CanonicalRankAtoms(Mol, breakTies=False)) - radicalidx=radicalatomindex(radsmiles) - canradical=listofequivalents[radicalidx] - alleq=[i for i in range(0,len(listofequivalents)) if listofequivalents[i]==canradical] - dic={} - for i in range(0,len(listofequivalents)): - dic[i]=listofequivalents[i] - return alleq,dic - -def radicalatomindex(smiles): - """ - - Parameters - ---------- - smiles : string - DESCRIPTION. simplified molecular input line entry specification - it can be canonical or not - /!\ it must contain only ONE radical site, otherise the function will - return None - Returns - ------- - radical : integer - DESCRIPTION. index of the atom that contains the radical - if the species contain moe than 1 radicla site, it will return None - """ - mol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(smiles))) - r=Chem.MolToSmiles(mol) - radical=[] - for atom in mol.GetAtoms(): - n=atom.GetNumRadicalElectrons() - if n>0 : - radical.append(atom.GetIdx()) - if len(radical)==1: - return radical[0] - else: - radical=None - return radical - -def equivalentalkane(radicalsmiles): - """ - - Parameters - ---------- - radicalsmiles : str - DESCRIPTION.simplified molecular input line entry specification of a radical - species, it can be canonical or not - - Returns - ------- - newsmiles2 : str - DESCRIPTION. simplified molecular input line entry specification - of the equivalent non-radical species - - #important observation: as the Mol in RDkits are C++ objects, - # it is hard to create a copy of them in python, - # so I decide to create functions that take smiles all the time - # it can be done with xyz coordinates or others, but smiles are - # simpler to me - """ - radicalmol=Chem.MolFromSmiles(radicalsmiles) - for atom in radicalmol.GetAtoms(): - if atom.GetNumRadicalElectrons()!=0: - atom.SetNumRadicalElectrons(0) - newsmiles=Chem.MolToSmiles(radicalmol) - return newsmiles - -def getnonradicalfrag(smiles): - """ - - - Parameters - ---------- - smiles : TYPE str - DESCRIPTION. smiles containning a fragmented radical - - Returns - ------- - smi : TYPE str - DESCRIPTION. smiles of the non radical fragment - - """ - - listofsmiles=smiles.split(".") - for smi in listofsmiles: - if "[" not in smi and "]" not in smi: - return smi - -def TSdic(path,R1): - """ - - Parameters - ---------- - path : TYPE tuple - DESCRIPTION. indexes of the atoms in the TS cycle - R1 : TYPE Mol object, from RDkit - DESCRIPTION. radical reactant - - Returns - ------- - TSdic : TYPE dic - DESCRIPTION. dictionary containing the substituents of each atom on the cycle, follow the format: - TSDic={ind:[0,0],id:["CCC",0]}.. - - """ - TSdic={} - for a in path: - bonds=[] - fragsa=[] - atom=R1.GetAtomWithIdx(a) - #print("atom ",a) - for x in atom.GetNeighbors(): - if x.GetIdx() not in path: - #print("bonded to",x.GetIdx(),"with bond number", R1.GetBondBetweenAtoms(atom.GetIdx(),x.GetIdx()).GetIdx()) - bonds.append(R1.GetBondBetweenAtoms(atom.GetIdx(),x.GetIdx()).GetIdx()) - if len(bonds)>0: - frag=Chem.FragmentOnSomeBonds(R1,tuple(bonds),addDummies=False) - for chem in frag: - chemfrag=Chem.MolToSmiles(chem) - newchemfrag=getnonradicalfrag(chemfrag) - fragsa.append(copy.deepcopy(newchemfrag)) - else: - fragsa=[0,0] - if len(fragsa)==2: - TSdic[a]=fragsa - else: - fragsa.append(0) - TSdic[a]=fragsa - return TSdic - -def mol_with_atom_index(mol): - for atom in mol.GetAtoms(): - atom.SetAtomMapNum(atom.GetIdx()) - # for bond in mol.GetBonds(): - # bond.SetBondMapNum(bond.GetIdx()) - return mol - - -def creer_solutions(liste,pivot,arbre,longueur): - if pivot < longueur: - filsg = Node(liste[pivot],arbre) - filsd = Node(list(reversed(liste[pivot])),arbre,) - pivot += 1 - creer_solutions(liste,pivot,filsg,longueur) - creer_solutions(liste,pivot,filsd,longueur) - -def planinversion(subslist): - newlist=[list(reversed(i)) for i in subslist] - return newlist - -def middleCinversion(sublist): - return list(reversed(sublist)) - -def createTS(liste): - liste.insert(0,0) - pivot=1 - root = Node(liste[0]) - longueur=len(liste) - creer_solutions(liste, pivot, root, longueur) - cnodes = list(PreOrderIter(root, filter_=lambda node: node.is_leaf)) - combinaisons = [str(i)[7:-2].split(i.separator) for i in cnodes] - combinaisons=[comb[1:] for comb in combinaisons] - c=[] - for comb in combinaisons: - comb=[ast.literal_eval(i) for i in comb] - c.append(copy.deepcopy(comb)) - uniquesubs=[] - for comb in c: - if len(comb)<=3: - combinvplan=planinversion(comb) - combinvC=middleCinversion(comb) - doubleinv=planinversion(combinvC) - if (combinvplan not in uniquesubs) and (combinvC not in uniquesubs) and (comb not in uniquesubs) and (doubleinv not in uniquesubs): - uniquesubs.append(copy.deepcopy(comb)) - elif len(comb)>=4: - combinvC=middleCinversion(comb) - if combinvC not in uniquesubs and (comb not in uniquesubs): - uniquesubs.append(copy.deepcopy(comb)) - return uniquesubs - -def samesubextremes(subsTS): - """ - - - Parameters - ---------- - subsTS : TYPE list - DESCRIPTION. - - Returns - ------- - same type Boolean - - - """ - p1=subsTS[0] - p2=subsTS[-1] - if p1==list(reversed(p1)) and p2==list(reversed(p2)): - return True - else: - return False \ No newline at end of file + # -*- coding: utf-8 -*- +""" +Created on Fri Mar 15 11:06:06 2024 + +""" + +from rdkit import Chem +from rdkit.Chem import rdmolfiles +from rdkit.Chem import Draw +from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions,GetStereoisomerCount +from rdkit.Chem.rdMolDescriptors import CalcNumAtomStereoCenters +from rdkit.Chem import AllChem +from _kinetic import iso_alkyl_C3,iso_alkyl_C4,iso_alkyl_C5,iso_alkyl_C6,bs_alkyl_CH,bs_alkyl_CC,habs_H,habs_C,recomb +import subprocess as sub +import copy +from anytree import Node, RenderTree, PreOrderIter +import ast,copy +import os +from rdkit import RDLogger + +def findmatchHabs(typeofHabs,radreactant, molreactant, radproduct,molproduct): + """ + Parameters + ---------- + typeofBS: TYPE str + DESCRIPTION. either "H" or "CH", define the type of radical abstracting the H (hydrogen = "H" or alkyl = "CH") + radreactant : TYPE str + DESCRIPTION. Smiles of the radical abstracting the H + molreactant : TYPE str + DESCRIPTION. Smiles of the molecule given the H + radprodcut: TYPE str + DESCRIPTION. Smiles of the radical product + molproduct: TYPE str + DESCRITPTION. Smiles of the molecular product + Returns + ------- + TMTSmodel: TYPE dic + DESCRIPTION. dictionary cointaining the reaction description as key + (format "Rmiles>>Psmile1+Psmile2") and the list of Arrhenius parameters as + descriptions [A (s-1),n, Ea(cal/mol) and Lc] + /!\ Return None if the reaction is not covered by ouyr database + match: TYPE float + DESCRIPTION. the fraction of model matching the target reaction + """ + if typeofHabs=="H": + kinetic=habs_H + elif typeofHabs=="CH": + kinetic=habs_C + #print(kinetic) + Rrad=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(radreactant))) + natomRrad=Rrad.GetNumAtoms() + Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(molreactant))) + natomRmol=Rmol.GetNumAtoms() + Prad=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(radproduct))) + #natomPrad=Prad.GetNumAtoms() + Pmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(molproduct))) + #natomPmol=Pmol.GetNumAtoms() + nmatch=0 + TMTSmodel={} + wincase="" + match=0 + for cas in kinetic: + [reactants,products]=cas.split(">>") + modelmolR,modelradR=reactants.split("+") + modelradP,modelmolP=products.split("+") + RmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(modelmolR))) + RmodelRad=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(modelradR))) + PmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(modelmolP))) + PmodelRad=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(modelradP))) + nRmol=len(Rmol.GetSubstructMatch(RmodelMol)) + nRrad=len(Rrad.GetSubstructMatch(RmodelRad)) + nPmol=len(Pmol.GetSubstructMatch(PmodelMol)) + nPrad=len(Prad.GetSubstructMatch(PmodelRad)) + ntotal=nRmol+nRrad + #ntestP=len(Pmolradical.GetSubstructMatch(PalkylmodelMol))+len(Pmolalkene.GetSubstructMatch(PalkenemodelMol)) + if nRmol==nPrad and nRrad==nPmol: + if nRmol>0 and nRrad>0 and nPmol>0 and nPrad>0 and ntotal>nmatch: + match=ntotal/(natomRrad+natomRmol) + nmatch=ntotal + wincase=cas + if wincase!='' and wincase not in TMTSmodel: + TMTSmodel[wincase]=kinetic[wincase] + return TMTSmodel,match,modelradP + else: + return None,0,None + +def findmatchBS(typeofBS,radicalP,alkeneP,reactant): + """ + Parameters + ---------- + typeofBS: TYPE str + DESCRIPTION. either "H" or "C", define the type of bond scissions (C-C) or (C-H) + radicalP : TYPE str + DESCRIPTION. Smiles of the radical product being formed in the B-scission + alkeneP: TYPE str + DESCRIPTION. Smiles of the product alkene + reactant: TYPE str + DESCRITPTION. Smiles of the alkyl radical reactant + Returns + ------- + TMTSmodel: TYPE dic + DESCRIPTION. dictionary cointaining the reaction description as key + (format "Rmiles>>Psmile1+Psmile2") and the list of Arrhenius parameters as + descriptions [A (s-1),n, Ea(cal/mol) and Lc] + /!\ Return None if the reaction is not covered by ouyr database + match: TYPE float + DESCRIPTION. the fraction of mo + """ + Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(reactant))) + Pmolradical=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(radicalP))) + natomPradical=Pmolradical.GetNumAtoms() + Pmolalkene=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(alkeneP))) + natomPalkene=Pmolalkene.GetNumAtoms() + natomP=natomPradical+natomPalkene + if typeofBS=="H": + kinetic=bs_alkyl_CH + elif typeofBS=="C": + kinetic=bs_alkyl_CC + else: + return None + nmatch=0 + + + TMTSmodel={} + wincase="" + match=0 + for case in kinetic: + [reactant,products]=case.split(">>") + alkylPmodel,alkenePmodel=products.split("+") + RmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(reactant))) + PalkylmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(alkylPmodel))) + PalkenemodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(alkenePmodel))) + ntestR=len(Rmol.GetSubstructMatch(RmodelMol)) + if typeofBS=="H": + ntestR+=1 + ntestP=len(Pmolradical.GetSubstructMatch(PalkylmodelMol))+len(Pmolalkene.GetSubstructMatch(PalkenemodelMol)) + if ntestR==ntestP: + if ntestP>nmatch: + match=ntestP/natomP + nmatch=ntestP + wincase=case + if wincase not in TMTSmodel: + TMTSmodel[wincase]=kinetic[wincase] + #print(TMTSmodel) + return TMTSmodel,match + else: + return None,0 + +def refiningarrheniusBS(TMTSmodel,Rsmiles): + """ + + Parameters + ---------- + TMTSmodel : TYPE dic + DESCRIPTION. dictionary cointaining the reaction description as key + (format "Rmiles>>Psmiles") and the list of Arrhenius parameters as + descriptions [A (s-1),n, Ea(cal/mol) and Lc] + Rsmiles : TYPE str + DESCRIPTION. Smiles of the reactant species, it can be canonical or not + Returns + ------- + TMTSmodel : TYPE. dic + DESCRIPTION. corrected with a list of Arrhenius parameters after the correction factor is applied + + """ + for model in TMTSmodel: + Lcmodel=TMTSmodel[model][-1] + fc=1/float(Lcmodel) + Rmodel,Pmodels=model.split(">>") + RmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rmodel))) + nisoRmodel=CalcNumAtomStereoCenters(RmodelMol)+1 + m3= Chem.AddHs(RmodelMol) + AllChem.EmbedMolecule(m3) + AllChem.MMFFOptimizeMolecule(m3) + rdmolfiles.MolToPDBFile(m3,"C:/pwt/sym.pdb",flavor=16) + callplatonsym() + extsymRmodel=tradsym(foundsym("C:/pwt/sym.lis")) + RMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles))) + nisoR=CalcNumAtomStereoCenters(RMol)+1 + m3R= Chem.AddHs(RMol) + AllChem.EmbedMolecule(m3R) + AllChem.MMFFOptimizeMolecule(m3R) + rdmolfiles.MolToPDBFile(m3R,"C:/pwt/sym.pdb",flavor=16) + file = open("C:/pwt/sym.pdb","w+") + textinput = file.read() + textinputcorrige="COMPND C:/pwt/base.xyz\nAUTHOR GENERATED BY OPEN BABEL 3.1.0\n"+textinput + file.write(textinputcorrige) + callplatonsym() + extsymR=tradsym(foundsym("C:/pwt/sym.lis")) + fc=extsymR*nisoRmodel/(extsymRmodel*nisoR) + Lmodelfinal=fc*float(Lcmodel) + TMTSmodel[model]=[str(fc*float(TMTSmodel[model][0])),TMTSmodel[model][1],TMTSmodel[model][2],str(Lmodelfinal)] + return TMTSmodel + + +def matchreaction_recomb(Rsmiles,Psmile): + """ + Parameters + ---------- + Rsmiles : TYPE list of str + DESCRIPTION. list of the smiles of the reactants + Psmile : TYPE str + DESCRIPTION. smile of the product + + Returns + ------- + Arrhenius : TYPE list + DESCRIPTION. list of dictionaries containing the reaction TMTS model with the + three Arrhenius paramters for the reaction and the statistical factor, + as the reaction may happen by different pathways + /!\ if no pathway is identified, then a None will be returned + """ + Arrhenius={} + kinetic=recomb + PMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Psmile))) + nP=len(PMol.GetAtoms()) + RMol1=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles[0]))) + nR1=len(RMol1.GetAtoms()) + RMol2=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles[1]))) + nR2=len(RMol2.GetAtoms()) + ntotal=nP+nR1+nR2 + nmatchb=0 + wincase="" + for model in kinetic: + Rmodelssmi,Pmodelssmi=model.split(">>") + Rmodelsmi1,Rmodelsmi2=Rmodelssmi.split("+") + PmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Pmodelssmi))) + RmodelMol1=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rmodelsmi1))) + RmodelMol2=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rmodelsmi2))) + ntestRtotal=max(len(RMol1.GetSubstructMatch(RmodelMol1))+len(RMol2.GetSubstructMatch(RmodelMol2)),len(RMol2.GetSubstructMatch(RmodelMol1))+len(RMol1.GetSubstructMatch(RmodelMol2))) + ntestP=len(PMol.GetSubstructMatch(PmodelMol)) + nmatchtest=(ntestRtotal+ntestP)/ntotal + if Rsmiles[1]=="[H]" or Rsmiles[0]=="[H]": + ntestP+=1 + if nmatchtest>nmatchb and ntestP==ntestRtotal: + wincase=copy.deepcopy(model) + nmatchb=nmatchtest + if nmatchb==1: + Arrhenius[wincase]=kinetic[wincase] + elif wincase!="": + #print(wincase) + Rmodelssmi,Pmodelssmi=wincase.split(">>") + Rmodelsmi1,Rmodelsmi2=Rmodelssmi.split("+") + if Rmodelsmi1==Rmodelsmi2 and Rsmiles[0]!=Rsmiles[1]: + Arrhenius[wincase]=[str(float(kinetic[wincase][0])*2),kinetic[wincase][1],kinetic[wincase][2]] + else: + Arrhenius[wincase]=kinetic[wincase] + if Arrhenius=={}: + Arrhenius=None + else: + Arrhenius=[Arrhenius] + return Arrhenius + +def matchreaction_Habs(Rsmiles,Psmiles): + """ + Parameters + ---------- + Rsmiles : TYPE list of str + DESCRIPTION. list containning list of the smiles (str) reactant species, it can be canonical or not + Psmiles : TYPE list of str + DESCRIPTION. list containing a list of the smiles (str) of the product species, + it can be canonical or not + + Returns + ------- + Arrhenius : TYPE list + DESCRIPTION. list of dictionaries containing the reaction TMTS model with the + three Arrhenius paramters for the reaction and the statistical factor, + as the reaction may happen by different pathways + /!\ if no pathway is identified, then a None will be returned + """ + typeofHabs="CH" + for R in Rsmiles: + if "[" and "]" in R: + radreactant=copy.deepcopy(R) + if radreactant=="[H]": + typeofHabs="H" + else: + molreactant=copy.deepcopy(R) + for P in Psmiles: + if "[" and "]" in P and P!="[HH]": + radproduct=copy.deepcopy(P) + elif P=="[HH]": + molproduct=copy.deepcopy(P) + else: + molproduct=copy.deepcopy(P) + #print(typeofHabs) + TMTSmodel,match,modelradP=findmatchHabs(typeofHabs,radreactant,molreactant,radproduct,molproduct) + #print(TMTSmodel) + #print(match) + Arrhenius=[] + #Arrhenius.append(TMTSmodel) + if match==1: + Arrhenius.append(TMTSmodel) + elif TMTSmodel!=None: + #Arrhenius.append(refiningarrheniusBS(TMTSmodel,Rsmiles)) + fcorr=count_Heq(radproduct)/count_Heq(modelradP) + for model in TMTSmodel: + Arrhenius.append({model:[str(float(TMTSmodel[model][0])*fcorr),TMTSmodel[model][1],TMTSmodel[model][2]]}) + #print(TMTSmodel) + else: + Arrhenius=None + return Arrhenius + +def matchreaction_BS(Rsmiles,Psmiles): + """ + Parameters + ---------- + Rsmiles : TYPE str + DESCRIPTION. Smiles of the reactant species, it can be canonical or not + Psmiles : TYPE list of str + DESCRIPTION. list containint thelist of the smiles of the product species, + it can be canonical or not + + Returns + ------- + Arrhenius : TYPE list + DESCRIPTION. list of dictionaries containing the reaction TMTS model with the + three Arrhenius paramters for the reaction and the statistical factor, + as the reaction may happen by different pathways + /!\ if no pathway is identified, then a None will be returned + """ + typeofbs="C" + rad="[H]" + alkene="" + for P in Psmiles: + if P=="[H]": + typeofbs="H" + rad=copy.deepcopy(P) + if "=" in P: + alkene=copy.deepcopy(P) + if typeofbs!="H": + for P in Psmiles: + if P != alkene: + rad=copy.deepcopy(P) + + TMTSmodel,match=findmatchBS(typeofbs,rad,alkene,Rsmiles) + Arrhenius=[] + if match==1: + Arrhenius.append(TMTSmodel) + elif TMTSmodel!=None: + Arrhenius.append(refiningarrheniusBS(TMTSmodel,Rsmiles)) + else: + Arrhenius=None + return Arrhenius + +def matchreaction_iso(Rsmiles,Psmiles): + """ + Parameters + ---------- + Rsmiles : TYPE str + DESCRIPTION. Smiles of the reactant species, it can be canonical or not + Psmiles : TYPE str + DESCRIPTION. Smiles of the product species, it can be canonical or not + + Returns + ------- + Arrhenius : TYPE list + DESCRIPTION. list of dictionaries containing the reaction TMTS model with the + three Arrhenius paramters for the reaction and the statistical factor, + as the reaction may happen by different pathways + /!\ if no pathway is identified, then a None will be returned + """ + pathways,cs= cyclesize(Rsmiles,Psmiles) + # print(pathways) + # print(cs) + #check if it was possible to find a pathway and a cyclesize: + if len(cs)==0 or len(pathways)==0: + return None + Arrhenius=[] + for i in range(0,len(cs)): + cycle=cs[i] + #print(cycle) + path=pathways[i] + TMTSmodel,match= searchmatch(Rsmiles,Psmiles,cycle) + #print(TMTSmodel) + if match==1: + Arrhenius.append(TMTSmodel) + elif TMTSmodel!=None: + #here, we need to call the code for calculating the correction factor + #for the statistical factor + Arrhenius.append(refiningarrhenius(TMTSmodel,Rsmiles,Psmiles,path,cycle)) + else: + Arrhenius=None + return Arrhenius + + +def refiningarrhenius(TMTSmodel,Rsmiles,Psmiles,path,cycle): + """ + + Parameters + ---------- + TMTSmodel : TYPE dic + DESCRIPTION. dictionary cointaining the reaction description as key + (format "Rmiles>>Psmiles") and the list of Arrhenius parameters as + descriptions [A (s-1),n, Ea(cal/mol) and Lc] + Rsmiles : TYPE str + DESCRIPTION. Smiles of the reactant species, it can be canonical or not + Psmiles : TYPE str + DESCRIPTION.Smiles of the reactant species, it can be canonical or not + + Returns + ------- + TMTSmodel : TYPE. dic + DESCRIPTION. corrected with a list of Arrhenius parameters after the correction factor is applied + + """ + for model in TMTSmodel: + Lcmodel=TMTSmodel[model][-1] + fc=1/float(Lcmodel) + Rsmiles=Rsmiles.replace("@","") + Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles))) + nisoR=CalcNumAtomStereoCenters(Rmol)+1 + opts = StereoEnumerationOptions(unique=True) + isomers = tuple(EnumerateStereoisomers(Rmol, options=opts)) + isomerssmi=[] + for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers): + isomerssmi.append(smi) + isomerssmi=cleanisomers(isomerssmi) + NR=len(isomerssmi) + isomersextsym=[] + for isomer in isomerssmi: + moliso=Chem.MolFromSmiles(isomer) + m3= Chem.AddHs(moliso) + AllChem.EmbedMolecule(m3) + AllChem.MMFFOptimizeMolecule(m3) + rdmolfiles.MolToPDBFile(m3,"C:/pwt/sym.pdb",flavor=16) + file = open("C:/pwt/sym.pdb","w+") + textinput = file.read() + textinputcorrige="COMPND C:/pwt/base.xyz\nAUTHOR GENERATED BY OPEN BABEL 3.1.0\n"+textinput + file.write(textinputcorrige) + callplatonsym() + extsymiso=tradsym(foundsym("C:/pwt/sym.lis")) + isomersextsym.append(copy.deepcopy(extsymiso)) + if len(isomersextsym)==1: + sym=isomersextsym[0] + LR=[sym/nisoR] + else: + LR=[sym/2 for sym in isomersextsym] + # now we will create a dictionary of TS based on the structures: + TS=TSdic(path,Rmol) + subsTS=[TS[i] for i in TS] + uniquecombofsubs=createTS(subsTS) + NTS=len(uniquecombofsubs) + LTSs=[] + #print(uniquecombofsubs) + for ts in uniquecombofsubs: + nisoTS=2 + symTS=1 + #verifyif nisoTS=1 + if cycle <=4 or cycle ==6: + if samesubextremes(ts)==True: + nisoTS=1 + invinvTS=middleCinversion(planinversion(ts)) + if invinvTS==ts: + symTS=2 + LTS=nisoTS/symTS + LTSs.append(copy.deepcopy(LTS)) + Lmodel=[] + i=NR + for LTS in LTSs: + j=i%NR + Lmodeli=LTS*LR[j] + i+=1 + Lmodel.append(copy.deepcopy(Lmodeli)) + Lmodelfinal=sum(Lmodel)/NR + fc=fc*Lmodelfinal + TMTSmodel[model]=[str(fc*float(TMTSmodel[model][0])),TMTSmodel[model][1],TMTSmodel[model][2],str(Lmodelfinal)] + + return TMTSmodel + +def symfromsmi(smiles): + """ + + + Parameters + ---------- + smiles : TYPE str + DESCRIPTION. smiles of the molecule + + Returns + ------- + sym: TYPE float + DESCRIPTION. integer with the symmetry value + + """ + moliso=Chem.MolFromSmiles(smiles) + m3= Chem.AddHs(moliso) + AllChem.EmbedMolecule(m3) + AllChem.MMFFOptimizeMolecule(m3) + rdmolfiles.MolToPDBFile(m3,"C:/pwt/sym.pdb",flavor=16) + file = open("C:/pwt/sym.pdb","r") + textinput = file.read() + file.close() + file2=open("C:/pwt/sym.pdb","w") + textinputcorrige="COMPND C:/pwt/base.xyz\nAUTHOR GENERATED BY OPEN BABEL 3.1.0\n"+textinput + textinputcorrige=textinputcorrige.replace(" H "," F ") + file2.write(textinputcorrige) + file2.close() + callplatonsym() + symiso=tradsym(foundsym("C:/pwt/sym.lis")) + +def callplatonsym(): + newpath = "C:\pwt" + os.chdir(newpath) + proc = sub.Popen(["platon", "-o", "sym.pdb\n"], + stdin=sub.PIPE, stdout=sub.PIPE, shell=True, text=True) + output2, errors = proc.communicate(input="NONSYM\nexit\n") + +def foundsym(filename): + symmetry = "C1" + symfile = open(filename, "r") + text = symfile.read() + lines = text.splitlines() + for i in range(0, len(lines)): + if lines[i][0:6] == "Resd #": + data = lines[i+2].split() + tol = float(data[9]) + if tol == 0.1: + symmetry = str(data[6]) + symfile.close() + return symmetry + +def tradsym(symmetry): + extsym = 1 + if symmetry == "C1" or symmetry == "Cs": + extsym = 1 + elif symmetry == "C2" or symmetry == "C2v": + extsym = 2 + elif symmetry == "C3v": + extsym = 3 + elif symmetry == "D2h": + extsym = 4 + elif symmetry == "D3h" or symmetry == "D3d": + extsym = 6 + elif symmetry == "D5h": + extsym = 10 + elif symmetry == "Td": + extsym = 12 + elif symmetry == "Oh": + extsym = 24 + return extsym + +def cleanisomers(listofiso): + """ + + + Parameters + ---------- + listofiso : TYPE list + DESCRIPTION. list of smiles for isomers, chiral centers indicated with @ or @@ + + Returns + ------- + listofisoclean : TYPE list + DESCRIPTION. list of smiles for isomers, chiral centers indicated with @ or @@, no + double structure + """ + already=[] + cleaniso=[] + for iso in listofiso: + if iso not in already: + cleaniso.append(iso) + already.append(iso) + already.append(inverseiso(iso)) + return cleaniso + +def inverseiso(isomer): + """ + + + Parameters + ---------- + isomer : TYPE str + DESCRIPTION. + + Returns + ------- + isomer : TYPE str + DESCRIPTION. @ and @@ inverted + + """ + isomer=isomer.replace("@@","XXX") + isomer=isomer.replace("@","XO") + isomer=isomer.replace("XXX","@") + isomer=isomer.replace("XO","@@") + + return isomer + +def searchmatch(Rsmi,Psmi,C): + """ + + Parameters + ---------- + Rsmi : TYPE str + DESCRIPTION. SMILES of reactant + Psmi : TYPE str + DESCRIPTION. SMILES of the product + C : TYPE integer + DESCRIPTION. size of the TScycle + + Returns + ------- + TMTSmodel : TYPE dic + DESCRIPTION. dictionary cointaining the reaction description as key + (format "Rmiles>>Psmiles") and the list of Arrhenius parameters as + descriptions [A (s-1),n, Ea(cal/mol) and Lc] + /!\ Return None if the reaction is not covered by ouyr database + """ + Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmi))) + natomR=Rmol.GetNumAtoms() + Pmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Psmi))) + natomP=Pmol.GetNumAtoms() + if C==3: + kinetic=iso_alkyl_C3 + elif C==4: + kinetic=iso_alkyl_C4 + elif C==5: + kinetic=iso_alkyl_C5 + elif C==6: + kinetic=iso_alkyl_C6 + else: + return None,0 + nmatch=0 + TMTSmodel={} + wincase="" + match=0 + for case in kinetic: + [Rmodel,Pmodel]=case.split(">>") + RmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rmodel))) + PmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Pmodel))) + ntestR=len(Rmol.GetSubstructMatch(RmodelMol)) + ntestP=len(Pmol.GetSubstructMatch(PmodelMol)) + if ntestR==ntestP: + if ntestR>nmatch: + match=ntestR/natomR + nmatch=ntestR + wincase=case + if wincase not in TMTSmodel: + TMTSmodel[wincase]=kinetic[wincase] + return TMTSmodel,match + else: + return None,0 + +def cyclesize(Rsmiles,Psmiles): + """ + Parameters + ---------- + Rsmiles : TYPE str + DESCRIPTION. Smiles of the reactant species, it can be canonical or not + Psmiles : TYPE str + DESCRIPTION. Smiles of the product species, it can be canonical or not + + Returns + ------- + path : TYPE tuple + DESCRIPTION. tuple containing the indexes of atoms being part of the TS cycle + cyclesize : TYPE integer + DESCRIPTION. size of the TS cycle + + """ + Rsmilescanonical=Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles)) + Psmilescanonical=Chem.MolToSmiles(Chem.MolFromSmiles(Psmiles)) + RMol=Chem.MolFromSmiles(Rsmilescanonical) + PMol=Chem.MolFromSmiles(Psmilescanonical) + alkane=Chem.MolFromSmiles(equivalentalkane(Rsmilescanonical)) + ridxR=radatomindex(Rsmilescanonical) + ridxP=radatomindex(Psmilescanonical) + path=[] + cyclesize=[] + rpositionsR,dicR=equivalencelist(Rsmilescanonical) + rpositionsP,dicP=equivalencelist(Psmilescanonical) + for atomR in rpositionsR: + for atomP in rpositionsP: + if atomP!=atomR: + path.append(Chem.rdmolops.GetShortestPath(alkane,atomR,atomP)) + path=cleanpath(path,dicR) + cyclesize=[len(path[i])+1 for i in range(0,len(path))] + return path,cyclesize + +def cleanpath(path,dicR): + """ + + Parameters + ---------- + path : TYPE list + DESCRIPTION. list of tuples containing the atoms indexes of a isomerisation TS cycle + dicR : TYPE dictionary + DESCRIPTION. tom indexes (keys) with their respective cannonical classification + + Returns + ------- + path: TYPE list of tuples + DESCRIPTION. list of tuples containing the atoms indexes of a isomerisation TS cycle, + but this times cleaned from equivalent ones + + """ + cleanpath=[] + aux=[] + for p in path: + equivalent=[dicR[p[i]] for i in range(0,len(p))] + if equivalent not in aux: + cleanpath.append(p) + aux.append(equivalent) + return cleanpath + +def equivalencelist(radsmiles): + """ + Parameters + ---------- + radsmiles : str + DESCRIPTION. smiles of a radical alkyl + + Returns + ------- + alleq : list + DESCRIPTION. atom indexes of the equivalent positions for the radical center + dic: dictionary + DESCRIPTION. atom indexes (keys) with their respective cannonical classification + """ + nonradsmiles=equivalentalkane(radsmiles) + Mol=Chem.MolFromSmiles(nonradsmiles) + listofequivalents=list(Chem.CanonicalRankAtoms(Mol, breakTies=False)) + radMol=Chem.MolFromSmiles(radsmiles) + radicalidx=radatomindex(radsmiles) + canradical=listofequivalents[radicalidx] + alleq=[i for i in range(0,len(listofequivalents)) if listofequivalents[i]==canradical] + dic={} + for i in range(0,len(listofequivalents)): + dic[i]=listofequivalents[i] + return alleq,dic + +def radatomindex(radsmiles): + """ + + Parameters + ---------- + radsmiles : string + DESCRIPTION. simplified molecular input line entry specification + it can be canonical or not + /!\ it must contain only ONE radical site, otherise the function will + return None + Returns + ------- + indxrad: integer + DESCRIPTION. index of the atom in the alkane equivalent that contains + the radical site + """ + indxradR=-1 + alkane=equivalentalkane(radsmiles) + Rmol=Chem.MolFromSmiles(radsmiles) + Amol=Chem.MolFromSmiles(alkane) + for atom in Amol.GetAtoms(): + atom.SetNumRadicalElectrons(1) + if len(Rmol.GetSubstructMatch(Amol))==len(Rmol.GetAtoms()): + indxradR=atom.GetIdx() + atom.SetNumRadicalElectrons(0) + return indxradR + +# def radicalatomindex(smiles): +# """ + +# Parameters +# ---------- +# smiles : string +# DESCRIPTION. simplified molecular input line entry specification +# it can be canonical or not +# /!\ it must contain only ONE radical site, otherise the function will +# return None +# Returns +# ------- +# radical : integer +# DESCRIPTION. index of the atom that contains the radical +# if the species contain moe than 1 radicla site, it will return None +# """ +# mol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(smiles))) +# r=Chem.MolToSmiles(mol) +# radical=[] +# for atom in mol.GetAtoms(): +# n=atom.GetNumRadicalElectrons() +# if n>0 : +# radical.append(atom.GetIdx()) +# if len(radical)==1: +# return radical[0] +# else: +# radical=None +# return radical + +def equivalentalkane(radicalsmiles): + """ + + Parameters + ---------- + radicalsmiles : str + DESCRIPTION.simplified molecular input line entry specification of a radical + species, it can be canonical or not + + Returns + ------- + newsmiles2 : str + DESCRIPTION. simplified molecular input line entry specification + of the equivalent non-radical species + + #important observation: as the Mol in RDkits are C++ objects, + # it is hard to create a copy of them in python, + # so I decide to create functions that take smiles all the time + # it can be done with xyz coordinates or others, but smiles are + # simpler to me + """ + + radicalmol=Chem.MolFromSmiles(radicalsmiles) + radicalmol=Chem.AddHs(radicalmol) + for atom in radicalmol.GetAtoms(): + # print("numeroatom", atom.GetIdx()) + # print("radical electron", atom.GetNumRadicalElectrons()) + if atom.GetNumRadicalElectrons()!=0: + #print('esse entrou') + atom.SetNumRadicalElectrons(0) + atom.SetFormalCharge(0) + sminew=Chem.MolToSmiles(radicalmol) + newsmiles=Chem.MolToSmiles(Chem.MolFromSmiles(sminew)) + + return newsmiles + +def count_Heq(radsmiles): + """ + Parameters + ---------- + radsmiles : str + DESCRIPTION.simplified molecular input line entry specification of a radical + species, it can be canonical or not + + Returns + ------- + nH : int + DESCRIPTION. number of H in the equivalent position + + """ + nH=1 + alkaneeq=equivalentalkane(radsmiles) + Molcomplete=Chem.AddHs(Chem.MolFromSmiles(alkaneeq)) + indrad=radatomindex(radsmiles) + for bond in Molcomplete.GetBonds(): + if bond.GetBeginAtomIdx()==indrad: + if Molcomplete.GetAtomWithIdx(bond.GetEndAtomIdx()).GetAtomicNum()==1: + Hpos=int(bond.GetEndAtomIdx()) + + #Radcomplete=Chem.AddHs(radMol) + # for atom in Molcomplete.GetAtoms(): + # if atom.GetAtomicNum()==1: + # print('found an H') + # with Chem.RWMol(Molcomplete) as mw: + # mw.RemoveAtom(atom.GetIdx()) + # moltest=Chem.MolFromSmiles(Chem.MolToSmiles(mw)) + # print(Chem.MolToSmiles(Chem.RemoveHs(moltest))) + # print(Chem.MolToSmiles(radMol)) + # if len(moltest.GetSubstructMatch(radMol))==len(radMol.GetAtoms()): + # print('found THE H') + # Hpos=int(atom.GetIdx()) + + + listofequivalents=list(Chem.CanonicalRankAtoms(Molcomplete, breakTies=False)) + nH=listofequivalents.count(listofequivalents[Hpos]) + + return nH + +def getnonradicalfrag(smiles): + """ + + + Parameters + ---------- + smiles : TYPE str + DESCRIPTION. smiles containning a fragmented radical + + Returns + ------- + smi : TYPE str + DESCRIPTION. smiles of the non radical fragment + + """ + + listofsmiles=smiles.split(".") + for smi in listofsmiles: + if "[" not in smi and "]" not in smi: + return smi + +def TSdic(path,R1): + """ + + Parameters + ---------- + path : TYPE tuple + DESCRIPTION. indexes of the atoms in the TS cycle + R1 : TYPE Mol object, from RDkit + DESCRIPTION. radical reactant + + Returns + ------- + TSdic : TYPE dic + DESCRIPTION. dictionary containing the substituents of each atom on the cycle, follow the format: + TSDic={ind:[0,0],id:["CCC",0]}.. + + """ + TSdic={} + for a in path: + bonds=[] + fragsa=[] + atom=R1.GetAtomWithIdx(a) + #print("atom ",a) + for x in atom.GetNeighbors(): + if x.GetIdx() not in path: + #print("bonded to",x.GetIdx(),"with bond number", R1.GetBondBetweenAtoms(atom.GetIdx(),x.GetIdx()).GetIdx()) + bonds.append(R1.GetBondBetweenAtoms(atom.GetIdx(),x.GetIdx()).GetIdx()) + if len(bonds)>0: + frag=Chem.FragmentOnSomeBonds(R1,tuple(bonds),addDummies=False) + for chem in frag: + chemfrag=Chem.MolToSmiles(chem) + newchemfrag=getnonradicalfrag(chemfrag) + fragsa.append(copy.deepcopy(newchemfrag)) + else: + fragsa=[0,0] + if len(fragsa)==2: + TSdic[a]=fragsa + else: + fragsa.append(0) + TSdic[a]=fragsa + return TSdic + +def mol_with_atom_index(mol): + for atom in mol.GetAtoms(): + atom.SetAtomMapNum(atom.GetIdx()) + # for bond in mol.GetBonds(): + # bond.SetBondMapNum(bond.GetIdx()) + return mol + + +def creer_solutions(liste,pivot,arbre,longueur): + if pivot < longueur: + filsg = Node(liste[pivot],arbre) + filsd = Node(list(reversed(liste[pivot])),arbre,) + pivot += 1 + creer_solutions(liste,pivot,filsg,longueur) + creer_solutions(liste,pivot,filsd,longueur) + +def planinversion(subslist): + newlist=[list(reversed(i)) for i in subslist] + return newlist + +def middleCinversion(sublist): + return list(reversed(sublist)) + +def createTS(liste): + liste.insert(0,0) + pivot=1 + root = Node(liste[0]) + longueur=len(liste) + creer_solutions(liste, pivot, root, longueur) + cnodes = list(PreOrderIter(root, filter_=lambda node: node.is_leaf)) + combinaisons = [str(i)[7:-2].split(i.separator) for i in cnodes] + combinaisons=[comb[1:] for comb in combinaisons] + c=[] + for comb in combinaisons: + comb=[ast.literal_eval(i) for i in comb] + c.append(copy.deepcopy(comb)) + uniquesubs=[] + for comb in c: + if len(comb)<=3: + combinvplan=planinversion(comb) + combinvC=middleCinversion(comb) + doubleinv=planinversion(combinvC) + if (combinvplan not in uniquesubs) and (combinvC not in uniquesubs) and (comb not in uniquesubs) and (doubleinv not in uniquesubs): + uniquesubs.append(copy.deepcopy(comb)) + elif len(comb)>=4: + combinvC=middleCinversion(comb) + if combinvC not in uniquesubs and (comb not in uniquesubs): + uniquesubs.append(copy.deepcopy(comb)) + return uniquesubs + +def samesubextremes(subsTS): + """ + + + Parameters + ---------- + subsTS : TYPE list + DESCRIPTION. + + Returns + ------- + same type Boolean + + + """ + p1=subsTS[0] + p2=subsTS[-1] + if p1==list(reversed(p1)) and p2==list(reversed(p2)): + return True + else: + return False + + +RDLogger.DisableLog('rdApp.*') #this is only for avoiding RdKit warnings +# examples of how to use those functions: +# v +#arr=matchreaction_BS("CC(C)(C)CC([CH2])CC(C)(C)C",["[H]","CC(C)(C)CC(=C)CC(C)(C)C"]) +#arr=matchreaction_iso("CC(C)(C)[CH]C(C)CC(C)(C)C","[CH2]C(C)(C)CC(C)CC(C)(C)C") +#arr=matchreaction_recomb(["CC(C)(C)CC([CH2])CC(C)(C)C","[H]"],'CC(C)(C)CC(C)CC(C)(C)C') +# important comment: make sure to check units before using.