Skip to content
Snippets Groups Projects
Commit fe1a622d authored by CitrangoloDestro Fabiola's avatar CitrangoloDestro Fabiola
Browse files

Upload New File

parent b68ee131
No related branches found
No related tags found
No related merge requests found
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 15 11:06:06 2024
"""
from rdkit import Chem
from rdkit.Chem import rdmolfiles
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions,GetStereoisomerCount
from rdkit.Chem.rdMolDescriptors import CalcNumAtomStereoCenters
from rdkit.Chem import AllChem
from _kinetic import iso_alkyl_C3,iso_alkyl_C4,iso_alkyl_C5,iso_alkyl_C6
import subprocess as sub
import copy
from anytree import Node, RenderTree, PreOrderIter
import ast,copy
import os
def matchreaction(Rsmiles,Psmiles):
"""
Parameters
----------
Rsmiles : TYPE str
DESCRIPTION. Smiles of the reactant species, it can be canonical or not
Psmiles : TYPE str
DESCRIPTION. Smiles of the product species, it can be canonical or not
Returns
-------
Arrhenius : TYPE list
DESCRIPTION. list of dictionaries containing the reaction TMTS model with the
three Arrhenius paramters for the reaction and the statistical factor,
as the reaction may happen by different pathways
/!\ if no pathway is identified, then a None will be returned
"""
pathways,cs= cyclesize(Rsmiles,Psmiles)
#check if it was possible to find a pathway and a cyclesize:
if len(cs)==0 or len(pathways)==0:
return None
Arrhenius=[]
for i in range(0,len(cs)):
cycle=cs[i]
# print(cycle)
path=pathways[i]
TMTSmodel,match= searchmatch(Rsmiles,Psmiles,cycle)
#print(TMTSmodel)
if match==1:
Arrhenius.append(TMTSmodel)
else:
#here, we need to call the code for calculating the correction factor
#for the statistical factor
Arrhenius.append(refiningarrhenius(TMTSmodel,Rsmiles,Psmiles,path,cycle))
return Arrhenius
def refiningarrhenius(TMTSmodel,Rsmiles,Psmiles,path,cycle):
"""
Parameters
----------
TMTSmodel : TYPE dic
DESCRIPTION. dictionary cointaining the reaction description as key
(format "Rmiles>>Psmiles") and the list of Arrhenius parameters as
descriptions [A (s-1),n, Ea(cal/mol) and Lc]
Rsmiles : TYPE str
DESCRIPTION. Smiles of the reactant species, it can be canonical or not
Psmiles : TYPE str
DESCRIPTION.Smiles of the reactant species, it can be canonical or not
Returns
-------
TMTSmodel : TYPE. dic
DESCRIPTION. corrected with a list of Arrhenius parameters after the correction factor is applied
"""
print(TMTSmodel )
for model in TMTSmodel:
Lcmodel=TMTSmodel[model][-1]
fc=1/float(Lcmodel)
Rsmiles=Rsmiles.replace("@","")
Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles)))
nisoR=CalcNumAtomStereoCenters(Rmol)+1
opts = StereoEnumerationOptions(unique=True)
isomers = tuple(EnumerateStereoisomers(Rmol, options=opts))
isomerssmi=[]
for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers):
isomerssmi.append(smi)
isomerssmi=cleanisomers(isomerssmi)
NR=len(isomerssmi)
isomersextsym=[]
for isomer in isomerssmi:
moliso=Chem.MolFromSmiles(isomer)
m3= Chem.AddHs(moliso)
AllChem.EmbedMolecule(m3)
AllChem.MMFFOptimizeMolecule(m3)
rdmolfiles.MolToPDBFile(m3,"C:/pwt/sym.pdb",flavor=16)
file = open("C:/pwt/sym.pdb","w+")
textinput = file.read()
textinputcorrige="COMPND C:/pwt/base.xyz\nAUTHOR GENERATED BY OPEN BABEL 3.1.0\n"+textinput
file.write(textinputcorrige)
callplatonsym()
extsymiso=tradsym(foundsym("C:/pwt/sym.lis"))
isomersextsym.append(copy.deepcopy(extsymiso))
if len(isomersextsym)==1:
sym=isomersextsym[0]
LR=[sym/nisoR]
else:
LR=[sym/2 for sym in isomersextsym]
# now we will create a dictionary of TS based on the structures:
TS=TSdic(path,Rmol)
subsTS=[TS[i] for i in TS]
uniquecombofsubs=createTS(subsTS)
NTS=len(uniquecombofsubs)
LTSs=[]
#print(uniquecombofsubs)
for ts in uniquecombofsubs:
nisoTS=2
symTS=1
#verifyif nisoTS=1
if cycle <=4 or cycle ==6:
if samesubextremes(ts)==True:
nisoTS=1
invinvTS=middleCinversion(planinversion(ts))
if invinvTS==ts:
symTS=2
LTS=nisoTS/symTS
LTSs.append(copy.deepcopy(LTS))
print(LR)
print(LTSs)
Lmodel=[]
i=NR
for LTS in LTSs:
j=i%NR
Lmodeli=LTS*LR[j]
i+=1
Lmodel.append(copy.deepcopy(Lmodeli))
Lmodelfinal=sum(Lmodel)/NR
fc=fc*Lmodelfinal
print(fc)
TMTSmodel[model]=[str(fc*float(TMTSmodel[model][0])),TMTSmodel[model][1],TMTSmodel[model][2],str(Lmodelfinal)]
return TMTSmodel
def symfromsmi(smiles):
"""
Parameters
----------
smiles : TYPE str
DESCRIPTION. smiles of the molecule
Returns
-------
sym: TYPE float
DESCRIPTION. integer with the symmetry value
"""
moliso=Chem.MolFromSmiles(smiles)
m3= Chem.AddHs(m2)
AllChem.EmbedMolecule(m3)
AllChem.MMFFOptimizeMolecule(m3)
rdmolfiles.MolToPDBFile(m3,"C:/pwt/sym.pdb",flavor=16)
file = open("C:/pwt/sym.pdb","r")
textinput = file.read()
file.close()
file2=open("C:/pwt/sym.pdb","w")
textinputcorrige="COMPND C:/pwt/base.xyz\nAUTHOR GENERATED BY OPEN BABEL 3.1.0\n"+textinput
textinputcorrige=textinputcorrige.replace(" H "," F ")
file2.write(textinputcorrige)
file2.close()
callplatonsym()
symiso=tradsym(foundsym("C:/pwt/sym.lis"))
def callplatonsym():
newpath = "C:\pwt"
os.chdir(newpath)
proc = sub.Popen(["platon", "-o", "sym.pdb\n"],
stdin=sub.PIPE, stdout=sub.PIPE, shell=True, text=True)
output2, errors = proc.communicate(input="NONSYM\nexit\n")
def foundsym(filename):
symmetry = "C1"
symfile = open(filename, "r")
text = symfile.read()
lines = text.splitlines()
for i in range(0, len(lines)):
if lines[i][0:6] == "Resd #":
data = lines[i+2].split()
tol = float(data[9])
if tol == 0.1:
symmetry = str(data[6])
symfile.close()
return symmetry
def tradsym(symmetry):
extsym = 1
if symmetry == "C1" or symmetry == "Cs":
extsym = 1
elif symmetry == "C2" or symmetry == "C2v":
extsym = 2
elif symmetry == "C3v":
extsym = 3
elif symmetry == "D2h":
extsym = 4
elif symmetry == "D3h" or symmetry == "D3d":
extsym = 6
elif symmetry == "D5h":
extsym = 10
elif symmetry == "Td":
extsym = 12
elif symmetry == "Oh":
extsym = 24
return extsym
def cleanisomers(listofiso):
"""
Parameters
----------
listofiso : TYPE list
DESCRIPTION. list of smiles for isomers, chiral centers indicated with @ or @@
Returns
-------
listofisoclean : TYPE list
DESCRIPTION. list of smiles for isomers, chiral centers indicated with @ or @@, no
double structure
"""
already=[]
cleaniso=[]
for iso in listofiso:
if iso not in already:
cleaniso.append(iso)
already.append(iso)
already.append(inverseiso(iso))
return cleaniso
def inverseiso(isomer):
"""
Parameters
----------
isomer : TYPE str
DESCRIPTION.
Returns
-------
isomer : TYPE str
DESCRIPTION. @ and @@ inverted
"""
isomer=isomer.replace("@@","XXX")
isomer=isomer.replace("@","XO")
isomer=isomer.replace("XXX","@")
isomer=isomer.replace("XO","@@")
return isomer
def searchmatch(Rsmi,Psmi,C):
"""
Parameters
----------
Rsmi : TYPE str
DESCRIPTION. SMILES of reactant
Psmi : TYPE str
DESCRIPTION. SMILES of the product
C : TYPE integer
DESCRIPTION. size of the TScycle
Returns
-------
TMTSmodel : TYPE dic
DESCRIPTION. dictionary cointaining the reaction description as key
(format "Rmiles>>Psmiles") and the list of Arrhenius parameters as
descriptions [A (s-1),n, Ea(cal/mol) and Lc]
/!\ Return None if the reaction is not covered by ouyr database
"""
Rmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rsmi)))
natomR=Rmol.GetNumAtoms()
Pmol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Psmi)))
natomP=Pmol.GetNumAtoms()
if C==3:
kinetic=iso_alkyl_C3
elif C==4:
kinetic=iso_alkyl_C4
elif C==5:
kinetic=iso_alkyl_C5
elif C==6:
kinetic=iso_alkyl_C6
else:
return None,0
nmatch=0
TMTSmodel={}
wincase=""
match=0
for case in kinetic:
[Rmodel,Pmodel]=case.split(">>")
RmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Rmodel)))
PmodelMol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(Pmodel)))
ntestR=len(Rmol.GetSubstructMatch(RmodelMol))
ntestP=len(Pmol.GetSubstructMatch(PmodelMol))
if ntestR==ntestP:
if ntestR>nmatch:
match=ntestR/natomR
nmatch=ntestR
wincase=case
if wincase not in TMTSmodel:
TMTSmodel[wincase]=kinetic[wincase]
return TMTSmodel,match
else:
return None,0
def cyclesize(Rsmiles,Psmiles):
"""
Parameters
----------
Rsmiles : TYPE str
DESCRIPTION. Smiles of the reactant species, it can be canonical or not
Psmiles : TYPE str
DESCRIPTION. Smiles of the product species, it can be canonical or not
Returns
-------
path : TYPE tuple
DESCRIPTION. tuple containing the indexes of atoms being part of the TS cycle
cyclesize : TYPE integer
DESCRIPTION. size of the TS cycle
"""
Rsmilescanonical=Chem.MolToSmiles(Chem.MolFromSmiles(Rsmiles))
Psmilescanonical=Chem.MolToSmiles(Chem.MolFromSmiles(Psmiles))
RMol=Chem.MolFromSmiles(Rsmilescanonical)
PMol=Chem.MolFromSmiles(Psmilescanonical)
alkane=Chem.MolFromSmiles(equivalentalkane(Rsmilescanonical))
ridxR=radicalatomindex(Rsmilescanonical)
ridxP=radicalatomindex(Psmilescanonical)
path=[]
cyclesize=[]
rpositionsR,dicR=equivalencelist(Rsmilescanonical)
rpositionsP,dicP=equivalencelist(Psmilescanonical)
for atomR in rpositionsR:
for atomP in rpositionsP:
if atomP!=atomR:
path.append(Chem.rdmolops.GetShortestPath(alkane,atomR,atomP))
path=cleanpath(path,dicR)
cyclesize=[len(path[i])+1 for i in range(0,len(path))]
return path,cyclesize
def cleanpath(path,dicR):
"""
Parameters
----------
path : TYPE list
DESCRIPTION. list of tuples containing the atoms indexes of a isomerisation TS cycle
dicR : TYPE dictionary
DESCRIPTION. tom indexes (keys) with their respective cannonical classification
Returns
-------
path: TYPE list of tuples
DESCRIPTION. list of tuples containing the atoms indexes of a isomerisation TS cycle,
but this times cleaned from equivalent ones
"""
cleanpath=[]
aux=[]
for p in path:
equivalent=[dicR[p[i]] for i in range(0,len(p))]
if equivalent not in aux:
cleanpath.append(p)
aux.append(equivalent)
return cleanpath
def equivalencelist(radsmiles):
"""
Parameters
----------
radsmiles : str
DESCRIPTION. smiles of a radical alkyl
Returns
-------
alleq : list
DESCRIPTION. atom indexes of the equivalent positions for the radical center
dic: dictionary
DESCRIPTION. atom indexes (keys) with their respective cannonical classification
"""
nonradsmiles=equivalentalkane(radsmiles)
Mol=Chem.MolFromSmiles(nonradsmiles)
radMol=Chem.MolFromSmiles(radsmiles)
listofequivalents=list(Chem.CanonicalRankAtoms(Mol, breakTies=False))
radicalidx=radicalatomindex(radsmiles)
canradical=listofequivalents[radicalidx]
alleq=[i for i in range(0,len(listofequivalents)) if listofequivalents[i]==canradical]
dic={}
for i in range(0,len(listofequivalents)):
dic[i]=listofequivalents[i]
return alleq,dic
def radicalatomindex(smiles):
"""
Parameters
----------
smiles : string
DESCRIPTION. simplified molecular input line entry specification
it can be canonical or not
/!\ it must contain only ONE radical site, otherise the function will
return None
Returns
-------
radical : integer
DESCRIPTION. index of the atom that contains the radical
if the species contain moe than 1 radicla site, it will return None
"""
mol=Chem.MolFromSmiles(Chem.MolToSmiles(Chem.MolFromSmiles(smiles)))
r=Chem.MolToSmiles(mol)
radical=[]
for atom in mol.GetAtoms():
n=atom.GetNumRadicalElectrons()
if n>0 :
radical.append(atom.GetIdx())
if len(radical)==1:
return radical[0]
else:
radical=None
return radical
def equivalentalkane(radicalsmiles):
"""
Parameters
----------
radicalsmiles : str
DESCRIPTION.simplified molecular input line entry specification of a radical
species, it can be canonical or not
Returns
-------
newsmiles2 : str
DESCRIPTION. simplified molecular input line entry specification
of the equivalent non-radical species
#important observation: as the Mol in RDkits are C++ objects,
# it is hard to create a copy of them in python,
# so I decide to create functions that take smiles all the time
# it can be done with xyz coordinates or others, but smiles are
# simpler to me
"""
radicalmol=Chem.MolFromSmiles(radicalsmiles)
for atom in radicalmol.GetAtoms():
if atom.GetNumRadicalElectrons()!=0:
atom.SetNumRadicalElectrons(0)
newsmiles=Chem.MolToSmiles(radicalmol)
return newsmiles
def getnonradicalfrag(smiles):
"""
Parameters
----------
smiles : TYPE str
DESCRIPTION. smiles containning a fragmented radical
Returns
-------
smi : TYPE str
DESCRIPTION. smiles of the non radical fragment
"""
listofsmiles=smiles.split(".")
for smi in listofsmiles:
if "[" not in smi and "]" not in smi:
return smi
def TSdic(path,R1):
"""
Parameters
----------
path : TYPE tuple
DESCRIPTION. indexes of the atoms in the TS cycle
R1 : TYPE Mol object, from RDkit
DESCRIPTION. radical reactant
Returns
-------
TSdic : TYPE dic
DESCRIPTION. dictionary containing the substituents of each atom on the cycle, follow the format:
TSDic={ind:[0,0],id:["CCC",0]}..
"""
TSdic={}
for a in path:
bonds=[]
fragsa=[]
atom=R1.GetAtomWithIdx(a)
#print("atom ",a)
for x in atom.GetNeighbors():
if x.GetIdx() not in path:
#print("bonded to",x.GetIdx(),"with bond number", R1.GetBondBetweenAtoms(atom.GetIdx(),x.GetIdx()).GetIdx())
bonds.append(R1.GetBondBetweenAtoms(atom.GetIdx(),x.GetIdx()).GetIdx())
if len(bonds)>0:
frag=Chem.FragmentOnSomeBonds(R1,tuple(bonds),addDummies=False)
for chem in frag:
chemfrag=Chem.MolToSmiles(chem)
newchemfrag=getnonradicalfrag(chemfrag)
fragsa.append(copy.deepcopy(newchemfrag))
else:
fragsa=[0,0]
if len(fragsa)==2:
TSdic[a]=fragsa
else:
fragsa.append(0)
TSdic[a]=fragsa
return TSdic
def mol_with_atom_index(mol):
for atom in mol.GetAtoms():
atom.SetAtomMapNum(atom.GetIdx())
# for bond in mol.GetBonds():
# bond.SetBondMapNum(bond.GetIdx())
return mol
def creer_solutions(liste,pivot,arbre,longueur):
if pivot < longueur:
filsg = Node(liste[pivot],arbre)
filsd = Node(list(reversed(liste[pivot])),arbre,)
pivot += 1
creer_solutions(liste,pivot,filsg,longueur)
creer_solutions(liste,pivot,filsd,longueur)
def planinversion(subslist):
newlist=[list(reversed(i)) for i in subslist]
return newlist
def middleCinversion(sublist):
return list(reversed(sublist))
def createTS(liste):
liste.insert(0,0)
pivot=1
root = Node(liste[0])
longueur=len(liste)
creer_solutions(liste, pivot, root, longueur)
cnodes = list(PreOrderIter(root, filter_=lambda node: node.is_leaf))
combinaisons = [str(i)[7:-2].split(i.separator) for i in cnodes]
combinaisons=[comb[1:] for comb in combinaisons]
c=[]
for comb in combinaisons:
comb=[ast.literal_eval(i) for i in comb]
c.append(copy.deepcopy(comb))
uniquesubs=[]
for comb in c:
if len(comb)<=3:
combinvplan=planinversion(comb)
combinvC=middleCinversion(comb)
doubleinv=planinversion(combinvC)
if (combinvplan not in uniquesubs) and (combinvC not in uniquesubs) and (comb not in uniquesubs) and (doubleinv not in uniquesubs):
uniquesubs.append(copy.deepcopy(comb))
elif len(comb)>=4:
combinvC=middleCinversion(comb)
if combinvC not in uniquesubs and (comb not in uniquesubs):
uniquesubs.append(copy.deepcopy(comb))
return uniquesubs
def samesubextremes(subsTS):
"""
Parameters
----------
subsTS : TYPE list
DESCRIPTION.
Returns
-------
same type Boolean
"""
p1=subsTS[0]
p2=subsTS[-1]
if p1==list(reversed(p1)) and p2==list(reversed(p2)):
return True
else:
return False
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment