Skip to content
Snippets Groups Projects
Commit 54f7836b authored by BOULANGEOT Nathan's avatar BOULANGEOT Nathan
Browse files

Update 17 files

- /Minimum Working Exemple/.gitkeep
- /Minimum Working Exemple/AEM_image.py
- /Minimum Working Exemple/Ag111_AEM.pdf
- /Minimum Working Exemple/POSCAR
- /Minimum Working Exemple/run.py
- /Minimum Working Exemple/train.traj
- /codes/create_descriptor.py
- /codes/position_selection.py
- /Minimum_Working_Exemple/AEM_image.py
- /Minimum_Working_Exemple/.gitkeep
- /Minimum_Working_Exemple/POSCAR
- /Minimum_Working_Exemple/Ag111_AEM.pdf
- /Minimum_Working_Exemple/train.traj
- /codes/tool.py
- /README.md
- /position_selection.py
- /run.py
parent b9f4ac4d
Branches
No related tags found
No related merge requests found
File moved
File moved
...@@ -22,7 +22,7 @@ pip install -r requirements.txt ...@@ -22,7 +22,7 @@ pip install -r requirements.txt
The purpose of this code is to recreate an Energy Adsorption Map (EAM) using machine learning. The purpose of this code is to recreate an Energy Adsorption Map (EAM) using machine learning.
Running the file "run.py" trains a model using an ASE ".traj" object as the training set. It then predicts the adsorption energy on an n $\times$ n regular grid above the structure given in the POSCAR file. Results are written to a ".txt" file. Running the file "run.py" trains a model using an ASE "train.traj" object as the training set. It then predicts the adsorption energy on an n $\times$ n regular grid above the structure given in the POSCAR file. Results are written to "resultat.txt".
The training set file, the POSCAR file, the value of n, as well as the parameters for SOAP and Gaussian Process Regression, can be modified in the "run.py" file. The training set file, the POSCAR file, the value of n, as well as the parameters for SOAP and Gaussian Process Regression, can be modified in the "run.py" file.
...@@ -30,14 +30,21 @@ The code to create AEM image like "Ag111_AEM.pdf" is also given and uses matplot ...@@ -30,14 +30,21 @@ The code to create AEM image like "Ag111_AEM.pdf" is also given and uses matplot
## Data sets ## Data sets
Two datasets are provided in this repository. The minimum working example focuses on the adsorption of hydrogen on Ag(111). We also provide the data for hydrogen adsorption on Al<sub>13</sub>Co<sub>4</sub>(100), on which the related [paper](https://doi.org/10.1021/acs.jctc.4c00367) focuses. Two datasets are provided in this repository. The minimum working example focuses on the adsorption of hydrogen on Ag(111); its corresponding training set (3 hydrogen positions relaxed by DFT) is used by default when running "run.py". We also provide the data for hydrogen adsorption on Al<sub>13</sub>Co<sub>4</sub>(100), on which the related [paper](https://doi.org/10.1021/acs.jctc.4c00367) focuses.
The data "final.traj" located in the folder "data_Al13Co4" consists of the 20 $\times$ 20 positions above the surface where DFT relaxation is done. In order to recreate the results obtained in the [paper](https://doi.org/10.1021/acs.jctc.4c00367), one may create a "train.traj" from a fraction of "final.traj", then run the code as done in the minimum working example. The data "final.traj" located in the folder "data_Al13Co4" consists of the 20 $\times$ 20 positions above the surface where DFT relaxation is done. In order to recreate the results obtained in the [paper](https://doi.org/10.1021/acs.jctc.4c00367), one may create a "train.traj" from a fraction of "final.traj". To select the positions, run the code "position_selection.py", which uses the Farthest Point Sampling method; you can change the value of "ncomp" in the `__main__` block at the end of this code to choose the number of positions selected. Once "train.traj" is created, you can run the code for Al<sub>13</sub>Co<sub>4</sub>(100) after changing the directory in "run.py".
## Citation ## Citation
@inproceedings{Agazzotti23deep,
author = {Boulangeot, Nathan and Brix, Florian and Sur, Frédéric and Gaudry, Émilie},
title = {Hydrogen, oxygen and lead adsorbates on Al13Co4(100): accurate potential energy surfaces at low computational cost by machine learning and DFT-based data},
journal = {Journal of Chemical Theory and Computation},
year = {2024},
doi = {10.1021/acs.jctc.4c00367}
}
```
......
import numpy as np
from ase.io import Trajectory
from dscribe.descriptors import SOAP
import os
import ase
class create_descriptor():
    """Compute descriptor arrays centred on one atom of each structure.

    Only the ``'soap'`` method is implemented; it wraps the ``SOAP`` class
    imported at file level from ``dscribe.descriptors``, built with
    ``periodic=True``.

    Attributes:
        ats: Index of the atom used as the descriptor centre.
        method: Name of the descriptor method (``'soap'``).
        decr: The configured ``SOAP`` instance.
        title: Cache file stem built from the method name and the
            ``l_max``, ``n_max`` and ``r_cut`` parameters.
    """

    # NOTE: the dict default is shared between calls but is never mutated
    # here (it is only unpacked with ``**``), so it is kept to leave the
    # signature unchanged.
    def __init__(self, method='soap', params={'species': ['Al', 'Co', 'H'], 'l_max': 2, 'n_max': 2, 'r_cut': 7}, ats=0):
        """Configure the descriptor.

        Args:
            method: Descriptor method; only ``'soap'`` is supported.
            params: Keyword arguments forwarded to ``SOAP``. Must contain
                ``'l_max'``, ``'n_max'`` and ``'r_cut'`` (used for ``title``).
            ats: Index of the atom on which the descriptor is centred.

        Raises:
            ValueError: If ``method`` is not ``'soap'``. Previously the object
                was created without ``decr``/``title`` and failed later with
                an AttributeError inside ``create``.
        """
        if method != 'soap':
            raise ValueError(
                f"unsupported descriptor method {method!r}; only 'soap' is implemented"
            )
        self.ats = ats
        self.method = method
        self.decr = SOAP(periodic=True, **params)
        self.title = f"{method}_l{params['l_max']}_n{params['n_max']}_r{params['r_cut']}"

    def create(self, atoms, load=False, save_file=None):
        """Return the descriptor array for ``atoms``, optionally cached on disk.

        Args:
            atoms: Structures handed to ``self.decr.create``; must support
                ``len()``.
            load: When True, ``<save_file>/<title>.npy`` is returned if it
                exists; otherwise the descriptors are computed and saved to
                that file. When False nothing is read or written.
            save_file: Directory holding the cache file. Required when
                ``load`` is True.

        Returns:
            The array loaded from the cache, or ``np.squeeze`` applied to the
            output of ``self.decr.create``.

        Raises:
            ValueError: If ``load`` is True and ``save_file`` is None.
        """
        cache_path = None
        if load:
            if save_file is None:
                raise ValueError("save_file must be given when load=True")
            cache_path = os.path.join(save_file, self.title + '.npy')
            if os.path.exists(cache_path):
                return np.load(cache_path)

        # One centre per structure: the atom with index ``self.ats``.
        centers = [[self.ats]] * len(atoms)
        X = np.squeeze(self.decr.create(atoms, centers=centers))
        if cache_path is not None:
            np.save(cache_path, X)
        return X
\ No newline at end of file
...@@ -14,9 +14,23 @@ from sklearn.model_selection import GridSearchCV ...@@ -14,9 +14,23 @@ from sklearn.model_selection import GridSearchCV
from joblib import Parallel, delayed from joblib import Parallel, delayed
from ase import neighborlist from ase import neighborlist
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
from create_descriptor import *
class create_descriptor():
    """Compute descriptor arrays centred on one atom of each structure.

    Only the ``'soap'`` method is implemented; it wraps a ``SOAP`` class that
    is expected to be imported at file level (presumably from
    ``dscribe.descriptors`` -- confirm against this file's imports), built
    with ``periodic=True``.

    Attributes:
        ats: Index of the atom used as the descriptor centre.
        method: Name of the descriptor method (``'soap'``).
        decr: The configured ``SOAP`` instance.
        title: Cache file stem built from the method name and the
            ``l_max``, ``n_max`` and ``r_cut`` parameters.
    """

    # NOTE: the dict default is shared between calls but is never mutated
    # here (it is only unpacked with ``**``), so it is kept to leave the
    # signature unchanged.
    def __init__(self, method='soap', params={'species': ['Al', 'Co', 'H'], 'l_max': 2, 'n_max': 2, 'r_cut': 7}, ats=0):
        """Configure the descriptor.

        Args:
            method: Descriptor method; only ``'soap'`` is supported.
            params: Keyword arguments forwarded to ``SOAP``. Must contain
                ``'l_max'``, ``'n_max'`` and ``'r_cut'`` (used for ``title``).
            ats: Index of the atom on which the descriptor is centred.

        Raises:
            ValueError: If ``method`` is not ``'soap'``. Previously the object
                was created without ``decr``/``title`` and failed later with
                an AttributeError inside ``create``.
        """
        if method != 'soap':
            raise ValueError(
                f"unsupported descriptor method {method!r}; only 'soap' is implemented"
            )
        self.ats = ats
        self.method = method
        self.decr = SOAP(periodic=True, **params)
        self.title = f"{method}_l{params['l_max']}_n{params['n_max']}_r{params['r_cut']}"

    def create(self, atoms, load=False, save_file=None):
        """Return the descriptor array for ``atoms``, optionally cached on disk.

        Args:
            atoms: Structures handed to ``self.decr.create``; must support
                ``len()``.
            load: When True, ``<save_file>/<title>.npy`` is returned if it
                exists; otherwise the descriptors are computed and saved to
                that file. When False nothing is read or written.
            save_file: Directory holding the cache file. Required when
                ``load`` is True.

        Returns:
            The array loaded from the cache, or ``np.squeeze`` applied to the
            output of ``self.decr.create``.

        Raises:
            ValueError: If ``load`` is True and ``save_file`` is None.
        """
        cache_path = None
        if load:
            if save_file is None:
                raise ValueError("save_file must be given when load=True")
            cache_path = os.path.join(save_file, self.title + '.npy')
            if os.path.exists(cache_path):
                return np.load(cache_path)

        # One centre per structure: the atom with index ``self.ats``.
        centers = [[self.ats]] * len(atoms)
        X = np.squeeze(self.decr.create(atoms, centers=centers))
        if cache_path is not None:
            np.save(cache_path, X)
        return X
def sign(x): def sign(x):
if x<0: if x<0:
......
...@@ -4,14 +4,30 @@ from sklearn.metrics import pairwise_kernels ...@@ -4,14 +4,30 @@ from sklearn.metrics import pairwise_kernels
from ase.io import Trajectory from ase.io import Trajectory
from scipy.spatial import distance_matrix from scipy.spatial import distance_matrix
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
from create_descriptor import * from codes.tool import *
def retrieve_pos_ind(traj, atom=0):
    """Split a trajectory into index ranges, one per distinct (x, y) position.

    The (x, y) coordinates of atom ``atom`` are read from every image. Each
    time an image shows an (x, y) pair that has not been seen before (exact
    floating-point equality against all pairs seen so far), the current range
    is closed and a new one starts at that image.

    Args:
        traj: Sized iterable of images exposing ``get_positions()``.
        atom: Index of the atom whose (x, y) coordinates are compared.

    Returns:
        Integer array of shape (n_ranges, 2); each row is the inclusive
        ``[first, last]`` image index of one range. An empty trajectory gives
        an empty (0, 2) array instead of failing on an unassigned counter.
    """
    n_images = len(traj)
    if n_images == 0:
        return np.empty((0, 2), dtype=int)

    seen_xy = []
    ind_split = []
    start = 0
    for ind, atoms in enumerate(traj):
        xy = atoms.get_positions()[atom][:2]
        # The first image always opens the first range; later images only
        # open a new range when their (x, y) pair is new.
        if seen_xy and np.equal(seen_xy, xy).all(axis=1).any():
            continue
        if seen_xy:
            ind_split.append([start, ind - 1])
            start = ind
        seen_xy.append(xy)

    # Close the last open range at the final image.
    ind_split.append([start, n_images - 1])
    return np.array(ind_split)
def matrice_farthest_search(points, k,mode='euclidian',gamma=1e-5,n=3): def matrice_farthest_search(points, k,mode='euclidian',gamma=1e-5,n=3):
if mode is 'euclidian': if mode == 'euclidian':
def distance_m(a): def distance_m(a):
return distance_matrix(a,a) return distance_matrix(a,a)
if mode is 'rbf': if mode == 'rbf':
def distance_m(a): def distance_m(a):
DM=pairwise_kernels(a,a,metric='rbf',gamma=gamma) DM=pairwise_kernels(a,a,metric='rbf',gamma=gamma)
return 1-DM return 1-DM
...@@ -30,57 +46,44 @@ def matrice_farthest_search(points, k,mode='euclidian',gamma=1e-5,n=3): ...@@ -30,57 +46,44 @@ def matrice_farthest_search(points, k,mode='euclidian',gamma=1e-5,n=3):
rem=indiceleft.pop(ind_rem) rem=indiceleft.pop(ind_rem)
indices.append(rem) indices.append(rem)
solution_set.append(max(a)) solution_set.append(max(a))
solution_set=np.array(solution_set) return np.array(indices)
return solution_set, indices
if __name__ == '__main__': def train_from_pos(ncomp,data_folder='data_Al13Co4',traj='final.traj', atomseul='H'):
#debut de l initialisation os.chdir(data_folder)
path=os.getcwd()
atomseul='H' #atome depose
traj='final.traj' #bdd 1
data_folder='data'
gamma=1e-5
sys=Trajectory(traj) sys=Trajectory(traj)
species=[] species=[]
elem=sys[0].get_chemical_symbols() elem=sys[0].get_chemical_symbols()
#identification de l atome seul dans la structure #adsorbate index
for el in range(len(elem)): for el in range(len(elem)):
if elem[el] not in species: if elem[el] not in species:
species.append(elem[el]) species.append(elem[el])
if elem[el]== atomseul: if elem[el]== atomseul:
ats=el ats=el
# bases entieres # full data set
params={'species':species,'l_max':2,'n_max':2,'r_cut':7} params={'species':species,'l_max':3,'n_max':6,'r_cut':6}
desc=create_descriptor(method='soap',params=params,ats=ats) desc=create_descriptor(method='soap',params=params,ats=ats)
X_ini=desc.create(traj,load=True,save_file=data_folder) X_ini=desc.create(sys,load=False)
# bases poscars # positions are compared in their initial configurations (POSCAR)
index_pos_100=np.load(data_folder+'/ind_pos.npy') # !! to discriminate positions in the "final.traj" data set, the function
# retrieve_pos_ind looks for differences in the adsorbate (x, y) values.
index_pos_100=retrieve_pos_ind(sys)
X_poscar=[X_ini[i[0]] for i in index_pos_100] X_poscar=[X_ini[i[0]] for i in index_pos_100]
len100=len(X_poscar)
print(len100)
#scaling #scaling
scaler=StandardScaler() scaler=StandardScaler()
scaler.fit(X_poscar) scaler.fit(X_poscar)
X_poscar_T=scaler.transform(X_poscar) X_poscar_T=scaler.transform(X_poscar)
ncomp=144 indices=matrice_farthest_search(X_poscar_T, ncomp,)
#initialisation print(indices)
distances,indices=matrice_farthest_search(X_poscar_T, ncomp,) train_set=Trajectory('train.traj','w')
np.save("distances.npy",distances) for ind in indices:
for i in range(1,145): #write the whole DFT relaxation for each position selected
if i in [4,9,16,25,36,49,64,81,100,]: for i in range(index_pos_100[ind,0],index_pos_100[ind,1]+1):
#resultpath train_set.write(sys[i])
resultpath=path+'/'+str(i) train_set.close()
try:
os.mkdir(resultpath) if __name__ == '__main__':
except: ncomp=64
pass train_from_pos(ncomp,data_folder='data_Al13Co4',traj='final.traj', atomseul='H')
#save \ No newline at end of file
print(len(indices[:i]))
np.save(resultpath+'/indices.npy',indices[:i])
...@@ -13,11 +13,12 @@ from sklearn.model_selection import train_test_split, learning_curve, validation ...@@ -13,11 +13,12 @@ from sklearn.model_selection import train_test_split, learning_curve, validation
from joblib import Parallel, delayed from joblib import Parallel, delayed
from ase import neighborlist from ase import neighborlist
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
from create_descriptor import * from codes.tool import *
from tool import *
if __name__ == '__main__': if __name__ == '__main__':
#initialisation #initialisation
directory='Minimum_Working_Exemple'
os.chdir(directory) #go where the train set (train.traj) and POSCAR are.
atomseul='H' #atome adsorbed atomseul='H' #atome adsorbed
poscar_file='POSCAR' #slab with no adsorbate poscar_file='POSCAR' #slab with no adsorbate
data_folder='.' #folder where decriptors are saved data_folder='.' #folder where decriptors are saved
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment