diff --git a/Minimum Working Exemple/.gitkeep b/Minimum_Working_Exemple/.gitkeep
similarity index 100%
rename from Minimum Working Exemple/.gitkeep
rename to Minimum_Working_Exemple/.gitkeep
diff --git a/Minimum Working Exemple/AEM_image.py b/Minimum_Working_Exemple/AEM_image.py
similarity index 100%
rename from Minimum Working Exemple/AEM_image.py
rename to Minimum_Working_Exemple/AEM_image.py
diff --git a/Minimum Working Exemple/Ag111_AEM.pdf b/Minimum_Working_Exemple/Ag111_AEM.pdf
similarity index 100%
rename from Minimum Working Exemple/Ag111_AEM.pdf
rename to Minimum_Working_Exemple/Ag111_AEM.pdf
diff --git a/Minimum Working Exemple/POSCAR b/Minimum_Working_Exemple/POSCAR
similarity index 100%
rename from Minimum Working Exemple/POSCAR
rename to Minimum_Working_Exemple/POSCAR
diff --git a/Minimum Working Exemple/train.traj b/Minimum_Working_Exemple/train.traj
similarity index 100%
rename from Minimum Working Exemple/train.traj
rename to Minimum_Working_Exemple/train.traj
diff --git a/README.md b/README.md
index 2f4ec3e545a8f029d57e1f87d104a661669ef64d..52d5d8da0f011bb7dd040faa9e89f0adbbc636c2 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ pip install -r requirements.txt
 
 This code purpose is to recreate Energy Adsorption Map (EAM) using machine learning.
 
-running file "run.py" will train a model using an ase ".traj" object as a training set. It then predicts on a n $\times$ n regular grid above the POSCAR file the adsorption energy. Results are written on a ".txt" file.
+Running the file "run.py" trains a model using an ASE "train.traj" trajectory as the training set. It then predicts the adsorption energy on an n $\times$ n regular grid above the slab given in the POSCAR file. Results are written to "resultat.txt".
 
 train set file, POSCAR file, n value as well as parameters for the SOAP and Gaussian Process Regression can be modify in the "run.py" file.
 
@@ -30,14 +30,21 @@ The code to create AEM image like "Ag111_AEM.pdf" is also given and uses matplot
 
 ## Data sets
 
-Two data bases are given in this git. The minimum working exemple is focused on the adsorption of Hydrogen on Ag(111). We also give the data for Hydrogen adsorption on Al<sub>13</sub>Co<sub>4</sub>(100) on wich the related [paper](https://doi.org/10.1021/acs.jctc.4c00367) focuses.
+Two data sets are provided in this repository. The minimum working example focuses on the adsorption of hydrogen on Ag(111); its training set (3 hydrogen positions relaxed by DFT) is used by default when running "run.py". We also provide the data for hydrogen adsorption on Al<sub>13</sub>Co<sub>4</sub>(100), on which the related [paper](https://doi.org/10.1021/acs.jctc.4c00367) focuses.
 
-The data "final.traj" located in the folder "data_Al13Co4" consist of the 20 $\times$ 20 positions above the surface where DFT relaxation is done. In order to recreate results obtained in the [paper](https://doi.org/10.1021/acs.jctc.4c00367), one may create a "train.traj" from a fraction of the "final.traj", then run the code as similarly done in the minimum working exemple.
+The data "final.traj" located in the folder "data_Al13Co4" consists of the 20 $\times$ 20 positions above the surface where DFT relaxation was performed. To reproduce the results of the [paper](https://doi.org/10.1021/acs.jctc.4c00367), create a "train.traj" from a fraction of "final.traj": run "position_selection.py", which selects positions with the Farthest Point Sampling method and writes "train.traj" into "data_Al13Co4". The number of selected positions is set by the value "ncomp" in the `__main__` section at the end of that script. Once "train.traj" has been created, you can run the code for Al<sub>13</sub>Co<sub>4</sub>(100) after changing the directory in "run.py".
 
 
  ## Citation
 
-
+```bibtex
+@article{Boulangeot2024hydrogen,
+author    = {Boulangeot, Nathan and Brix, Florian and Sur, Frédéric and Gaudry, Émilie},
+title     = {Hydrogen, oxygen and lead adsorbates on Al13Co4(100): accurate potential energy surfaces at low computational cost by machine learning and DFT-based data},
+journal   = {Journal of Chemical Theory and Computation},
+year      = {2024},
+doi       = {10.1021/acs.jctc.4c00367}}
+```
 
 
 
diff --git a/codes/create_descriptor.py b/codes/create_descriptor.py
deleted file mode 100644
index 16246930acc88975b62199ea67f7fdc3ff44e7e4..0000000000000000000000000000000000000000
--- a/codes/create_descriptor.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import numpy as np
-from ase.io import Trajectory
-from dscribe.descriptors import SOAP
-import os
-import ase
-
-class create_descriptor():
-    def __init__(self,method='soap',params={'species':['Al','Co','H'],'l_max':2,'n_max':2,'r_cut':7},ats=0):
-        self.ats=ats
-        self.method=method
-        if method == 'soap':
-            self.decr=SOAP(periodic=True,**params)
-            self.title=method+'_l'+str(params['l_max'])+'_n'+str(params['n_max'])+'_r'+str(params['r_cut'])
-    def create(self,atoms,load=False,save_file=None):
-        if load and os.path.exists(save_file+'/'+self.title+'.npy'):  
-            X=np.load(save_file+'/'+self.title+'.npy')
-        else:
-            X=np.squeeze(self.decr.create(atoms,centers=[[self.ats]]*len(atoms)))
-            if load:
-                 np.save(save_file+'/'+self.title+'.npy',X)
-        return X
-    
\ No newline at end of file
diff --git a/codes/tool.py b/codes/tool.py
index 0e9c1cf3dc686e367163da44ea8b985da2a4bea6..8da726030422230d3aff75d80d5cd347fc77637c 100644
--- a/codes/tool.py
+++ b/codes/tool.py
@@ -14,9 +14,23 @@ from sklearn.model_selection import GridSearchCV
 from joblib import Parallel, delayed
 from ase import neighborlist 
 from sklearn.preprocessing import StandardScaler
-from create_descriptor import *
-
 
+class create_descriptor():
+    def __init__(self,method='soap',params={'species':['Al','Co','H'],'l_max':2,'n_max':2,'r_cut':7},ats=0):
+        self.ats=ats
+        self.method=method
+        if method == 'soap':
+            self.decr=SOAP(periodic=True,**params)
+            self.title=method+'_l'+str(params['l_max'])+'_n'+str(params['n_max'])+'_r'+str(params['r_cut'])
+    def create(self,atoms,load=False,save_file=None):
+        if load and os.path.exists(save_file+'/'+self.title+'.npy'):  
+            X=np.load(save_file+'/'+self.title+'.npy')
+        else:
+            X=np.squeeze(self.decr.create(atoms,centers=[[self.ats]]*len(atoms)))
+            if load:
+                 np.save(save_file+'/'+self.title+'.npy',X)
+        return X
+    
 
 def sign(x):
     if x<0:
diff --git a/codes/position_selection.py b/position_selection.py
similarity index 51%
rename from codes/position_selection.py
rename to position_selection.py
index 892cb5e910c2f3e31e124c451b849a49caef17e4..1b3e31dd620f0b4b08a5becd60d400dd5df751d0 100644
--- a/codes/position_selection.py
+++ b/position_selection.py
@@ -4,14 +4,30 @@ from sklearn.metrics import pairwise_kernels
 from ase.io import Trajectory
 from scipy.spatial import distance_matrix
 from sklearn.preprocessing import StandardScaler
-from create_descriptor import *
+from codes.tool import *
 
+def retrieve_pos_ind(traj,atom=0):
+      postrain=None
+      ind_split=[]
+      for ind,atoms in enumerate(traj):
+          pos=atoms.get_positions()[atom][:2]
+          if postrain is None:
+             postrain=[pos]
+             count=0
+          elif  any(np.equal(postrain,pos).all(1)):
+                pass
+          else:
+                postrain=np.vstack((postrain,pos))
+                ind_split.append([count,ind-1])
+                count=ind
+      ind_split.append([count,len(traj)-1])
+      return np.array(ind_split)
 
 def matrice_farthest_search(points, k,mode='euclidian',gamma=1e-5,n=3):
-         if mode is 'euclidian':
+         if mode == 'euclidian':
              def distance_m(a):
                 return distance_matrix(a,a)
-         if mode is 'rbf':
+         if mode == 'rbf':
               def distance_m(a):
                 DM=pairwise_kernels(a,a,metric='rbf',gamma=gamma)
                 return 1-DM
@@ -30,57 +46,44 @@ def matrice_farthest_search(points, k,mode='euclidian',gamma=1e-5,n=3):
              rem=indiceleft.pop(ind_rem)
              indices.append(rem)   
              solution_set.append(max(a))
-         solution_set=np.array(solution_set)
-         return solution_set, indices   
-
-
-
+         return np.array(indices)
 
-if __name__ == '__main__':
-    #debut de l initialisation
-    path=os.getcwd()
-    atomseul='H' #atome depose
-    traj='final.traj' #bdd 1
-    data_folder='data'
-    gamma=1e-5
-    
+def train_from_pos(ncomp,data_folder='data_Al13Co4',traj='final.traj', atomseul='H'):
+    os.chdir(data_folder)
     sys=Trajectory(traj)  
     species=[]
     elem=sys[0].get_chemical_symbols() 
-    #identification de l atome seul dans la structure
+    #adsorbate index
     for el in range(len(elem)): 
         if elem[el] not in species:
             species.append(elem[el])
             if elem[el]== atomseul:
                 ats=el
 
-    # bases entieres
-    params={'species':species,'l_max':2,'n_max':2,'r_cut':7}
+    # full data set
+    params={'species':species,'l_max':3,'n_max':6,'r_cut':6}
     desc=create_descriptor(method='soap',params=params,ats=ats)
-    X_ini=desc.create(traj,load=True,save_file=data_folder)
+    X_ini=desc.create(sys,load=False)
      
-    # bases poscars
-    index_pos_100=np.load(data_folder+'/ind_pos.npy')
+    # positions are compared in their initial configurations (POSCAR) 
+    # !! to discriminate positions in the "final.traj" data set, the function
+    # retrieve_pos_ind looks for differences in adsorbate (x, y) values.
+    index_pos_100=retrieve_pos_ind(sys)
     X_poscar=[X_ini[i[0]] for i in index_pos_100]
-    len100=len(X_poscar)
-    print(len100)
          
     #scaling
     scaler=StandardScaler()
     scaler.fit(X_poscar)
     X_poscar_T=scaler.transform(X_poscar)
-    ncomp=144        
-    #initialisation
-    distances,indices=matrice_farthest_search(X_poscar_T, ncomp,)
-    np.save("distances.npy",distances)
-    for i in range(1,145):           
-        if i in [4,9,16,25,36,49,64,81,100,]:
-            #resultpath    
-            resultpath=path+'/'+str(i)      
-            try:
-                os.mkdir(resultpath)
-            except:
-                pass
-            #save
-            print(len(indices[:i]))
-            np.save(resultpath+'/indices.npy',indices[:i])
+    indices=matrice_farthest_search(X_poscar_T, ncomp,)
+    print(indices)
+    train_set=Trajectory('train.traj','w')
+    for ind in indices:
+        #write the whole DFT relaxation for each position selected
+        for i in range(index_pos_100[ind,0],index_pos_100[ind,1]+1):
+            train_set.write(sys[i])
+    train_set.close()
+
+if __name__ == '__main__':
+        ncomp=64
+        train_from_pos(ncomp,data_folder='data_Al13Co4',traj='final.traj', atomseul='H')
\ No newline at end of file
diff --git a/Minimum Working Exemple/run.py b/run.py
similarity index 94%
rename from Minimum Working Exemple/run.py
rename to run.py
index e78ee9e66b3da200982deec6dfaf81177e0d3c28..992d4d292f6dc16b26ce64219241ecaa1bf87439 100644
--- a/Minimum Working Exemple/run.py	
+++ b/run.py
@@ -13,11 +13,12 @@ from sklearn.model_selection import train_test_split, learning_curve, validation
 from joblib import Parallel, delayed
 from ase import neighborlist 
 from sklearn.preprocessing import StandardScaler
-from create_descriptor import *
-from tool import *
+from codes.tool import *
 
 if __name__ == '__main__':
     #initialisation
+    directory='Minimum_Working_Exemple'
+    os.chdir(directory) #go where the train set (train.traj) and POSCAR are.
     atomseul='H' #atome adsorbed
     poscar_file='POSCAR' #slab with no adsorbate
     data_folder='.' #folder where decriptors are saved