# generate_citation.py

# ===================================================================================
#        Génération de citations formatées sur la base d'un export JSON Dataverse
# ===================================================================================


# Start-up banner: a long run of newlines pushes earlier terminal output out
# of view, then the program title is shown inside a frame.
# Joining the pieces with "\n" and letting print() add the final newline emits
# exactly the same characters as one print() call per piece.
_banner_lines = (
	"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
	"# ===========================================================================#",
	"#             Génération de citations formatées sur la base                  #",
	"#                      d'un export JSON Dataverse                            #",
	"# ===========================================================================#",
	"\n",
)
print(*_banner_lines, sep="\n")

# Génération de citations selon le modèle décrit sur le site de Dataverse :
# https://dataverse.org/best-practices/data-citation

# !!!!!!!!!!
# TODO
# - proposer différents styles de citations
# - accepter le nom de fichier comme argument en ligne de commande

# -----------------------
# Imports et déclarations
# -----------------------

import json
import os
from pyDataverse.api import NativeApi, DataAccessApi
from pyDataverse.models import Dataverse

# -------------------------------------
# Fonction de génération de la citation
# -------------------------------------

def citation_generation(cit_authors,cit_title,cit_year,cit_data_repository,cit_version_number,cit_version_minor_number,cit_doi,cit_unf):
	"""Build one formatted citation string for a dataset.

	The result has the shape::

	    Author1; Author2, Author3, YEAR, "TITLE", REPOSITORY, VMAJOR.MINOR, http://dx.doi.org/DOI[, UNF].

	and always ends with a full stop followed by a newline.

	Args:
	    cit_authors: iterable of author names, in citation order. May be
	        empty, in which case the citation starts with the year.
	    cit_title: dataset title (wrapped in double quotes in the output).
	    cit_year: publication year.
	    cit_data_repository: name of the repository / publisher.
	    cit_version_number: major version number.
	    cit_version_minor_number: minor version number.
	    cit_doi: identifier appended verbatim after ``http://dx.doi.org/``.
	    cit_unf: UNF fingerprint, or an empty value when there is none.

	Returns:
	    The citation as a single string.
	"""
	pieces = []
	# The first author's name is followed by a semicolon, the others by a
	# plain comma. Every author is accumulated (the names used to overwrite
	# each other, so only the last one survived, and an empty author list
	# left the citation variable undefined).
	for position, cit_author in enumerate(cit_authors):
		separator = "; " if position == 0 else ", "
		pieces.append(f"{cit_author}{separator}")
	pieces.append(f"{cit_year}, ")
	pieces.append(f"\"{cit_title}\", ")
	pieces.append(f"{cit_data_repository}, ")
	pieces.append(f"V{cit_version_number}.{cit_version_minor_number}, ")
	pieces.append(f"http://dx.doi.org/{cit_doi}")
	if cit_unf:
		unf_text = str(cit_unf)
		# Do not double the label when the value already carries its own
		# "UNF:" prefix (e.g. "UNF:6:...").
		if not unf_text.startswith("UNF:"):
			unf_text = f"UNF: {unf_text}"
		# Separate the UNF from the DOI instead of gluing them together.
		pieces.append(f", {unf_text}")
	# Same terminator whether or not a UNF is present.
	pieces.append(".\n")
	return "".join(pieces)


# ----------------
# Fichier d'entrée
# ----------------

# filename_input = input ("Entrez le nom du fichier à traiter [export_json.json] : ")
# if filename_input =="":
# 	filename = "export_json.json"
# else:
# 	filename = filename_input

# Name of the JSON export to process (currently fixed).
filename = "export_json.json"

# Decode explicitly as UTF-8 rather than with the platform default encoding,
# so accented characters in the metadata are read correctly on every system.
with open(filename, encoding="utf-8") as f:
	complete_list = json.load(f)

bibliography_list = []

# Build one citation per entry of the export. Each key is passed on as the
# DOI; each value is a one-element list wrapping the dataset dictionary.
for doi, metadata in complete_list.items():
	authors_list = []
	subjects_list = []
	# Reset the title for every dataset: otherwise an entry without a 'title'
	# field would raise NameError (first entry) or silently reuse the title of
	# the previous dataset.
	title = ""

	# The dictionary is enclosed in brackets, so take the first and only
	# element of that "list".
	metadata = metadata[0]
	latest_version = metadata["latestVersion"]

	# Collect everything available at the top level of the entry and in
	# "latestVersion".
	dataset_id = metadata["id"]
	dataset_identifier = metadata["identifier"]
	dataset_persistentUrl = metadata["persistentUrl"]
	dataset_publicationDate = metadata["publicationDate"]
	dataset_licence = latest_version["license"]
	dataset_publisher = metadata["publisher"]
	# The year is taken as the first four characters of the publication date.
	dataset_yearofPublication = dataset_publicationDate[0:4]
	dataset_versionNumber = latest_version["versionNumber"]
	dataset_versionMinorNumber = latest_version["versionMinorNumber"]
	# Look up the same key that is read ("UNF"); the presence test used to
	# check "unf" instead, so the two could never agree.
	dataset_unf = latest_version.get("UNF") or ""

	fields = latest_version["metadataBlocks"]["citation"]["fields"]
	for field in fields:  # each field is a dictionary, handled one at a time
		typeName = field['typeName']
		if typeName == 'title':
			title = field['value']  # single value
		elif typeName == 'author':
			authors = field['value']  # list of dictionaries, one per author
			for author in authors:
				authorName = author["authorName"]["value"]
				authors_list.append(authorName)
		elif typeName == 'datasetContact':
			datasetContacts = field['value']  # multiple values
		elif typeName == 'dsDescription':
			dsDescriptions = field['value']  # multiple values
		elif typeName == 'subject':
			subjects = field['value']  # multiple values
		elif typeName == 'language':
			languages = field['value']  # list
		elif typeName == 'depositor':
			depositors = field['value']
		elif typeName == 'keyword':
			keywords = field['value']
		elif typeName == 'dateOfDeposit':
			# NOTE(review): key spelled 'dateOfDeposit' (capital O), which is
			# presumably the Dataverse field name; confirm against an export.
			dateofDeposit = field['value']

	# Build the citation for this dataset, show it and keep it for the export.
	citation_current = citation_generation(
		cit_authors=authors_list,
		cit_title=title,
		cit_year=dataset_yearofPublication,
		cit_data_repository=dataset_publisher,
		cit_version_number=dataset_versionNumber,
		cit_version_minor_number=dataset_versionMinorNumber,
		cit_doi=doi,
		cit_unf=dataset_unf,
	)
	print (citation_current)
	bibliography_list.append(citation_current)

# Build the bibliography: every citation followed by a newline.
bibliography = "".join(f"{reference}\n" for reference in bibliography_list)

# Ask for the output file name; an empty answer selects the default.
export_filename_input = input ("Entrez un nom de fichier pour la sortie en format texte (défaut datasets_list.txt) : ")
export_filename = export_filename_input or "datasets_list.txt"

# Write explicitly as UTF-8 so accented or non-Latin characters in titles and
# author names do not depend on the platform default encoding.
with open(export_filename, 'w', encoding="utf-8") as f:
	f.write(bibliography)