266 lines
10 KiB
Python
266 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
|
|
'''Totem2XML
|
|
Script Python permettant de convertir une liste de fichiers de budget au format Totem XML, en fichiers CSV conformes au schéma national du SCDL pour les données budgétaires.
|
|
|
|
Utilisation:
|
|
> python run.py file1.xml [file2.xml file3.xml ... filen.xml]
|
|
|
|
Chaque fichier généré est placé au même endroit que le fichier XML d'origine et porte le même nom, avec l'extension ".csv".
|
|
Ainsi, "./mon/fichier1.xml" sera converti en "./mon/fichier1.csv".
|
|
'''
|
|
|
|
import sys
|
|
import os
|
|
import glob
|
|
import json
|
|
import csv
|
|
import xmltodict
|
|
import jmespath
|
|
import requests
|
|
|
|
|
|
# Query expressions (JMESPath syntax) used by this script to pick values out
# of the parsed XML documents.
paths = dict(
    # Budget document: header fields and budget lines.
    budget_libellecoll='DocumentBudgetaire.EnTeteDocBudgetaire.LibelleColl.V',
    budget_nomenclature='DocumentBudgetaire.Budget.EnTeteBudget.Nomenclature.V',
    budget_idetab='DocumentBudgetaire.Budget.EnTeteBudget.IdEtab.V',
    budget_year='DocumentBudgetaire.Budget.BlocBudget.Exer.V',
    budget_natdec='DocumentBudgetaire.Budget.BlocBudget.NatDec.V',
    budget_lines='DocumentBudgetaire.Budget.LigneBudget[*]',

    # Nomenclature document: "nature" axis.
    nomenclature_nature_chapitres='Nomenclature[0].Nature[0].Chapitres[0].Chapitre[*]',
    nomenclature_nature_comptes='Nomenclature[0].Nature[0].Comptes[0].Compte[*]',
    comptes='Compte[*]',

    # Nomenclature document: "fonction" axis.
    nomenclature_fonction_chapitres='Nomenclature[0].Fonction[0].Chapitres[0].Chapitre[*]',
    nomenclature_fonction_references='Nomenclature[0].Fonction[0].RefFonctionnelles[0].RefFonc[*]',
    references_fonctionnelles='RefFonc[*]',
)
|
|
|
|
# Human-readable label for each NatDec code found in the budget header.
# The 'ERROR' entry is the label used when the code is absent from the XML.
codes_natdec = {
    '01': 'budget primitif',
    '02': 'décision modificative',
    '03': 'budget supplémentaire',
    '09': 'compte administratif',
    # Fixed typo: the field is named "NatDec", not "NetDec".
    'ERROR': 'NatDec inconnu',
}
|
|
|
|
# Label for each budget section code ('I' / 'F'), plus an 'ERROR' entry.
codes_section = dict(
    I='investissement',
    F='fonctionnement',
    ERROR='ERROR',
)
|
|
|
|
# Label for each OpBudg code; 'ERROR' is the key used when the code is missing.
codes_opbudg = dict([
    ('0', 'réel'),
    ('1', 'ordre'),
    ('ERROR', 'ERROR'),
])
|
|
|
|
# Label for each CodRD code; 'ERROR' is the key used when the code is missing.
codes_codrd = dict(
    R='recette',
    D='dépense',
    ERROR='ERROR',
)
|
|
|
|
# Label for each ArtSpe value; 'empty' is the key used when the value is
# missing and maps to an empty CSV cell.
codes_artspe = dict(
    false='non spécialisé',
    true='spécialisé',
    empty='',
)
|
|
|
|
# Column names of the generated CSV file, in output order.
csv_header = [
    "BGT_NATDEC",
    "BGT_ANNEE",
    "BGT_SIRET",
    "BGT_NOM",
    "BGT_CONTNAT",
    "BGT_CONTNAT_LABEL",
    "BGT_NATURE",
    "BGT_NATURE_LABEL",
    "BGT_FONCTION",
    "BGT_FONCTION_LABEL",
    "BGT_OPERATION",
    "BGT_SECTION",
    "BGT_OPBUDG",
    "BGT_CODRD",
    "BGT_MTREAL",
    "BGT_MTBUDGPREC",
    "BGT_MTRARPREC",
    "BGT_MTPROPNOUV",
    "BGT_MTPREV",
    "BGT_CREDOUV",
    "BGT_MTRAR3112",
    "BGT_ARTSPE",
]
|
|
|
|
# Local directory for the "plan de compte" data.
# NOTE(review): the JSON loader calls in this file spell out './pdc/' directly;
# confirm whether they are meant to use this constant.
# Remote source kept for reference (currently disabled):
# pdc_root_url = "http://odm-budgetaire.org/composants/normes"
pdc_directory = "./pdc"
|
|
|
|
|
|
def get_children(parents, children_name, children_path, results):
    """Flatten a tree of nested dictionaries into ``results``.

    For every item of ``parents`` the JMESPath expression ``children_path``
    is evaluated. When it yields something other than None, the children are
    flattened first (so they are appended before their parent) and the
    ``children_name`` key is then deleted from the parent.

    Note that the parent dictionaries are modified in place.

    Args:
        parents: iterable of dictionaries to walk.
        children_name: key under which a parent stores its children.
        children_path: JMESPath expression selecting the children of a parent.
        results: list receiving the flattened nodes.

    Returns:
        The ``results`` list that was passed in.
    """
    for node in parents:
        nested = jmespath.search(children_path, node)
        if nested is not None:
            # Depth first: descendants are recorded before the node itself.
            get_children(nested, children_name, children_path, results)
            del node[children_name]
        results.append(node)
    return results
|
|
|
|
|
|
def _first_match(entries, code, field):
    """Return ``field`` of the first entry of ``entries`` whose 'Code' is ``code``.

    Entries that are not dictionaries, or whose ``field`` is missing/None, are
    skipped. Returns None when nothing matches. The comparison is done in
    plain Python so that the code value never has to be embedded in a query
    expression (a quote or backtick in the data cannot break the lookup).
    """
    for entry in entries or ():
        if isinstance(entry, dict) and entry.get('Code') == code:
            value = entry.get(field)
            if value is not None:
                return value
    return None


def line2csv(line, config):
    """Convert one budget line into a CSV row.

    Args:
        line: mapping describing one budget line; each value is read from the
            ``<Name>.V`` path (e.g. ``ContNat.V``).
        config: dictionary with the reference lists ``nature_chapitres``,
            ``nature_comptes`` and ``fonction_references`` (entries carrying
            'Code', 'Libelle' and, for chapters, 'Section'), plus the header
            values ``NatDec``, ``Exer``, ``IdEtab`` and ``LibelleColl``.

    Returns:
        A dictionary with the ``BGT_*`` columns of the row.

    Missing or unknown codes never raise: ContNat/Nature and their labels,
    the section, OpBudg and CodRD fall back to their 'ERROR' value; an
    unknown ArtSpe value gives an empty cell, as a missing one does.
    """
    def value_of(name, default=''):
        # Empty or missing values fall back to ``default``.
        return jmespath.search(name + '.V', line) or default

    nature_chapitres = config['nature_chapitres']

    cont_nat = value_of('ContNat', 'ERROR')
    cont_nat_label = _first_match(nature_chapitres, cont_nat, 'Libelle')
    if cont_nat_label is None:
        cont_nat_label = 'ERROR'

    nature = value_of('Nature', 'ERROR')
    nature_label = _first_match(config['nature_comptes'], nature, 'Libelle')
    if nature_label is None:
        nature_label = 'ERROR'

    fonction = value_of('Fonction')
    fonction_label = _first_match(config['fonction_references'], fonction, 'Libelle')
    if fonction_label is None:
        fonction_label = ''

    # The section is carried by the chapter (ContNat), not by the line.
    section_code = _first_match(nature_chapitres, cont_nat, 'Section')
    section = codes_section.get(section_code, 'ERROR')

    # .get() with the fallback entry: an unexpected code must not abort the
    # whole conversion with a KeyError.
    op_budg = codes_opbudg.get(value_of('OpBudg', 'ERROR'), codes_opbudg['ERROR'])
    cod_rd = codes_codrd.get(value_of('CodRD', 'ERROR'), codes_codrd['ERROR'])
    art_spe = codes_artspe.get(value_of('ArtSpe', 'empty'), codes_artspe['empty'])

    return {
        'BGT_NATDEC': config['NatDec'],
        'BGT_ANNEE': config['Exer'],
        'BGT_SIRET': config['IdEtab'],
        'BGT_NOM': config['LibelleColl'],
        'BGT_CONTNAT': cont_nat,
        'BGT_CONTNAT_LABEL': cont_nat_label,
        'BGT_NATURE': nature,
        'BGT_NATURE_LABEL': nature_label,
        'BGT_FONCTION': fonction,
        'BGT_FONCTION_LABEL': fonction_label,
        'BGT_OPERATION': value_of('Operation'),
        'BGT_SECTION': section,
        'BGT_OPBUDG': op_budg,
        'BGT_CODRD': cod_rd,
        'BGT_MTREAL': value_of('MtReal'),
        'BGT_MTBUDGPREC': value_of('MtBudgPrec'),
        'BGT_MTRARPREC': value_of('MtRARPrec'),
        'BGT_MTPROPNOUV': value_of('MtPropNouv'),
        'BGT_MTPREV': value_of('MtPrev'),
        'BGT_CREDOUV': value_of('CredOuv'),
        'BGT_MTRAR3112': value_of('MtRAR3112'),
        'BGT_ARTSPE': art_spe,
    }
|
|
|
|
|
|
def get_pdc(file):
    """Load a JSON file and return its decoded content.

    Args:
        file: path of the JSON file to read.

    Returns:
        The Python object decoded from the file.

    The file is always decoded as UTF-8 (the JSON interchange encoding)
    instead of the platform default, so accented labels are read correctly
    on every system.
    """
    with open(file, "r", encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
|
|
def totem2csv(xml_file):
    """Convert a Totem XML file to a CSV file following the SCDL schema.

    The CSV file is written next to ``xml_file``, with the same name and the
    '.csv' extension.

    Args:
        xml_file: path of the Totem XML file to convert.

    Raises:
        ValueError: if the XML file does not declare a nomenclature.

    The reference lists are read from JSON files named
    ``<nomenclature prefix>_<kind>.json`` in ``pdc_directory``; that path is
    relative, so it is resolved against the current working directory.
    NOTE: an earlier approach, now disabled, downloaded the plan de compte
    XML from a remote URL instead of using local JSON files.
    """
    csv_file = os.path.splitext(xml_file)[0] + '.csv'

    # Read raw bytes: the XML parser then honours the encoding declared in
    # the XML prolog instead of the platform default text encoding.
    with open(xml_file, 'rb') as xf:
        xml_bytes = xf.read()
    # LigneBudget is forced to a list so that a budget holding a single line
    # is handled like any other one.
    xml_dict = xmltodict.parse(xml_bytes, attr_prefix='', force_list=('LigneBudget',))

    nomenclature = jmespath.search(paths['budget_nomenclature'], xml_dict)
    if not nomenclature:
        raise ValueError('No nomenclature found in {xml_file}'.format(xml_file=xml_file))
    # Only the part before the first '-' selects the reference files.
    pdc = nomenclature.split('-', 1)[0]

    def load_reference(kind):
        name = '{pdc}_{kind}.json'.format(pdc=pdc, kind=kind)
        return get_pdc(os.path.join(pdc_directory, name))

    # Get main informations from XML header
    natdec_code = jmespath.search(paths['budget_natdec'], xml_dict) or 'ERROR'

    config = {
        'nature_chapitres': load_reference('nature_chapitres'),
        'nature_comptes': load_reference('nature_comptes'),
        'fonction_references': load_reference('fonction_references'),
        # Unknown codes get the fallback label instead of raising KeyError.
        'NatDec': codes_natdec.get(natdec_code, codes_natdec['ERROR']),
        'Exer': jmespath.search(paths['budget_year'], xml_dict) or 'ERROR',
        'IdEtab': jmespath.search(paths['budget_idetab'], xml_dict) or 'ERROR',
        'LibelleColl': jmespath.search(paths['budget_libellecoll'], xml_dict) or 'ERROR',
    }

    # Get budget lines (an XML file without any line gives an empty CSV)
    budget_lines = jmespath.search(paths['budget_lines'], xml_dict) or []
    nb_xml_lines = len(budget_lines)
    print("{nb_xml_lines} lignes de budget dans le fichier {xml_file}.".format(nb_xml_lines=nb_xml_lines, xml_file=xml_file))

    # Open CSV file to save lines
    nb_csv_lines = 0
    with open(csv_file, 'w', newline='', encoding="utf-8") as cf:
        csv_writer = csv.DictWriter(cf, fieldnames=csv_header, delimiter=',', quoting=csv.QUOTE_NONNUMERIC)
        csv_writer.writeheader()

        # Generate and save CSV line
        # Cf. process details in https://gitlab.com/datafin/totem/-/blob/master/totem2csv/xsl/totem2xmlcsv.xsl
        for id_line, line in enumerate(budget_lines, start=1):
            print('Traitement et enregistrement de la ligne {id_line}/{nb_xml_lines}'.format(id_line=id_line, nb_xml_lines=nb_xml_lines))
            csv_writer.writerow(line2csv(line, config))
            nb_csv_lines += 1

    print("{nb_csv_lines} lignes enregistrées dans le fichier {csv_file} sur {nb_xml_lines}.".format(nb_csv_lines=nb_csv_lines, csv_file=csv_file, nb_xml_lines=nb_xml_lines))
|
|
|
|
|
|
def main(argv):
    """Convert every XML file named on the command line.

    Args:
        argv: list of command-line arguments (without the program name).
            Each argument is either an XML file or a directory; a directory
            is searched recursively for '*.xml' files.

    Paths that are not existing '.xml' files are reported and skipped.
    Exits with status 1 when no argument is given.
    """
    if not argv:
        print('No XML file in command line')
        # Non-zero status: calling scripts can detect the usage error.
        sys.exit(1)

    files = []
    for arg in argv:
        if os.path.isdir(arg):
            # Escape the directory part so that glob metacharacters in its
            # name ('[', '*', '?') are taken literally.
            pattern = os.path.join(glob.escape(arg), '**', '*.xml')
            files.extend(glob.glob(pattern, recursive=True))
        else:
            files.append(arg)

    for file in files:
        if os.path.isfile(file) and file.endswith('.xml'):
            print('Convert {file} to CSV'.format(file=file))
            totem2csv(file)
        else:
            print('{file} not exist or is not XML file'.format(file=file))
|
|
|
|
|
|
# Script entry point: hand the command-line arguments (program name
# excluded) to main().
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
|
|
|