first commit
This commit is contained in:
commit
730f4146ae
169
.gitignore
vendored
Normal file
169
.gitignore
vendored
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
# ---> Python
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/#use-with-ide
|
||||||
|
.pdm.toml
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
||||||
|
|
||||||
|
# Data files
|
||||||
|
*.csv
|
||||||
|
*.xml
|
||||||
|
# *.txt
|
||||||
|
*.ipynb
|
||||||
|
*.json
|
||||||
|
_*.*
|
11
README.md
Normal file
11
README.md
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# Totem2XML
|
||||||
|
|
||||||
|
Script Python permettant de convertir une liste de fichiers de budget au format Totem XML, en fichiers CSV conformes au schéma national du SCDL pour les données budgétaires.
|
||||||
|
|
||||||
|
Utilisation:
|
||||||
|
```
|
||||||
|
> python run.py file1.xml [file2.xml file3.xml ... filen.xml]
|
||||||
|
```
|
||||||
|
|
||||||
|
Chaque fichier CSV généré est placé au même endroit que le fichier XML correspondant et porte le même nom, avec l'extension `.csv`.
|
||||||
|
Ainsi, `./mon/fichier1.xml` sera converti en `./mon/fichier1.csv`.
|
BIN
requirements.txt
Normal file
BIN
requirements.txt
Normal file
Binary file not shown.
238
run.py
Normal file
238
run.py
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
'''Totem2XML
|
||||||
|
Script Python permettant de convertir une liste de fichiers de budget au format Totem XML, en fichiers CSV conformes au schéma national du SCDL pour les données budgétaires.
|
||||||
|
|
||||||
|
Utilisation:
|
||||||
|
> python run.py file1.xml [file2.xml file3.xml ... filen.xml]
|
||||||
|
|
||||||
|
Chaque fichier CSV généré est placé au même endroit que le fichier XML correspondant et porte le même nom, avec l'extension ".csv".
|
||||||
|
Ainsi, "./mon/fichier1.xml" sera converti en "./mon/fichier1.csv".
|
||||||
|
'''
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import csv
|
||||||
|
import xmltodict
|
||||||
|
import jmespath
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
# JMESPath expressions used to navigate the parsed XML documents.
# The shared prefixes below only avoid repeating the same path segments.
_BUDGET = 'DocumentBudgetaire.Budget'
_NATURE = 'Nomenclature[0].Nature[0]'
_FONCTION = 'Nomenclature[0].Fonction[0]'

paths = {
    # Budget document
    'budget_libellecoll': 'DocumentBudgetaire.EnTeteDocBudgetaire.LibelleColl.V',
    'budget_nomenclature': _BUDGET + '.EnTeteBudget.Nomenclature.V',
    'budget_idetab': _BUDGET + '.EnTeteBudget.IdEtab.V',
    'budget_year': _BUDGET + '.BlocBudget.Exer.V',
    'budget_natdec': _BUDGET + '.BlocBudget.NatDec.V',
    'budget_lines': _BUDGET + '.LigneBudget[*]',

    # Chart of accounts ("plan de compte"), "Nature" branch
    'nomenclature_nature_chapitres': _NATURE + '.Chapitres[0].Chapitre[*]',
    'nomenclature_nature_comptes': _NATURE + '.Comptes[0].Compte[*]',
    'comptes': 'Compte[*]',

    # Chart of accounts ("plan de compte"), "Fonction" branch
    'nomenclature_fonction_chapitres': _FONCTION + '.Chapitres[0].Chapitre[*]',
    'nomenclature_fonction_references': _FONCTION + '.RefFonctionnelles[0].RefFonc[*]',
    'references_fonctionnelles': 'RefFonc[*]',
}
|
||||||
|
|
||||||
|
# Translation tables from the raw codes found in the XML to the labels
# written in the CSV.  The 'ERROR' / 'empty' keys are the values substituted
# by the conversion code when the corresponding XML value is missing.

# NatDec code -> label (BGT_NATDEC column)
codes_natdec = {
    '01': 'budget primitif',
    '02': 'décision modificative',
    '03': 'budget supplémentaire',
    '09': 'compte administratif',
    'ERROR': 'NetDec inconnu',
}

# Chapter "Section" code -> label (BGT_SECTION column)
codes_section = {'I': 'investissement', 'F': 'fonctionnement', 'ERROR': 'ERROR'}

# OpBudg code -> label (BGT_OPBUDG column)
codes_opbudg = {'0': 'réel', '1': 'ordre', 'ERROR': 'ERROR'}

# CodRD code -> label (BGT_CODRD column)
codes_codrd = {'R': 'recette', 'D': 'dépense', 'ERROR': 'ERROR'}

# ArtSpe flag -> label (BGT_ARTSPE column)
codes_artspe = {'false': 'non spécialisé', 'true': 'spécialisé', 'empty': ''}
|
||||||
|
|
||||||
|
# Column names of the generated CSV, in output order.
csv_header = [
    "BGT_NATDEC",
    "BGT_ANNEE",
    "BGT_SIRET",
    "BGT_NOM",
    "BGT_CONTNAT",
    "BGT_CONTNAT_LABEL",
    "BGT_NATURE",
    "BGT_NATURE_LABEL",
    "BGT_FONCTION",
    "BGT_FONCTION_LABEL",
    "BGT_OPERATION",
    "BGT_SECTION",
    "BGT_OPBUDG",
    "BGT_CODRD",
    "BGT_MTREAL",
    "BGT_MTBUDGPREC",
    "BGT_MTRARPREC",
    "BGT_MTPROPNOUV",
    "BGT_MTPREV",
    "BGT_CREDOUV",
    "BGT_MTRAR3112",
    "BGT_ARTSPE",
]

# Base URL under which the "plan de compte" XML files are fetched
# (completed with /<year>/<nomenclature parts>/planDeCompte.xml).
pdc_root_url = "http://odm-budgetaire.org/composants/normes"
|
||||||
|
|
||||||
|
|
||||||
|
def get_children(parents, children_name, children_path, results):
    """Flatten a nested hierarchy of records into ``results``.

    Each element of ``parents`` is queried with the JMESPath expression
    ``children_path``.  When the query yields a result, those children are
    flattened first (so descendants are appended before their parent) and
    the ``children_name`` key is then deleted from the parent record.  Every
    record therefore ends up in ``results`` without its nested children.

    The records in ``parents`` are modified in place.

    Returns the ``results`` list that was passed in.
    """
    for node in parents:
        nested = jmespath.search(children_path, node)
        if nested is not None:
            get_children(nested, children_name, children_path, results)
            # Drop the nested level now that it has been flattened.
            del node[children_name]
        results.append(node)
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def _lookup_by_code(records, code, field):
    """Return the ``field`` values of the entries of ``records`` whose ``Code`` equals ``code``.

    Always returns a list: it is empty when nothing matches or when
    ``records`` itself is missing (``None``).
    """
    expression = '[?Code==`"{code}"`].{field}'.format(code=code, field=field)
    return jmespath.search(expression, records) or []


def line2csv(line, config):
    """Convert one parsed budget line into a CSV row.

    Args:
        line: dict of one budget line; each value is read from the ``V``
            key of the child named after the field (``ContNat.V``, ...).
        config: dict prepared by the caller with the keys
            ``nomenclature_nature_chapitres``, ``nature_comptes``,
            ``fonction_references`` (lists searched by ``Code``) and
            ``NatDec``, ``Exer``, ``IdEtab``, ``LibelleColl`` (values copied
            as-is into the row).

    Returns:
        A dict keyed by the ``BGT_*`` column names.  Missing mandatory
        values and unknown section / OpBudg / CodRD codes are reported with
        the ``'ERROR'`` marker instead of raising ``KeyError``.
    """

    def value_of(field, default=''):
        # Missing or empty values fall back to ``default``.
        return jmespath.search(field + '.V', line) or default

    chapters = config['nomenclature_nature_chapitres']

    cont_nat = value_of('ContNat', 'ERROR')
    cont_nat_labels = _lookup_by_code(chapters, cont_nat, 'Libelle')
    cont_nat_label = cont_nat_labels[0] if cont_nat_labels else 'ERROR'

    nature = value_of('Nature', 'ERROR')
    nature_labels = _lookup_by_code(config['nature_comptes'], nature, 'Libelle')
    nature_label = nature_labels[0] if nature_labels else 'ERROR'

    fonction = value_of('Fonction')
    fonction_labels = _lookup_by_code(config['fonction_references'], fonction, 'Libelle')
    fonction_label = fonction_labels[0] if fonction_labels else ''

    # The section is carried by the chapter (ContNat), not by the line.
    section_codes = _lookup_by_code(chapters, cont_nat, 'Section')
    section_code = section_codes[0] if section_codes else 'ERROR'
    section = codes_section.get(section_code, codes_section['ERROR'])

    op_budg = codes_opbudg.get(value_of('OpBudg', 'ERROR'), codes_opbudg['ERROR'])
    cod_rd = codes_codrd.get(value_of('CodRD', 'ERROR'), codes_codrd['ERROR'])

    # NOTE(review): codes_artspe defines no error entry, so an unexpected
    # ArtSpe value still raises KeyError here, as before.
    art_spe = codes_artspe[value_of('ArtSpe', 'empty')]

    # BGT_NUMDEC is not exported (it is not part of csv_header).
    return {
        'BGT_NATDEC': config['NatDec'],
        'BGT_ANNEE': config['Exer'],
        'BGT_SIRET': config['IdEtab'],
        'BGT_NOM': config['LibelleColl'],
        'BGT_CONTNAT': cont_nat,
        'BGT_CONTNAT_LABEL': cont_nat_label,
        'BGT_NATURE': nature,
        'BGT_NATURE_LABEL': nature_label,
        'BGT_FONCTION': fonction,
        'BGT_FONCTION_LABEL': fonction_label,
        'BGT_OPERATION': value_of('Operation'),
        'BGT_SECTION': section,
        'BGT_OPBUDG': op_budg,
        'BGT_CODRD': cod_rd,
        'BGT_MTREAL': value_of('MtReal'),
        'BGT_MTBUDGPREC': value_of('MtBudgPrec'),
        'BGT_MTRARPREC': value_of('MtRARPrec'),
        'BGT_MTPROPNOUV': value_of('MtPropNouv'),
        'BGT_MTPREV': value_of('MtPrev'),
        'BGT_CREDOUV': value_of('CredOuv'),
        'BGT_MTRAR3112': value_of('MtRAR3112'),
        'BGT_ARTSPE': art_spe,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def totem2csv(xml_file):
    """Convert a Totem XML file to a CSV file following the SCDL schema.

    The CSV file is written next to ``xml_file`` with the same base name and
    the ``.csv`` extension.  The "plan de compte" matching the budget's year
    and nomenclature is downloaded from ``pdc_root_url`` and used to resolve
    the labels of each budget line.

    Args:
        xml_file: path of the Totem XML file to convert.

    Raises:
        requests.RequestException: if the plan de compte cannot be
            downloaded (network error, timeout or HTTP error status).
    """
    csv_file = os.path.splitext(xml_file)[0] + '.csv'

    # Read raw bytes so the XML parser applies the encoding declared in the
    # document instead of the platform's default text encoding.
    # LigneBudget is forced to a list so that a file holding a single budget
    # line is handled like any other.
    with open(xml_file, 'rb') as xf:
        xml_dict = xmltodict.parse(xf.read(), attr_prefix='', force_list=('LigneBudget',))

    # Define plan de compte
    year = jmespath.search(paths['budget_year'], xml_dict)
    nomenclature = jmespath.search(paths['budget_nomenclature'], xml_dict).split('-', 1)
    pdc_url = pdc_root_url + '/{year}/{n1}/{n2}/planDeCompte.xml'.format(year=year, n1=nomenclature[0], n2=nomenclature[1])
    print("Plan de compte: {pdc_url}".format(pdc_url=pdc_url))

    # Download the plan de compte; fail early on an HTTP error rather than
    # trying to parse an error page, and never wait forever on the server.
    pdc_response = requests.get(pdc_url, timeout=30)
    pdc_response.raise_for_status()
    pdc_dict = xmltodict.parse(pdc_response.content, attr_prefix='', force_list=True)

    # Read plan de compte: "nature chapitres", "nature comptes" and
    # "fonction references".  A missing section is treated as an empty list.
    nomenclature_nature_chapitres = jmespath.search(paths['nomenclature_nature_chapitres'], pdc_dict) or []
    nomenclature_nature_comptes = jmespath.search(paths['nomenclature_nature_comptes'], pdc_dict) or []
    nomenclature_fonction_references = jmespath.search(paths['nomenclature_fonction_references'], pdc_dict) or []

    # Flatten the nested "comptes" and "references fonctionnelles" trees
    nature_comptes = get_children(nomenclature_nature_comptes, 'Compte', paths['comptes'], [])
    fonction_references = get_children(nomenclature_fonction_references, 'RefFonc', paths['references_fonctionnelles'], [])

    # Main information from the XML header, shared by every CSV row
    natdec_code = jmespath.search(paths['budget_natdec'], xml_dict) or 'ERROR'
    config = {
        'nomenclature_nature_chapitres': nomenclature_nature_chapitres,
        'nature_comptes': nature_comptes,
        'fonction_references': fonction_references,
        # Unknown codes map to the table's error label instead of raising.
        'NatDec': codes_natdec.get(natdec_code, codes_natdec['ERROR']),
        'Exer': year or 'ERROR',
        'IdEtab': jmespath.search(paths['budget_idetab'], xml_dict) or 'ERROR',
        'LibelleColl': jmespath.search(paths['budget_libellecoll'], xml_dict) or 'ERROR',
    }

    # Get budget lines (none at all yields an empty list)
    budget_lines = jmespath.search(paths['budget_lines'], xml_dict) or []
    nb_xml_lines = len(budget_lines)
    print("{nb_xml_lines} lignes de budget dans le fichier {xml_file}.".format(nb_xml_lines=nb_xml_lines, xml_file=xml_file))

    # Generate and save the CSV lines
    # Cf. process details in https://gitlab.com/datafin/totem/-/blob/master/totem2csv/xsl/totem2xmlcsv.xsl
    nb_csv_lines = 0
    with open(csv_file, 'w', newline='', encoding="utf-8") as cf:
        csv_writer = csv.DictWriter(cf, fieldnames=csv_header, delimiter=',', quoting=csv.QUOTE_NONNUMERIC)
        csv_writer.writeheader()

        for id_line, line in enumerate(budget_lines, start=1):
            print('Traitement et enregistrement de la ligne {id_line}/{nb_xml_lines}'.format(id_line=id_line, nb_xml_lines=nb_xml_lines))
            csv_writer.writerow(line2csv(line, config))
            nb_csv_lines += 1

    print("{nb_csv_lines} lignes enregistrées dans le fichier {csv_file} sur {nb_xml_lines}.".format(nb_csv_lines=nb_csv_lines, csv_file=csv_file, nb_xml_lines=nb_xml_lines))
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv):
    """Convert every Totem XML file listed in ``argv`` to CSV.

    Files are processed in order.  The program stops with exit status 1,
    after printing a message on standard error, when no file is given or as
    soon as an argument is not an existing file whose name ends in ``.xml``.

    Args:
        argv: list of XML file paths (command-line arguments without the
            program name).
    """
    if not argv:
        print('No XML file in command line', file=sys.stderr)
        # A usage error must not be reported as success (status 0).
        sys.exit(1)

    for xml_file in argv:
        if not os.path.isfile(xml_file) or not xml_file.endswith('.xml'):
            print('File {xml_file} not exist or is not XML file'.format(xml_file=xml_file), file=sys.stderr)
            sys.exit(1)

        totem2csv(xml_file)


if __name__ == "__main__":
    main(sys.argv[1:])
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user