From 730f4146aeb8d25c48c14413919d4395c6fc3b57 Mon Sep 17 00:00:00 2001
From: Guillaume RYCKELYNCK <guillaume.ryckelynck@grandest.fr>
Date: Sat, 21 Jan 2023 21:55:17 +0100
Subject: [PATCH] first commit

---
 .gitignore       | 169 +++++++++++++++++++++++++++++++++
 README.md        |  11 +++
 requirements.txt | Bin 0 -> 1460 bytes
 run.py           | 238 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 418 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 requirements.txt
 create mode 100644 run.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7b9e41e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,169 @@
+# ---> Python
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Data files
+*.csv
+*.xml
+# *.txt
+*.ipynb
+*.json
+_*.*
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f0bf45b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,11 @@
+# Totem2XML
+
+Script Python permettant de convertir une liste de fichiers de budget au format Totem XML, en fichiers CSV conformes au schéma national du SCDL pour les données budgétaires.
+
+Utilisation:
+```
+> python run.py file1.xml [file2.xml file3.xml ... filen.xml]
+```
+
+Chaque fichier généré est placé dans le même dossier que le fichier XML source et porte le même nom, avec l'extension `.csv`.
+Ainsi, `./mon/fichier1.xml` sera converti en `./mon/fichier1.csv`.
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0578d78df7a4b726bd438367d9009c147d37fc90
GIT binary patch
literal 1460
zcmZXU-E!Jc41{&vnLY{~Z0D!B$UF26I>vT_2^hz?O~}Ky-A_6Plj$%HLnN(sSCaq!
zsqJL5K1X}8&IbK@)Tgv>{uK7rzSzcUYZPzvz1N3~b;?+KMx)wED7`)D352x}z$yIi
zeahPkV^Wt+*(>!s>es<>d;4${sw>seA^JDU+Iwd0xw-$1GVT)YQIW%iS)oYjkye-+
z`(@8I+S%j8?)2PV;^0r>fD4o7+$v69^v9Q3k+U$4(#Nmx363|8)7iT^HR^;5gZjZT
zc%TIzdutV`J%Ylh?423zPl|TQUl^9j<*0@KY+ErH9O1h%cmjK;$~Ji)&Jha_c5<c@
z<Iy#4FU%H)cj6uRu=pt*dtrrcbleLIw;x=`|JnG~Ic|2Y^31(lIV43<pE)t}Sv{HS
z?A<on2b`lbKIAFKyzqekvaKuCp5(?Lp1ta`)AWvg7^LUmigK)U{xMH{V4rJEX_-}-
zwvJ9@u%&^GPq-PHM%7TgcQ&!l3tOCu%|aU*pM=7WGaWEt#9hRMquZn$_@a$1WZz$u
zKYMn*hAOEWg%W!fw}|csJ+V)tVoYPD=eHPqzSr1@A9C+m9b>`^r@~!|jk?~qmDr=D
z`_vZ|%a%p{N79$CA3w6|XM1zME9u9DO1qoAI_vrm_L{0ugzLcsHLvO%(_h$@SKUuS
ze)sCQTilImHYVnsRm25?zsw+RpX+SpKJ2bzyd9VZGtajVN7RQpJm=lm>upnAc$AKU
htIYXLdxHb5Fyac~5W7G;ZUrXnN~0`zkW*ak=0B`2)Vcrw

literal 0
HcmV?d00001

diff --git a/run.py b/run.py
new file mode 100644
index 0000000..1259a35
--- /dev/null
+++ b/run.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python3
+
+'''Totem2XML
+Script Python permettant de convertir une liste de fichiers de budget au format Totem XML, en fichiers CSV conformes au schéma national du SCDL pour les données budgétaires.
+
+Utilisation:
+> python run.py file1.xml [file2.xml file3.xml ... filen.xml]
+
+Chaque fichier généré est placé dans le même dossier que le fichier XML source et porte le même nom, avec l'extension ".csv".
+Ainsi, "./mon/fichier1.xml" sera converti en "./mon/fichier1.csv".
+'''
+
+import sys
+import os
+import json
+import csv
+import xmltodict
+import jmespath
+import requests
+
+
+# JMESPath expressions used to read the parsed Totem file and the parsed plan de compte.
+paths = {
+    'budget_libellecoll': 'DocumentBudgetaire.EnTeteDocBudgetaire.LibelleColl.V',
+    'budget_nomenclature': 'DocumentBudgetaire.Budget.EnTeteBudget.Nomenclature.V',
+    'budget_idetab': 'DocumentBudgetaire.Budget.EnTeteBudget.IdEtab.V',
+    'budget_year': 'DocumentBudgetaire.Budget.BlocBudget.Exer.V',
+    'budget_natdec': 'DocumentBudgetaire.Budget.BlocBudget.NatDec.V',
+    'budget_lines': 'DocumentBudgetaire.Budget.LigneBudget[*]',
+    # Plan de compte, "Nature" part; 'comptes' selects the children of one Compte node.
+    'nomenclature_nature_chapitres': 'Nomenclature[0].Nature[0].Chapitres[0].Chapitre[*]',
+    'nomenclature_nature_comptes': 'Nomenclature[0].Nature[0].Comptes[0].Compte[*]',
+    'comptes': 'Compte[*]',
+    # Plan de compte, "Fonction" part; 'references_fonctionnelles' selects nested RefFonc nodes.
+    'nomenclature_fonction_chapitres': 'Nomenclature[0].Fonction[0].Chapitres[0].Chapitre[*]',  # not referenced elsewhere in run.py
+    'nomenclature_fonction_references': 'Nomenclature[0].Fonction[0].RefFonctionnelles[0].RefFonc[*]',
+    'references_fonctionnelles': 'RefFonc[*]',
+}
+# Label written to BGT_NATDEC for each NatDec code; the 'ERROR' entry is used when the code is missing.
+codes_natdec = {
+    '01': 'budget primitif',
+    '02': 'décision modificative',
+    '03': 'budget supplémentaire',
+    '09': 'compte administratif',
+    'ERROR': 'NetDec inconnu'  # NOTE(review): 'NetDec' looks like a typo for 'NatDec'; it ends up in the CSV, confirm before changing
+}
+# Label written to BGT_SECTION for the Section value found on the line's chapter.
+codes_section = {
+    'I': 'investissement',
+    'F': 'fonctionnement',
+    'ERROR': 'ERROR'
+}
+# Label written to BGT_OPBUDG for each OpBudg code; 'ERROR' is the key used when the code is missing.
+codes_opbudg = {
+    '0': 'réel',
+    '1': 'ordre',
+    'ERROR': 'ERROR'
+}
+# Label written to BGT_CODRD for each CodRD code; 'ERROR' is the key used when the code is missing.
+codes_codrd = {
+    'R': 'recette',
+    'D': 'dépense',
+    'ERROR': 'ERROR'
+}
+# Label written to BGT_ARTSPE for each ArtSpe value; 'empty' is the key used when the line has none.
+codes_artspe = {
+    'false': 'non spécialisé',
+    'true': 'spécialisé',
+    'empty': ''
+}
+# Column names of the generated CSV, in output order (passed to csv.DictWriter as fieldnames).
+csv_header = ["BGT_NATDEC", "BGT_ANNEE", "BGT_SIRET", "BGT_NOM", "BGT_CONTNAT", "BGT_CONTNAT_LABEL", "BGT_NATURE", "BGT_NATURE_LABEL", "BGT_FONCTION", "BGT_FONCTION_LABEL", "BGT_OPERATION", "BGT_SECTION", "BGT_OPBUDG", "BGT_CODRD", "BGT_MTREAL", "BGT_MTBUDGPREC", "BGT_MTRARPREC", "BGT_MTPROPNOUV", "BGT_MTPREV", "BGT_CREDOUV", "BGT_MTRAR3112", "BGT_ARTSPE"]
+# Base URL completed with "/<year>/<n1>/<n2>/planDeCompte.xml" to download the plan de compte.
+pdc_root_url = "http://odm-budgetaire.org/composants/normes"
+
+
+def get_children(parents, children_name, children_path, results):
+    """Flatten nested nodes into ``results`` (descendants before their parent) and return it.
+
+    ``children_path`` is the JMESPath selecting a node's children and ``children_name`` the key
+    deleted from the node afterwards. A ``parents`` of None is treated as empty (no TypeError)."""
+    for parent in parents or []:
+        children = jmespath.search(children_path, parent)
+        if children is not None:
+            get_children(children, children_name, children_path, results)
+            del parent[children_name]
+        results.append(parent)
+    return results
+
+
+def _first_match(attribute, code, entries, default):
+    """Return ``attribute`` of the first item of ``entries`` whose Code is ``code``, else ``default``."""
+    query = '[?Code==`"{code}"`].{attribute}'.format(code=code, attribute=attribute)
+    # A section missing from the plan de compte yields None: search an empty list instead.
+    matches = jmespath.search(query, entries or [])
+    return matches[0] if matches else default
+
+
+def line2csv(line, config):
+    """Convert one parsed Totem budget line into a row dict keyed by the CSV column names.
+
+    Args:
+        line: dict of one ``LigneBudget``; each field value is read from its ``V`` entry.
+        config: dict with the header values (``NatDec``, ``Exer``, ``IdEtab``,
+            ``LibelleColl``) and the plan de compte lists (``nomenclature_nature_chapitres``,
+            ``nature_comptes``, ``fonction_references``) used to resolve labels.
+
+    Returns:
+        dict: one value per column. 'ERROR' marks a missing ContNat, Nature, OpBudg or CodRD,
+        an unresolved chapter label, account label or section, and any code unknown to the
+        ``codes_*`` tables (no KeyError is raised); other missing values are left as ''.
+    """
+    def value(name, default=''):
+        """Return the ``V`` entry of field ``name``, or ``default`` when absent or empty."""
+        return jmespath.search(name + '.V', line) or default
+
+    chapitres = config['nomenclature_nature_chapitres']
+    cont_nat = value('ContNat', 'ERROR')
+    nature = value('Nature', 'ERROR')
+    fonction = value('Fonction')
+    # The section is not read from the line: it is the Section of the line's chapter.
+    section_code = _first_match('Section', cont_nat, chapitres, None)
+
+    # Code tables are read with .get() so that an unexpected code is reported as 'ERROR'
+    # in the row instead of aborting the whole conversion with a KeyError.
+    # NOTE: BGT_NUMDEC is not produced; it is not listed in csv_header either.
+    return {
+        'BGT_NATDEC': config['NatDec'],
+        'BGT_ANNEE': config['Exer'],
+        'BGT_SIRET': config['IdEtab'],
+        'BGT_NOM': config['LibelleColl'],
+        'BGT_CONTNAT': cont_nat,
+        'BGT_CONTNAT_LABEL': _first_match('Libelle', cont_nat, chapitres, 'ERROR'),
+        'BGT_NATURE': nature,
+        'BGT_NATURE_LABEL': _first_match('Libelle', nature, config['nature_comptes'], 'ERROR'),
+        'BGT_FONCTION': fonction,
+        'BGT_FONCTION_LABEL': _first_match('Libelle', fonction, config['fonction_references'], ''),
+        'BGT_OPERATION': value('Operation'),
+        'BGT_SECTION': codes_section.get(section_code, 'ERROR'),
+        'BGT_OPBUDG': codes_opbudg.get(value('OpBudg', 'ERROR'), 'ERROR'),
+        'BGT_CODRD': codes_codrd.get(value('CodRD', 'ERROR'), 'ERROR'),
+        'BGT_MTREAL': value('MtReal'),
+        'BGT_MTBUDGPREC': value('MtBudgPrec'),
+        'BGT_MTRARPREC': value('MtRARPrec'),
+        'BGT_MTPROPNOUV': value('MtPropNouv'),
+        'BGT_MTPREV': value('MtPrev'),
+        'BGT_CREDOUV': value('CredOuv'),
+        'BGT_MTRAR3112': value('MtRAR3112'),
+        'BGT_ARTSPE': codes_artspe.get(value('ArtSpe', 'empty'), 'ERROR'),
+    }
+
+
+def totem2csv(xml_file):
+    """Convert a Totem XML budget file into a CSV file following the SCDL schema.
+
+    The CSV is written next to ``xml_file`` under the same base name. The plan de compte
+    matching the budget year and nomenclature is downloaded from ``pdc_root_url``.
+
+    Args:
+        xml_file: path of the Totem XML file to convert.
+
+    Raises:
+        ValueError: if the file carries no nomenclature of the form "<n1>-<n2>".
+        requests.HTTPError: if the plan de compte download returns an HTTP error status.
+    """
+    csv_file = os.path.splitext(xml_file)[0] + '.csv'
+
+    # Parse from bytes so the encoding declared by the XML itself is used, not the platform
+    # default. LigneBudget is forced to a list because a lone element would otherwise be
+    # parsed as a dict, which the "[*]" projection used for the budget lines cannot iterate.
+    with open(xml_file, 'rb') as xf:
+        xml_dict = xmltodict.parse(xf.read(), attr_prefix='', force_list=('LigneBudget',))
+
+    # Build the plan de compte URL from the budget year and the two nomenclature parts.
+    year = jmespath.search(paths['budget_year'], xml_dict)
+    nomenclature = (jmespath.search(paths['budget_nomenclature'], xml_dict) or '').split('-', 1)
+    if len(nomenclature) != 2:
+        raise ValueError('No usable nomenclature found in {xml_file}'.format(xml_file=xml_file))
+    pdc_url = pdc_root_url + '/{year}/{n1}/{n2}/planDeCompte.xml'.format(year=year, n1=nomenclature[0], n2=nomenclature[1])
+    print("Plan de compte: {pdc_url}".format(pdc_url=pdc_url))
+
+    # Download it; fail on a timeout or HTTP error rather than parsing an error page.
+    pdc_response = requests.get(pdc_url, timeout=30)
+    pdc_response.raise_for_status()
+    pdc_dict = xmltodict.parse(pdc_response.content, attr_prefix='', force_list=True)
+
+    # Comptes and RefFonc are nested: flatten them so labels can be looked up by code.
+    # "or []" keeps a plan de compte lacking one of these sections from raising TypeError.
+    nomenclature_nature_comptes = jmespath.search(paths['nomenclature_nature_comptes'], pdc_dict) or []
+    nomenclature_fonction_references = jmespath.search(paths['nomenclature_fonction_references'], pdc_dict) or []
+    nature_comptes = get_children(nomenclature_nature_comptes, 'Compte', paths['comptes'], [])
+    fonction_references = get_children(nomenclature_fonction_references, 'RefFonc', paths['references_fonctionnelles'], [])
+
+    # Header values shared by every CSV row; missing ones are replaced by an error marker.
+    natdec_code = jmespath.search(paths['budget_natdec'], xml_dict)
+    config = {
+        'nomenclature_nature_chapitres': jmespath.search(paths['nomenclature_nature_chapitres'], pdc_dict) or [],
+        'nature_comptes': nature_comptes,
+        'fonction_references': fonction_references,
+        'NatDec': codes_natdec.get(natdec_code, codes_natdec['ERROR']),
+        'Exer': year or 'ERROR',
+        'IdEtab': jmespath.search(paths['budget_idetab'], xml_dict) or 'ERROR',
+        'LibelleColl': jmespath.search(paths['budget_libellecoll'], xml_dict) or 'ERROR',
+    }
+
+    budget_lines = jmespath.search(paths['budget_lines'], xml_dict) or []
+    nb_xml_lines = len(budget_lines)
+    print("{nb_xml_lines} lignes de budget dans le fichier {xml_file}.".format(nb_xml_lines=nb_xml_lines, xml_file=xml_file))
+
+    # Cf. process details in https://gitlab.com/datafin/totem/-/blob/master/totem2csv/xsl/totem2xmlcsv.xsl
+    nb_csv_lines = 0
+    with open(csv_file, 'w', newline='', encoding="utf-8") as cf:
+        csv_writer = csv.DictWriter(cf, fieldnames=csv_header, delimiter=',', quoting=csv.QUOTE_NONNUMERIC)
+        csv_writer.writeheader()
+        for id_line, line in enumerate(budget_lines, start=1):
+            print('Traitement et enregistrement de la ligne {id_line}/{nb_xml_lines}'.format(id_line=id_line, nb_xml_lines=nb_xml_lines))
+            csv_writer.writerow(line2csv(line, config))
+            nb_csv_lines += 1
+
+    print("{nb_csv_lines} lignes enregistrées dans le fichier {csv_file} sur {nb_xml_lines}.".format(nb_csv_lines=nb_csv_lines, csv_file=csv_file, nb_xml_lines=nb_xml_lines))
+
+
+def main(argv):
+    """Convert each Totem XML file listed in ``argv`` to CSV.
+
+    Exits with status 1 and a message on stderr when ``argv`` is empty, or as soon as an
+    entry is not an existing ``.xml`` file (entries listed before it are still converted)."""
+    if not argv:
+        sys.exit('No XML file in command line')
+    for xml_file in argv:
+        if not os.path.isfile(xml_file) or not xml_file.endswith('.xml'):
+            sys.exit('File {xml_file} not exist or is not XML file'.format(xml_file=xml_file))
+        totem2csv(xml_file)
+
+
+if __name__ == "__main__":  # run the conversion only when executed as a script
+    main(sys.argv[1:])
+