commit
eab4a985cc
22 changed files with 2139 additions and 0 deletions
@ -0,0 +1,121 @@
|
||||
*.md |
||||
*.MD |
||||
*.XML |
||||
*.xml |
||||
*.XSD |
||||
*.xsd |
||||
*.p7m |
||||
*.P7M |
||||
*.PEM |
||||
*.pem |
||||
*.xslt |
||||
*.XSLT |
||||
*.html |
||||
*.HTML |
||||
*.PDF |
||||
*.pdf |
||||
|
||||
# Byte-compiled / optimized / DLL files |
||||
__pycache__/ |
||||
*.py[cod] |
||||
*$py.class |
||||
|
||||
# C extensions |
||||
*.so |
||||
|
||||
# Distribution / packaging |
||||
.Python |
||||
build/ |
||||
develop-eggs/ |
||||
dist/ |
||||
downloads/ |
||||
eggs/ |
||||
.eggs/ |
||||
lib/ |
||||
lib64/ |
||||
parts/ |
||||
sdist/ |
||||
var/ |
||||
wheels/ |
||||
*.egg-info/ |
||||
.installed.cfg |
||||
*.egg |
||||
MANIFEST |
||||
|
||||
# PyInstaller |
||||
# Usually these files are written by a python script from a template |
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it. |
||||
*.manifest |
||||
*.spec |
||||
|
||||
# Installer logs |
||||
pip-log.txt |
||||
pip-delete-this-directory.txt |
||||
|
||||
# Unit test / coverage reports |
||||
htmlcov/ |
||||
.tox/ |
||||
.coverage |
||||
.coverage.* |
||||
.cache |
||||
nosetests.xml |
||||
coverage.xml |
||||
*.cover |
||||
.hypothesis/ |
||||
|
||||
# Translations |
||||
*.mo |
||||
*.pot |
||||
|
||||
# Django stuff: |
||||
*.log |
||||
.static_storage/ |
||||
.media/ |
||||
local_settings.py |
||||
|
||||
# Flask stuff: |
||||
instance/ |
||||
.webassets-cache |
||||
|
||||
# Scrapy stuff: |
||||
.scrapy |
||||
|
||||
# Sphinx documentation |
||||
docs/_build/ |
||||
|
||||
# PyBuilder |
||||
target/ |
||||
|
||||
# Jupyter Notebook |
||||
.ipynb_checkpoints |
||||
|
||||
# pyenv |
||||
.python-version |
||||
|
||||
# celery beat schedule file |
||||
celerybeat-schedule |
||||
|
||||
# SageMath parsed files |
||||
*.sage.py |
||||
|
||||
# Environments |
||||
.env |
||||
.venv |
||||
env/ |
||||
venv/ |
||||
ENV/ |
||||
env.bak/ |
||||
venv.bak/ |
||||
|
||||
# Spyder project settings |
||||
.spyderproject |
||||
.spyproject |
||||
|
||||
# Rope project settings |
||||
.ropeproject |
||||
|
||||
# mkdocs documentation |
||||
/site |
||||
|
||||
# mypy |
||||
.mypy_cache/ |
@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env make
|
||||
|
||||
#
|
||||
# Makefile
|
||||
#
|
||||
# Copyright (C) 2019 Franco Masotti <franco.masotti@live.com>
|
||||
#
|
||||
# This file is part of fattura-elettronica-reader.
|
||||
#
|
||||
# fattura-elettronica-reader is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# fattura-elettronica-reader is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
default: pep doc test |
||||
|
||||
githook: |
||||
git config core.hooksPath .githooks
|
||||
|
||||
pep: |
||||
yapf --style '{based_on_style: pep8; indent_width: 4}' -i fattura_pa_reader/*.py tests/*.py
|
||||
flake8 fattura_pa_reader/*.py --ignore=F401,E501 tests/*.py
|
||||
|
||||
doc: |
||||
$(MAKE) -C docs html
|
||||
|
||||
install: |
||||
pip install .
|
||||
|
||||
test: |
||||
python setup.py test
|
||||
|
||||
# Remove the installed package. The name must match the one used by
# "pip install fattura_elettronica_reader" in the installation docs.
uninstall:
	pip uninstall fattura_elettronica_reader
|
||||
|
||||
dist: |
||||
python setup.py sdist
|
||||
python setup.py bdist_wheel
|
||||
|
||||
upload: |
||||
twine upload dist/*
|
||||
|
||||
clean: |
||||
rm -rf build dist *.egg-info
|
||||
|
||||
.PHONY: default pep doc install test uninstall dist upload clean |
@ -0,0 +1,73 @@
|
||||
fattura-elettronica-reader |
||||
========================== |
||||
|
||||
|license| |pyver| |gitter| |
||||
|
||||
.. |license| image:: https://img.shields.io/pypi/l/fattura-elettronica-reader.svg?color=blue |
||||
:alt: PyPI - License |
||||
:target: https://raw.githubusercontent.com/frnmst/fattura-elettronica-reader/master/LICENSE.txt |
||||
|
||||
.. |pyver| image:: https://img.shields.io/pypi/pyversions/fattura-elettronica-reader.svg |
||||
:alt: PyPI - Python Version |
||||
|
||||
.. |gitter| image:: https://badges.gitter.im/fattura-elettronica-reader/community.svg |
||||
:alt: Gitter |
||||
:target: https://gitter.im/fattura-elettronica-reader/community |
||||
|
||||
Validate, extract, and generate printables of electronic invoice files received |
||||
from the "Sistema di Interscambio". |
||||
|
||||
NOTE |
||||
---- |
||||
|
||||
This software is in a pre-alpha stage: it is NOT ready for production use. |
||||
|
||||
All contributions are welcome. |
||||
See http://frnmst.github.io/fattura-elettronica-reader/contributing.html#todo-and-fixme |
||||
|
||||
Discussion here: https://gitter.im/fattura-elettronica-reader/community |
||||
|
||||
Video |
||||
----- |
||||
|
||||
TODO |
||||
|
||||
Documentation |
||||
------------- |
||||
|
||||
http://frnmst.github.io/fattura-elettronica-reader |
||||
|
||||
API examples |
||||
------------ |
||||
|
||||
TODO |
||||
|
||||
CLI helps |
||||
--------- |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
$ fattura_elettronica_reader --help |
||||
|
||||
|
||||
License |
||||
------- |
||||
|
||||
Copyright (c) 2018 Enio Carboni - Italy |
||||
Copyright (C) 2019 frnmst (Franco Masotti) <franco.masotti@live.com> |
||||
|
||||
fattura-elettronica-reader is free software: you can redistribute it and/or modify |
||||
it under the terms of the GNU General Public License as published by |
||||
the Free Software Foundation, either version 3 of the License, or |
||||
(at your option) any later version. |
||||
|
||||
fattura-elettronica-reader is distributed in the hope that it will be useful, |
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
GNU General Public License for more details. |
||||
|
||||
You should have received a copy of the GNU General Public License |
||||
along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>. |
||||
|
@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line.
|
||||
SPHINXOPTS =
|
||||
SPHINXBUILD = sphinx-build
|
||||
SPHINXPROJ = md-toc
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help: |
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile |
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile |
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
@ -0,0 +1,44 @@
|
||||
Developer Interface |
||||
=================== |
||||
|
||||
.. module:: fattura_elettronica_reader |
||||
|
||||
Main Interface |
||||
-------------- |
||||
|
||||
Examples for the most relevant api functions can be viewed in the test |
||||
file. fattura_elettronica_reader's API uses `type hints`_ instead of assertions to check input and |
||||
output types. |
||||
|
||||
.. _type hints: https://docs.python.org/3/library/typing.html |
||||
|
||||
.. autofunction:: is_xml_file_conforming_to_schema |
||||
.. autofunction:: parse_xml_file |
||||
.. autofunction:: get_invoice_filename |
||||
.. autofunction:: is_invoice_file_signed |
||||
.. autofunction:: invoice_file_checksum_matches |
||||
.. autofunction:: get_remote_file |
||||
.. autofunction:: get_ca_certificates |
||||
.. autofunction:: is_invoice_file_authentic |
||||
.. autofunction:: remove_signature_from_invoice_file |
||||
.. autofunction:: extract_attachments_from_invoice_file |
||||
.. autofunction:: get_invoice_as_html |
||||
.. autofunction:: patch_invoice_schema_file |
||||
.. autofunction:: create_appdirs |
||||
.. autofunction:: define_appdirs_user_data_dir_file_path |
||||
.. autofunction:: define_appdirs_user_config_dir_file_path |
||||
.. autofunction:: write_configuration_file |
||||
.. autofunction:: load_configuration |
||||
.. autofunction:: pipeline |
||||
|
||||
Exceptions |
||||
---------- |
||||
|
||||
.. autoexception:: InvoiceFileDoesNotHaveACoherentCryptographicalSignature |
||||
.. autoexception:: InvoiceFileChecksumFailed |
||||
.. autoexception:: InvoiceFileNotAuthentic |
||||
.. autoexception:: CannotExtractOriginalInvoiceFile |
||||
.. autoexception:: MissingTagInMetadataFile |
||||
.. autoexception:: XMLFileNotConformingToSchema |
||||
.. autoexception:: ExtractedAttachmentNotInExtensionWhitelist |
||||
.. autoexception:: ExtractedAttachmentNotInFileTypeWhitelist |
@ -0,0 +1,181 @@
|
||||
# -*- coding: utf-8 -*- |
||||
# |
||||
# Configuration file for the Sphinx documentation builder. |
||||
# |
||||
# This file does only contain a selection of the most common options. For a |
||||
# full list see the documentation: |
||||
# http://www.sphinx-doc.org/en/master/config |
||||
|
||||
# -- Path setup -------------------------------------------------------------- |
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory, |
||||
# add these directories to sys.path here. If the directory is relative to the |
||||
# documentation root, use os.path.abspath to make it absolute, like shown here. |
||||
# |
||||
import os |
||||
import sys |
||||
sys.path.insert(0, os.path.abspath('..')) |
||||
|
||||
|
||||
# -- Project information ----------------------------------------------------- |
||||
|
||||
project = 'fattura-elettronica-reader' |
||||
copyright = '2019, Franco Masotti' |
||||
author = 'Franco Masotti' |
||||
|
||||
# The short X.Y version |
||||
version = '0.0.1' |
||||
# The full version, including alpha/beta/rc tags |
||||
release = '0.0.1' |
||||
|
||||
|
||||
# -- General configuration --------------------------------------------------- |
||||
|
||||
# If your documentation needs a minimal Sphinx version, state it here. |
||||
# |
||||
# needs_sphinx = '1.0' |
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be |
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom |
||||
# ones. |
||||
extensions = [ |
||||
'sphinx.ext.autodoc', |
||||
'sphinx.ext.coverage', |
||||
'sphinx.ext.imgmath', |
||||
'sphinx.ext.viewcode', |
||||
'sphinx.ext.githubpages', |
||||
] |
||||
|
||||
# Add any paths that contain templates here, relative to this directory. |
||||
templates_path = ['_templates'] |
||||
|
||||
# The suffix(es) of source filenames. |
||||
# You can specify multiple suffix as a list of string: |
||||
# |
||||
# source_suffix = ['.rst', '.md'] |
||||
source_suffix = '.rst' |
||||
|
||||
# The master toctree document. |
||||
master_doc = 'index' |
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation |
||||
# for a list of supported languages. |
||||
# |
||||
# This is also used if you do content translation via gettext catalogs. |
||||
# Usually you set "language" from the command line for these cases. |
||||
language = None |
||||
|
||||
# List of patterns, relative to source directory, that match files and |
||||
# directories to ignore when looking for source files. |
||||
# This pattern also affects html_static_path and html_extra_path. |
||||
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] |
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use. |
||||
pygments_style = None |
||||
|
||||
|
||||
# -- Options for HTML output ------------------------------------------------- |
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for |
||||
# a list of builtin themes. |
||||
# |
||||
html_theme = 'alabaster' |
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme |
||||
# further. For a list of options available for each theme, see the |
||||
# documentation. |
||||
# |
||||
# html_theme_options = {} |
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here, |
||||
# relative to this directory. They are copied after the builtin static files, |
||||
# so a file named "default.css" will overwrite the builtin "default.css". |
||||
html_static_path = ['_static'] |
||||
|
||||
# Custom sidebar templates, must be a dictionary that maps document names |
||||
# to template names. |
||||
# |
||||
# The default sidebars (for documents that don't match any pattern) are |
||||
# defined by theme itself. Builtin themes are using these templates by |
||||
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', |
||||
# 'searchbox.html']``. |
||||
# |
||||
# html_sidebars = {} |
||||
|
||||
|
||||
# -- Options for HTMLHelp output --------------------------------------------- |
||||
|
||||
# Output file base name for HTML help builder. |
||||
htmlhelp_basename = 'fattura-elettronica-readerdoc' |
||||
|
||||
|
||||
# -- Options for LaTeX output ------------------------------------------------ |
||||
|
||||
latex_elements = { |
||||
# The paper size ('letterpaper' or 'a4paper'). |
||||
# |
||||
# 'papersize': 'letterpaper', |
||||
|
||||
# The font size ('10pt', '11pt' or '12pt'). |
||||
# |
||||
# 'pointsize': '10pt', |
||||
|
||||
# Additional stuff for the LaTeX preamble. |
||||
# |
||||
# 'preamble': '', |
||||
|
||||
# Latex figure (float) alignment |
||||
# |
||||
# 'figure_align': 'htbp', |
||||
} |
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples |
||||
# (source start file, target name, title, |
||||
# author, documentclass [howto, manual, or own class]). |
||||
latex_documents = [ |
||||
(master_doc, 'fattura-elettronica-reader.tex', 'fattura-elettronica-reader Documentation', |
||||
'Franco Masotti', 'manual'), |
||||
] |
||||
|
||||
|
||||
# -- Options for manual page output ------------------------------------------ |
||||
|
||||
# One entry per manual page. List of tuples |
||||
# (source start file, name, description, authors, manual section). |
||||
man_pages = [ |
||||
(master_doc, 'fattura-elettronica-reader', 'fattura-elettronica-reader Documentation', |
||||
[author], 1) |
||||
] |
||||
|
||||
|
||||
# -- Options for Texinfo output ---------------------------------------------- |
||||
|
||||
# Grouping the document tree into Texinfo files. List of tuples |
||||
# (source start file, target name, title, author, |
||||
# dir menu entry, description, category) |
||||
texinfo_documents = [ |
||||
(master_doc, 'fattura-elettronica-reader', 'fattura-elettronica-reader Documentation', |
||||
author, 'fattura-elettronica-reader', 'One line description of project.', |
||||
'Miscellaneous'), |
||||
] |
||||
|
||||
|
||||
# -- Options for Epub output ------------------------------------------------- |
||||
|
||||
# Bibliographic Dublin Core info. |
||||
epub_title = project |
||||
|
||||
# The unique identifier of the text. This can be a ISBN number |
||||
# or the project homepage. |
||||
# |
||||
# epub_identifier = '' |
||||
|
||||
# A unique identification for the text. |
||||
# |
||||
# epub_uid = '' |
||||
|
||||
# A list of files that should not be packed into the epub file. |
||||
epub_exclude_files = ['search.html'] |
||||
|
||||
|
||||
# -- Extension configuration ------------------------------------------------- |
@ -0,0 +1,177 @@
|
||||
Considerations |
||||
============== |
||||
|
||||
Purpose |
||||
------- |
||||
|
||||
This software aims at: |
||||
|
||||
- correctness |
||||
|
||||
- XML files are validated whenever possible |
||||
|
||||
- integrity |
||||
|
||||
- a checksum is performed on the invoice file |
||||
|
||||
- security |
||||
|
||||
- all cryptographic signatures and certificates must be valid |
||||
- invoice attachments are examined before saving them permanently |
||||
|
||||
- privacy |
||||
|
||||
- works offline once you have downloaded the necessary files |
||||
|
||||
Pipeline |
||||
-------- |
||||
|
||||
See the definition of pipeline: |
||||
|
||||
- https://en.wikipedia.org/wiki/Pipeline_(software) |
||||
|
||||
In this software, a pipeline is a sequence of steps that given the signed invoice file |
||||
and its metadata file makes it possible to obtain: |
||||
|
||||
- various integrity and authenticity verifications |
||||
- the html output of the invoice file |
||||
- the extraction of embedded attachments |
||||
|
||||
Description |
||||
``````````` |
||||
|
||||
Without going into too much detail (you can read the source code), we can divide |
||||
the pipeline into three main steps: |
||||
|
||||
=========== =================================================== |
||||
Step number Actions |
||||
=========== =================================================== |
||||
1 compare metadata file content with the invoice file |
||||
2 check invoice signature and signer's certificate |
||||
3 generate outputs |
||||
=========== =================================================== |
||||
|
||||
Software dependencies |
||||
--------------------- |
||||
|
||||
OpenSSL |
||||
``````` |
||||
|
||||
Support |
||||
~~~~~~~ |
||||
|
||||
Due to the lack of support for most of the PKCS#7 functionality in |
||||
PyOpenSSL, all OpenSSL operations are performed using the ``subprocess`` |
||||
module which calls the ``openssl`` binary installed on the system. |
||||
|
||||
See also: |
||||
|
||||
- https://stackoverflow.com/questions/45104923/pyopenssls-pkcs7-object-provide-very-little-information-how-can-i-get-the-sha1/45111623#45111623 |
||||
- https://stackoverflow.com/questions/45782506/is-there-any-python-package-for-parsing-pkcs7?rq=1 |
||||
|
||||
Bug |
||||
~~~ |
||||
|
||||
There are problems with recent versions of OpenSSL concerning PKCS#7 file decoding: |
||||
|
||||
- https://github.com/eniocarboni/p7m/issues/4 |
||||
- https://github.com/eniocarboni/p7m/issues/7 |
||||
- https://www.mail-archive.com/openssl-users@openssl.org/msg85901.html |
||||
|
||||
A possible solution is to use an older system. If you really trust the file you can |
||||
disable signature checking with the appropriate option. I strongly discourage |
||||
this last solution since if you can't prove its authenticity the invoice has no legal |
||||
value. |
||||
|
||||
lxml vs defusedxml |
||||
~~~~~~~~~~~~~~~~~~ |
||||
|
||||
I decided to use lxml because it supports XML stylesheets (XSLT), something that |
||||
defusedxml does not. At first, using defusedxml seemed the best bet because of |
||||
the increased security: |
||||
|
||||
- https://github.com/tiran/defusedxml#python-xml-libraries |
||||
|
||||
There is the possibility to use defusedxml anyway by simply |
||||
editing the API file like this: |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
import defusedxml.ElementTree as ET |
||||
|
||||
|
||||
instead of: |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
import lxml.etree as ET |
||||
|
||||
|
||||
You must also add defusedxml and re-install the requirements as described |
||||
in the contributing section. |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
defusedxml==0.5.0 |
||||
|
||||
|
||||
Official documentation |
||||
---------------------- |
||||
|
||||
- this seems to be legal mumbo jumbo but there might be relevant information: |
||||
|
||||
- https://www.fatturapa.gov.it/export/fatturazione/it/normativa/f-1.htm |
||||
|
||||
- https://www.fatturapa.gov.it/export/fatturazione/it/normativa/f-2.htm |
||||
|
||||
- https://www.fatturapa.gov.it/export/fatturazione/it/normativa/f-3.htm |
||||
|
||||
XML schemas for document validations |
||||
```````````````````````````````````` |
||||
|
||||
Most of the XML schemas seem to be non-existent. Only the one for the |
||||
invoice file was found, and was reported by lxml as incorrect, |
||||
but according to the server the schema was last modified on |
||||
``Tue, 25 Jun 2019 10:16:31 GMT``, so they fixed the offending ``xsd`` typo: |
||||
|
||||
- https://www.fatturapa.gov.it/export/fatturazione/sdi/fatturapa/v1.2.1/Schema_del_file_xml_FatturaPA_versione_1.2.1.xsd |
||||
|
||||
Not having access to all the schema files is a problem since there is no way to tell if |
||||
|
||||
- the metadata file, |
||||
- the trusted list file, |
||||
- the XML stylesheet file |
||||
|
||||
are correct and conforming to specifications. |
||||
|
||||
If you find these files please let me know and/or open a pull request. |
||||
|
||||
Downloading of the W3C file |
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
||||
|
||||
The W3C schema file is a dependency for the invoice schema file and it needs to be downloaded |
||||
separately. For some reason the downloading of this file takes a few seconds. |
||||
|
||||
Technical differences between "Fattura PA", "Fattura B2B" and other |
||||
``````````````````````````````````````````````````````````````````` |
||||
|
||||
Digital signature |
||||
~~~~~~~~~~~~~~~~~ |
||||
|
||||
Some websites say that digital signature of the "Fattura PA" is compulsory and |
||||
in other cases it is not. |
||||
|
||||
If you find any official source please let me know and/or open a pull request. |
||||
|
||||
Closing note |
||||
------------ |
||||
|
||||
Although this whole system has its merits, it has been put into production |
||||
with lots of missing pieces (see notes above). Given the importance of it I think this |
||||
is unacceptable. I will not make any more comments because I may risk legal actions... |
@ -0,0 +1,116 @@
|
||||
Contributing |
||||
============ |
||||
|
||||
Git branches |
||||
------------ |
||||
|
||||
What follows is a table of the git branches used in fattura-elettronica-reader's repository. |
||||
Please, do NOT open pull requests on the ``master``, ``dev`` or ``gh-pages`` branches. |
||||
Use ``bugfix-${fix_name}`` or ``newfeature-${new_feature_name}`` instead. |
||||
|
||||
===================================== ==================================================== ============================== |
||||
Branch Description Update schedule |
||||
===================================== ==================================================== ============================== |
||||
``master`` the main branch every new release |
||||
``dev`` recent changes are merged here before a new release at will |
||||
``gh-pages`` contains the built documentation only every new release |
||||
``bugfix-${fix_name}`` a generic bug fix |
||||
``newfeature-${new_feature_name}`` a generic new feature |
||||
===================================== ==================================================== ============================== |
||||
|
||||
Dependencies |
||||
------------ |
||||
|
||||
First of all install the software requirements from the ``requirements.txt`` file |
||||
in the repository's root: |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
$ pip install -r requirements.txt |
||||
|
||||
|
||||
Unit tests |
||||
---------- |
||||
|
||||
If you have changed parts of the source code you MUST take care of adding |
||||
the corresponding unit tests. Once you have done that run the following command |
||||
in a terminal: |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
$ python setup.py test |
||||
|
||||
|
||||
or simply: |
||||
|
||||
|
||||
:: |
||||
|
||||
$ make test |
||||
|
||||
|
||||
You can also have this command run before every git commit by running: |
||||
|
||||
|
||||
:: |
||||
|
||||
$ make githook |
||||
|
||||
|
||||
Python PEP compliancy |
||||
--------------------- |
||||
|
||||
To be able to lint and test for PEP compliance you need to run: |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
$ make pep |
||||
|
||||
|
||||
Documentation |
||||
------------- |
||||
|
||||
You can edit and rebuild all this documentation with: |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
$ make doc |
||||
|
||||
|
||||
TODO and FIXME |
||||
-------------- |
||||
|
||||
Go in the repository's root and then: |
||||
|
||||
|
||||
:: |
||||
|
||||
grep -e TODO -e FIXME -n */*.py |
||||
|
||||
|
||||
Important stuff |
||||
``````````````` |
||||
- unit tests: I guess we should use the examples from the government's website, |
||||
if the license permits it. |
||||
- GUI: for example https://github.com/chriskiehl/Gooey |
||||
- i18n (both CLI and GUI): https://docs.python.org/3/library/gettext.html |
||||
|
||||
Contribution Steps |
||||
------------------ |
||||
|
||||
1. clone the repository |
||||
2. install the requirements |
||||
3. write code |
||||
4. write unit tests |
||||
5. run tests |
||||
6. run PEP linter and check |
||||
7. update relevant documentation, if necessary |
||||
8. pull request |
@ -0,0 +1,18 @@
|
||||
Copyright and License |
||||
===================== |
||||
|
||||
Copyright (c) 2018 Enio Carboni - Italy |
||||
Copyright (C) 2019 frnmst (Franco Masotti) <franco.masotti@live.com> |
||||
|
||||
fattura-elettronica-reader is free software: you can redistribute it and/or modify |
||||
it under the terms of the GNU General Public License as published by |
||||
the Free Software Foundation, either version 3 of the License, or |
||||
(at your option) any later version. |
||||
|
||||
fattura-elettronica-reader is distributed in the hope that it will be useful, |
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
GNU General Public License for more details. |
||||
|
||||
You should have received a copy of the GNU General Public License |
||||
along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>. |
@ -0,0 +1,43 @@
|
||||
Examples |
||||
======== |
||||
|
||||
CLI |
||||
--- |
||||
|
||||
You want everything, you trust nothing |
||||
`````````````````````````````````````` |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
$ fattura_elettronica_reader -A ${metadata_file} |
||||
|
||||
|
||||
which is the same as |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
$ fattura_elettronica_reader -H -a -o ${metadata_file} |
||||
|
||||
|
||||
You want everything, you trust everything |
||||
````````````````````````````````````````` |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
$ fattura_elettronica_reader -SsAkwW ${metadata_file} |
||||
|
||||
|
||||
which is the same as |
||||
|
||||
|
||||
:: |
||||
|
||||
|
||||
$ fattura_elettronica_reader -H -a -o -S -s -w -W ${metadata_file} |
||||
|
@ -0,0 +1,25 @@
|
||||
.. fattura-elettronica-reader documentation master file, created by |
||||
sphinx-quickstart on Tue Apr 23 16:24:44 2019. |
||||
You can adapt this file completely to your liking, but it should at least |
||||
contain the root `toctree` directive. |
||||
|
||||
Welcome to fattura-elettronica-reader's documentation! |
||||
====================================================== |
||||
|
||||
.. toctree:: |
||||
:maxdepth: 2 |
||||
:caption: Contents: |
||||
|
||||
install |
||||
api |
||||
considerations |
||||
examples |
||||
contributing |
||||
copyright_license |
||||
|
||||
Indices and tables |
||||
================== |
||||
|
||||
* :ref:`genindex` |
||||
* :ref:`modindex` |
||||
* :ref:`search` |
@ -0,0 +1,41 @@
|
||||
Installation |
||||
============ |
||||
|
||||
Run the following command in either a root or normal terminal (depending on |
||||
your Python setup) from the root directory of the project's cloned repository, |
||||
|
||||
:: |
||||
|
||||
pip install . |
||||
|
||||
or simply |
||||
|
||||
:: |
||||
|
||||
make install |
||||
|
||||
|
||||
You can also install fattura_elettronica_reader via pip (i.e. without having to download the source |
||||
code): |
||||
|
||||
:: |
||||
|
||||
pip install fattura_elettronica_reader |
||||
|
||||
|
||||
All the necessary dependencies should be installed automatically along with the |
||||
program. |
||||
|
||||
Distribution packages |
||||
--------------------- |
||||
|
||||
- A ``PKGBUILD`` for Arch Linux like distributions is available under |
||||
the ``./packages/aur`` directory as well as on the AUR website. |
||||
|
||||
|
||||
Dependencies |
||||
------------ |
||||
|
||||
- Python 3.5 |
||||
|
||||
|
@ -0,0 +1,36 @@
|
||||
# |
||||
# __init__.py |
||||
# |
||||
# Copyright (c) 2018 Enio Carboni - Italy |
||||
# Copyright (C) 2017-2019 Franco Masotti <franco.masotti@live.com> |
||||
# |
||||
# This file is part of fattura-elettronica-reader. |
||||
# |
||||
# fattura-elettronica-reader is free software: you can redistribute it and/or modify |
||||
# it under the terms of the GNU General Public License as published by |
||||
# the Free Software Foundation, either version 3 of the License, or |
||||
# (at your option) any later version. |
||||
# |
||||
# fattura-elettronica-reader is distributed in the hope that it will be useful, |
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
# GNU General Public License for more details. |
||||
# |
||||
# You should have received a copy of the GNU General Public License |
||||
# along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>. |
||||
# |
||||
"""Python discovery file.""" |
||||
|
||||
from .api import ( |
||||
is_xml_file_conforming_to_schema, parse_xml_file, get_invoice_filename, |
||||
is_invoice_file_signed, invoice_file_checksum_matches, get_remote_file, |
||||
get_ca_certificates, is_invoice_file_authentic, remove_signature_from_invoice_file, |
||||
extract_attachments_from_invoice_file, get_invoice_as_html, patch_invoice_schema_file, |
||||
create_appdirs, define_appdirs_user_data_dir_file_path, define_appdirs_user_config_dir_file_path, |
||||
write_configuration_file, load_configuration, pipeline) |
||||
from .cli import (CliInterface) |
||||
from .exceptions import (InvoiceFileDoesNotHaveACoherentCryptographicalSignature, |
||||
InvoiceFileChecksumFailed, InvoiceFileNotAuthentic, |
||||
CannotExtractOriginalInvoiceFile, MissingTagInMetadataFile, |
||||
XMLFileNotConformingToSchema, ExtractedAttachmentNotInExtensionWhitelist, |
||||
ExtractedAttachmentNotInFileTypeWhitelist) |
@ -0,0 +1,46 @@
|
||||
# |
||||
# __main__.py |
||||
# |
||||
# Copyright (c) 2018 Enio Carboni - Italy |
||||
# Copyright (C) 2019 Franco Masotti <franco.masotti@live.com> |
||||
# |
||||
# This file is part of fattura-elettronica-reader. |
||||
# |
||||
# fattura-elettronica-reader is free software: you can redistribute it and/or modify |
||||
# it under the terms of the GNU General Public License as published by |
||||
# the Free Software Foundation, either version 3 of the License, or |
||||
# (at your option) any later version. |
||||
# |
||||
# fattura-elettronica-reader is distributed in the hope that it will be useful, |
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
# GNU General Public License for more details. |
||||
# |
||||
# You should have received a copy of the GNU General Public License |
||||
# along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>. |
||||
# |
||||
"""Call the CLI parser.""" |
||||
|
||||
import sys |
||||
import traceback |
||||
from .cli import CliInterface |
||||
|
||||
|
||||
def main(args=None):
    r"""Call the CLI interface and wait for the result.

    :param args: the command line arguments to parse. When ``None``
        the arguments are read from ``sys.argv`` by ``argparse``.
    :type args: list
    :returns: None. The function always terminates the interpreter
        through ``sys.exit``.
    :raises: SystemExit with code 0 on success or 1 if the selected
        command raised an exception.
    """
    retcode = 0
    try:
        ci = CliInterface()
        # Forward the caller-supplied arguments instead of silently
        # ignoring them: parse_args(None) still falls back to sys.argv.
        parsed_args = ci.parser.parse_args(args)
        result = parsed_args.func(parsed_args)
        if result is not None:
            print(result)
    except Exception:
        # Top-level boundary: report the failure and signal it through
        # the exit status instead of letting the traceback propagate.
        retcode = 1
        traceback.print_exc()
    sys.exit(retcode)
||||
|
||||
|
||||
if __name__ == '__main__': |
||||
main() |
@ -0,0 +1,647 @@
|
||||
# |
||||
# api.py |
||||
# |
||||
# Copyright (c) 2018 Enio Carboni - Italy |
||||
# Copyright (C) 2019 Franco Masotti <franco.masotti@live.com> |
||||
# |
||||
# This file is part of fattura-elettronica-reader. |
||||
# |
||||
# fattura-elettronica-reader is free software: you can redistribute it and/or modify |
||||
# it under the terms of the GNU General Public License as published by |
||||
# the Free Software Foundation, either version 3 of the License, or |
||||
# (at your option) any later version. |
||||
# |
||||
# fattura-elettronica-reader is distributed in the hope that it will be useful, |
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
# GNU General Public License for more details. |
||||
# |
||||
# You should have received a copy of the GNU General Public License |
||||
# along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>. |
||||
# |
||||
"""The main file.""" |
||||
|
||||
import subprocess |
||||
import shlex |
||||
import lxml.etree as ET |
||||
import hashlib |
||||
import requests |
||||
import base64 |
||||
import pathlib |
||||
import tempfile |
||||
import shutil |
||||
import atomicwrites |
||||
import filetype |
||||
import appdirs |
||||
import configparser |
||||
from .exceptions import (InvoiceFileDoesNotHaveACoherentCryptographicalSignature, |
||||
InvoiceFileChecksumFailed, InvoiceFileNotAuthentic, |
||||
CannotExtractOriginalInvoiceFile, |
||||
MissingTagInMetadataFile, XMLFileNotConformingToSchema, |
||||
ExtractedAttachmentNotInExtensionWhitelist, |
||||
ExtractedAttachmentNotInFileTypeWhitelist) |
||||
from .constants import (XML, Paths, Downloads, Patch, File) |
||||
|
||||
####### |
||||
# API # |
||||
####### |
||||
|
||||
def is_xml_file_conforming_to_schema(xml_file: str, xml_schema_file: str) -> bool:
    r"""Validate an XML document against an XML schema.

    :param xml_file: the path of the XML file to validate.
    :param xml_schema_file: the path of the schema file.
    :type xml_file: str
    :type xml_schema_file: str
    :returns: the result of the schema validation: ``True`` if the
        document conforms to the schema, ``False`` otherwise.
    :rtype: bool
    :raises: an lxml or a built-in exception.
    """
    # The schema is loaded first so that a broken schema is reported
    # before the document is even opened.
    schema = ET.XMLSchema(etree=ET.parse(xml_schema_file))
    document = ET.parse(xml_file)
    return schema.validate(document)
||||
|
||||
def parse_xml_file(xml_file: str):
    r"""Load an XML file and hand back its root element.

    :param xml_file: the path of the input XML file.
    :type xml_file: str
    :returns: the root element of the parsed document.
    :rtype: lxml.etree._Element
    :raises: an lxml or a built-in exception.
    """
    return ET.parse(xml_file).getroot()
||||
|
||||
def get_invoice_filename(metadata_file_xml_root,
                         metadata_file_invoice_filename_xml_tag: str,
                         metadata_file_xml_namespace: str) -> str:
    r"""Read the invoice file name stored in the metadata file.

    :param metadata_file_xml_root: the root of the metadata XML tree.
    :param metadata_file_invoice_filename_xml_tag: the tag name corresponding
        to the invoice filename.
    :param metadata_file_xml_namespace: the XML namespace of the metadata
        file. It is forwarded as the second argument of ``find``.
    :type metadata_file_xml_root: lxml.etree._Element
    :type metadata_file_invoice_filename_xml_tag: str
    :type metadata_file_xml_namespace: str
    :returns: the text content of the first element matching the tag.
    :rtype: str
    :raises: an lxml or a built-in exception. In particular an
        ``AttributeError`` is raised when no element matches the tag,
        because ``find`` returns ``None`` in that case.
    """
    filename_element = metadata_file_xml_root.find(
        metadata_file_invoice_filename_xml_tag, metadata_file_xml_namespace)
    return filename_element.text
||||
|
||||
|
||||
def is_invoice_file_signed(invoice_file: str) -> bool:
    r"""Tell whether the invoice file carries a PKCS#7 signature.

    The check is delegated to ``openssl pkcs7``, which is asked to read
    the file as a DER encoded PKCS#7 structure.

    :param invoice_file: the path of the invoice file.
    :type invoice_file: str
    :returns: ``True`` if openssl exits with status 0, ``False`` otherwise.
    :rtype: bool
    :raises: a subprocess or a built-in exception.
    """
    # An argument vector is handed to subprocess directly, so the file
    # name never goes through a shell.
    argv = [
        'openssl', 'pkcs7', '-print_certs', '-text', '-noout',
        '-inform', 'DER', '-in', invoice_file,
    ]
    completed = subprocess.run(argv)
    return completed.returncode == 0
||||
|
||||
|
||||
def invoice_file_checksum_matches(metadata_file_xml_root, invoice_file: str,
                                  metadata_file_invoice_checksum_xml_tag: str,
                                  metadata_file_xml_namespace: str) -> tuple:
    r"""Check if the invoice checksum matches the one in the metadata file.

    The SHA-256 digest of the invoice file is compared, as a hexadecimal
    string, with the text of the checksum element of the metadata file.

    :param metadata_file_xml_root: the root of the metadata XML tree.
    :param invoice_file: the path of the invoice file.
    :param metadata_file_invoice_checksum_xml_tag: the XML tag name
        corresponding to the invoice file checksum.
    :param metadata_file_xml_namespace: the XML namespace of the metadata file.
    :type metadata_file_xml_root: lxml.etree._Element
    :type invoice_file: str
    :type metadata_file_invoice_checksum_xml_tag: str
    :type metadata_file_xml_namespace: str
    :returns: a ``(matches, expected_checksum)`` pair: ``matches`` is ``True``
        if the computed checksum equals the expected one, ``False`` otherwise.
    :rtype: tuple
    :raises: a hashlib, lxml or a built-in exception.
    """
    # Get the checksum from the metadata file.
    expected_checksum = metadata_file_xml_root.find(
        metadata_file_invoice_checksum_xml_tag,
        metadata_file_xml_namespace).text

    # Compute the checksum. The file is read in chunks and the handle is
    # closed deterministically, whatever the size of the invoice.
    m = hashlib.sha256()
    with open(invoice_file, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            m.update(chunk)
    computed_checksum = m.hexdigest()

    return computed_checksum == expected_checksum, expected_checksum
||||
|
||||
|
||||
def get_remote_file(destination: str, url: str, timeout: float = 60.0):
    r"""Download and save a remote file.

    :param destination: the local path of the downloaded file.
    :param url: the remote path of the file.
    :param timeout: the number of seconds to wait for the server before
        giving up. Defaults to ``60.0``.
    :type destination: str
    :type url: str
    :type timeout: float
    :returns: None
    :rtype: None
    :raises: a built-in exception or a requests error.

    .. note:: requests also checks that the url is in a valid form.
    """
    # Without a timeout requests may wait forever on an unresponsive server.
    r = requests.get(url, timeout=timeout)
    # Nothing is written to disk if the server answers with an error status.
    r.raise_for_status()
    with atomicwrites.atomic_write(destination, mode='wb', overwrite=True) as f:
        f.write(r.content)
||||
|
||||
def get_ca_certificates(trusted_list_xml_root,
                        ca_certificate_pem_file: str,
                        trusted_list_file_xml_namespace: str,
                        trusted_list_file_xml_certificate_tag: str,
                        eol: str = '\n'):
    r"""Write the CA certificates file using the trusted list file.

    Every certificate element found in the trusted list is written as a
    PEM block whose base64 payload is wrapped at 64 characters.

    :param trusted_list_xml_root: the root of the trusted list XML tree.
    :param ca_certificate_pem_file: the destination file.
    :param trusted_list_file_xml_namespace: the XML namespace of the
        trusted list file.
    :param trusted_list_file_xml_certificate_tag: the XML tag name corresponding
        to the certificates in the trusted list file.
    :param eol: the end of line character to be used in the PEM file.
    :type trusted_list_xml_root: lxml.etree._Element
    :type ca_certificate_pem_file: str
    :type trusted_list_file_xml_namespace: str
    :type trusted_list_file_xml_certificate_tag: str
    :type eol: str
    :returns: None
    :rtype: None
    :raises: an atomicwrites, an lxml or a built-in exception.

    .. note:: See https://tools.ietf.org/html/rfc7468
    """
    preeb = '-----BEGIN CERTIFICATE-----'
    posteb = '-----END CERTIFICATE-----'
    max_line_len = 64
    qualified_tag = ('{' + trusted_list_file_xml_namespace + '}' +
                     trusted_list_file_xml_certificate_tag)
    with atomicwrites.atomic_write(ca_certificate_pem_file, mode='w', overwrite=True) as f:
        # See https://lxml.de/tutorial.html#elementpath
        # for the exception that gets raised.
        for e in trusted_list_xml_root.iter(qualified_tag):
            # Whitespace inside the element (line breaks, indentation) is
            # dropped so that every output line is exactly max_line_len
            # characters long, except possibly the last one.
            base64text = ''.join((e.text or '').split())
            if not base64text:
                # An empty element would yield a PEM block with no payload.
                continue
            # This tries to follow RFC7468.
            # See https://tools.ietf.org/html/rfc7468#section-3
            base64lines = [base64text[i:i + max_line_len]
                           for i in range(0, len(base64text), max_line_len)]
            stricttextualmsg = eol.join([preeb] + base64lines + [posteb]) + eol
            f.write(stricttextualmsg)
||||
|
||||
|
||||
def is_invoice_file_authentic(invoice_file: str,
                              ca_certificate_pem_file: str,
                              ignore_signature_check: bool = False,
                              ignore_signers_certificate_check: bool = False) -> bool:
    r"""Check authenticity of the invoice file on various levels.

    The verification is delegated to ``openssl smime -verify``; the
    extracted content is discarded.

    :param invoice_file: the path of the signed invoice file.
    :param ca_certificate_pem_file: the certificates file in PEM format.
    :param ignore_signature_check: avoid checking the signature
        (``-nosigs``). Defaults to ``False``.
    :param ignore_signers_certificate_check: avoid checking the signer's
        certificate (``-noverify``). Defaults to ``False``.
    :type invoice_file: str
    :type ca_certificate_pem_file: str
    :type ignore_signature_check: bool
    :type ignore_signers_certificate_check: bool
    :returns: ``True`` if openssl exits with status 0, ``False`` otherwise.
    :rtype: bool
    :raises: a subprocess or built-in exception.
    """
    # Imported locally: os.devnull is the portable spelling of /dev/null.
    import os

    # The argument vector is built directly, so no quoting is needed and
    # the paths never go through a shell.
    command = ['openssl', 'smime']
    if ignore_signature_check:
        command.append('-nosigs')
    command.append('-verify')
    if ignore_signers_certificate_check:
        command.append('-noverify')
    command += [
        '-CAfile', ca_certificate_pem_file,
        '-in', invoice_file,
        '-inform', 'DER',
        '-out', os.devnull,
    ]
    return subprocess.run(command).returncode == 0
||||
|
||||
|
||||
def remove_signature_from_invoice_file(invoice_file: str,
                                       output_file: str) -> bool:
    r"""Extract the original invoice from the signed invoice file.

    ``openssl smime`` is run with both the signature check and the
    signer's certificate check disabled, so only the content is extracted.

    :param invoice_file: the path of the signed invoice file.
    :param output_file: the path of the destination file.
    :type invoice_file: str
    :type output_file: str
    :returns: ``True`` if openssl exits with status 0, ``False`` otherwise.
    :rtype: bool
    :raises: a subprocess or built-in exception.
    """
    argv = [
        'openssl', 'smime', '-nosigs', '-verify', '-noverify',
        '-in', invoice_file,
        '-inform', 'DER',
        '-out', output_file,
    ]
    completed = subprocess.run(argv)
    return completed.returncode == 0
||||
|
||||
|
||||
def extract_attachments_from_invoice_file(
        invoice_file_xml_root, invoice_file_xml_attachment_xpath: str,
        invoice_file_xml_attachment_tag: str,
        invoice_file_xml_attachment_filename_tag: str,
        invoice_file_text_encoding: str,
        ignore_attachment_extension_whitelist: bool = False,
        ignore_attachment_filetype_whitelist: bool = False,
        attachment_extension_whitelist: list = None,
        attachment_filetype_whitelist: list = None):
    r"""Extract, decode and save possible attachments within the invoice file.

    Each attachment is written in the current working directory, using
    only the final component of the file name declared in the invoice.

    :param invoice_file_xml_root: the XML tree root of the invoice file.
    :param invoice_file_xml_attachment_xpath: the full path, from the XML root,
        corresponding to the attachments.
    :param invoice_file_xml_attachment_tag: the XML tag name corresponding to the
        attachment content.
    :param invoice_file_xml_attachment_filename_tag: the XML tag name
        corresponding to the attachment filename.
    :param invoice_file_text_encoding: the text encoding used for the
        invoice file.
    :param ignore_attachment_extension_whitelist: avoid checking file extensions.
        Defaults to ``False``.
    :param ignore_attachment_filetype_whitelist: avoid checking file types.
        Defaults to ``False``.
    :param attachment_extension_whitelist: the accepted file name endings.
        Defaults to an empty whitelist, which rejects every attachment.
    :param attachment_filetype_whitelist: the accepted MIME types.
        Defaults to an empty whitelist, which rejects every attachment.
    :type invoice_file_xml_root: lxml.etree._Element
    :type invoice_file_xml_attachment_xpath: str
    :type invoice_file_xml_attachment_tag: str
    :type invoice_file_xml_attachment_filename_tag: str
    :type invoice_file_text_encoding: str
    :type ignore_attachment_extension_whitelist: bool
    :type ignore_attachment_filetype_whitelist: bool
    :type attachment_extension_whitelist: list
    :type attachment_filetype_whitelist: list
    :returns: None
    :rtype: None
    :raises: ExtractedAttachmentNotInExtensionWhitelist,
        ExtractedAttachmentNotInFileTypeWhitelist, base64.binascii.Error,
        filetype, atomicwrites, or a built-in exception.
    """
    # None replaces the former mutable ``list()`` defaults.
    if attachment_extension_whitelist is None:
        attachment_extension_whitelist = ()
    if attachment_filetype_whitelist is None:
        attachment_filetype_whitelist = ()
    extension_whitelist = tuple(attachment_extension_whitelist)

    for at in invoice_file_xml_root.findall(invoice_file_xml_attachment_xpath):
        attachment = at.find(invoice_file_xml_attachment_tag).text
        declared_filename = at.find(
            invoice_file_xml_attachment_filename_tag).text

        # The file name comes from the invoice, i.e. from a third party:
        # keep only its last component so that it cannot point outside the
        # current directory (``../x``, ``/etc/x``, ``..\x``).
        attachment_dest_path = pathlib.PurePosixPath(
            declared_filename.replace('\\', '/')).name

        if (not ignore_attachment_extension_whitelist
                and not attachment_dest_path.endswith(extension_whitelist)):
            raise ExtractedAttachmentNotInExtensionWhitelist

        # b64decode accepts any bytes-like object. There should not be any
        # character encoding problems since base64 characters are represented
        # using the same character ids on UTF-8 and ASCII.
        # Just in case that there are alien characters in the base64 string
        # (sic, it happened!) we use validate=False as an option to skip them.
        decoded = base64.b64decode(
            attachment.encode(invoice_file_text_encoding), validate=False)

        if not ignore_attachment_filetype_whitelist:
            # See https://h2non.github.io/filetype.py/1.0.0/filetype.m.html#filetype.filetype.get_type
            # guess() returns None for unrecognized content: such content
            # cannot be in the whitelist, so it is rejected as well.
            guessed_type = filetype.guess(decoded)
            if (guessed_type is None
                    or guessed_type.mime not in attachment_filetype_whitelist):
                raise ExtractedAttachmentNotInFileTypeWhitelist

        with atomicwrites.atomic_write(attachment_dest_path, mode='wb', overwrite=True) as f:
            f.write(decoded)
||||
|
||||
|
||||
def get_invoice_as_html(
        invoice_file_xml_root, invoice_file_xml_stylesheet_root,
        html_output_file: str, invoice_file_text_encoding: str):
    r"""Render the XML invoice as HTML by applying an XSLT stylesheet.

    :param invoice_file_xml_root: the XML tree root of the invoice file.
    :param invoice_file_xml_stylesheet_root: the XML tree root of the
        stylesheet file.
    :param html_output_file: the destination file.
    :param invoice_file_text_encoding: the text encoding used to decode
        the serialized result before it is written.
    :type invoice_file_xml_root: lxml.etree._Element
    :type invoice_file_xml_stylesheet_root: lxml.etree._Element
    :type html_output_file: str
    :type invoice_file_text_encoding: str
    :returns: None
    :rtype: None
    :raises: an lxml, atomicwrites, or a built-in exception.
    """
    apply_stylesheet = ET.XSLT(invoice_file_xml_stylesheet_root)
    html_tree = apply_stylesheet(invoice_file_xml_root)
    with atomicwrites.atomic_write(html_output_file, mode='w', overwrite=True) as f:
        # tostring returns bytes here: decode them for the text mode file.
        serialized = ET.tostring(html_tree, pretty_print=True)
        f.write(serialized.decode(invoice_file_text_encoding))
||||
|
||||
|
||||
def patch_invoice_schema_file(invoice_schema_file: str, offending_line: str, fix_line: str):
    r"""Replace a known broken line of the schema file, in place.

    :param invoice_schema_file: the path of the schema file.
    :param offending_line: the line of the schema file that needs to be
        changed. It is compared with each whole line as read from the
        file, line terminator included.
    :param fix_line: the string that replaces the offending line.
    :type invoice_schema_file: str
    :type offending_line: str
    :type fix_line: str
    :returns: None
    :rtype: None
    :raises: an atomicwrites, or a built-in exception.

    .. note: this cannot be patched with lxml because an exception is raised:
        lxml.etree.XMLSyntaxError: Namespace prefix xsd on import is not defined, line 7, column 154

    .. note: this sucks. A better solution needs to be found.
    """
    # The whole file is read before it is replaced, since source and
    # destination are the same path.
    with open(invoice_schema_file, 'r') as f:
        patched_lines = [
            fix_line if line == offending_line else line for line in f
        ]
    with atomicwrites.atomic_write(invoice_schema_file, mode='w', overwrite=True) as f:
        f.writelines(patched_lines)
||||
|
||||
############################## |
||||
# Pipeline related functions # |
||||
############################## |
||||
|
||||
def create_appdirs(program_name: str):
    r"""Make sure the user data and configuration directories exist.

    :param program_name: the name of the software.
    :type program_name: str
    :raises: a pathlib or a built-in exception.
    :returns: None
    :rtype: None

    .. note: for security reasons the directories are created with
        restrictive permissions (``0o700``). Directories that already
        exist are left untouched.
    """
    # Data directory first, then configuration directory.
    for locate_directory in (appdirs.user_data_dir, appdirs.user_config_dir):
        directory = pathlib.Path(locate_directory(program_name))
        directory.mkdir(mode=0o700, parents=True, exist_ok=True)
||||
|
||||
def define_appdirs_user_data_dir_file_path(program_name: str, relative_path: str):
    r"""Build the path of a file placed in the user's data directory.

    :param program_name: the name of the software.
    :param relative_path: the relative path of the file, i.e: the file name.
    :type program_name: str
    :type relative_path: str
    :returns: the user's data directory joined with ``relative_path``.
    :rtype: str
    """
    data_directory = pathlib.Path(appdirs.user_data_dir(program_name))
    return str(data_directory / relative_path)
||||
|
||||
def define_appdirs_user_config_dir_file_path(program_name: str, relative_path: str):
    r"""Build the path of a file placed in the user's configuration directory.

    :param program_name: the name of the software.
    :param relative_path: the relative path of the file, i.e: the file name.
    :type program_name: str
    :type relative_path: str
    :returns: the user's configuration directory joined with
        ``relative_path``.
    :rtype: str
    """
    config_directory = pathlib.Path(appdirs.user_config_dir(program_name))
    return str(config_directory / relative_path)
||||
|
||||
def write_configuration_file(configuration_file: str):
    r"""Dump the built-in defaults as a configuration file.

    The file has three sections, ``metadata file``, ``trusted list file``
    and ``invoice file``, filled with the values of the ``XML``,
    ``Downloads`` and ``File`` constants.

    :param configuration_file: the path of the configuration file.
    :type configuration_file: str
    :returns: None
    :rtype: None
    :raises: a configparser or a built-in exception.
    """
    metadata_xml = XML['metadata file']
    trusted_list_xml = XML['trusted list file']
    invoice_xml = XML['invoice file']
    invoice_downloads = Downloads['invoice file']
    attachment_rules = File['invoice']['attachment']

    parser = configparser.ConfigParser()
    # Keep the option names exactly as written instead of lowercasing them.
    parser.optionxform = str

    parser['metadata file'] = {
        'XML namespace': metadata_xml['namespaces']['default'],
        'XML invoice checksum tag': metadata_xml['tags']['invoice checksum'],
        'XML invoice filename tag': metadata_xml['tags']['invoice filename'],
        'XML system id tag': metadata_xml['tags']['system id'],
    }
    parser['trusted list file'] = {
        'XML namespace': trusted_list_xml['namespaces']['default'],
        'XML certificate tag': trusted_list_xml['tags']['certificate'],
        'download': Downloads['trusted list file']['default'],
    }
    parser['invoice file'] = {
        'XML namespace': invoice_xml['namespaces']['default'],
        'XML attachment tag': invoice_xml['tags']['attachment'],
        'XML attachment filename tag': invoice_xml['tags']['attachment filename'],
        'XML attachment XPath': invoice_xml['XPath']['attachment'],
        'text encoding': invoice_xml['proprieties']['text encoding'],
        'XSD download': invoice_downloads['XSD']['default'],
        'W3C XSD download': invoice_downloads['XSD']['W3C Schema for XML Signatures'],
        'XSLT ordinaria download': invoice_downloads['XSLT']['ordinaria'],
        'XSLT PA download': invoice_downloads['XSLT']['PA'],
        'attachment extension whitelist': attachment_rules['extension whitelist'],
        'attachment filetype whitelist': attachment_rules['filetype whitelist'],
    }

    with open(configuration_file, 'w') as handle:
        parser.write(handle)
||||
|
||||
def load_configuration(configuration_file: str): |
||||
r"""Attempt to load the configuration file. |
||||
|
||||
:param configuration_file: the path of the configuration file. |
||||
:type configuration_file: str |
||||
:returns: the configuration. |
||||
:rtype: dict |
||||
:raises: a configparser or a built-in exception. |
||||
|
||||
.. note: errors are not raised if the configuration file does not exist. |
||||
""" |
||||
config = configparser.ConfigParser() |
||||
config.optionxform = str |
||||
config.read(configuration_file) |
||||
|
||||
configuration = dict() |
||||
configuration['metadata file']=dict() |
||||
configuration['trusted list file']=dict() |
||||
configuration['invoice file']=dict() |
||||
|
||||
configuration['metadata file']['XML namespace'] = config.get('metadata file', 'XML namespace', fallback=XML['metadata file']['namespaces']['default']) |
||||
configuration['metadata file']['XML invoice checksum tag'] = config.get('metadata file', 'XML invoice checksum tag', fallback=XML['metadata file']['tags']['invoice checksum']) |
||||
configuration['metadata file']['XML invoice filename tag'] = config.get('metadata file', 'invoice filename tag', fallback=XML['metadata file']['tags']['invoice filename']) |
||||
configuration['metadata file']['XML system id tag'] = config.get('metadata file', 'XML system id tag', fallback=XML['metadata file']['tags']['system id']) |
||||
|
||||
configuration['trusted list file']['XML namespace'] = config.get('trusted list file', 'XML namespace', fallback=XML['trusted list file']['namespaces']['default']) |
||||
configuration['trusted list file']['XML certificate tag'] = config.get('trusted list file', 'XML certificate tag', fallback=XML['trusted list file']['tags']['certificate']) |
||||
configuration['trusted list file']['download'] = config.get('trusted list file', 'download', fallback=Downloads['trusted list file']['default']) |
||||
|
||||