Browse Source

First commit.

dev
Franco Masotti 3 years ago
commit
eab4a985cc
Signed by: frnmst
GPG Key ID: 24116ED85666780A
  1. 121
      .gitignore
  2. 55
      Makefile
  3. 73
      README.rst
  4. 20
      docs/Makefile
  5. 44
      docs/api.rst
  6. 181
      docs/conf.py
  7. 177
      docs/considerations.rst
  8. 116
      docs/contributing.rst
  9. 18
      docs/copyright_license.rst
  10. 43
      docs/examples.rst
  11. 25
      docs/index.rst
  12. 41
      docs/install.rst
  13. 36
      fattura_elettronica_reader/__init__.py
  14. 46
      fattura_elettronica_reader/__main__.py
  15. 647
      fattura_elettronica_reader/api.py
  16. 200
      fattura_elettronica_reader/cli.py
  17. 125
      fattura_elettronica_reader/constants.py
  18. 47
      fattura_elettronica_reader/exceptions.py
  19. 12
      requirements.txt
  20. 55
      setup.py
  21. 21
      tests/__init__.py
  22. 36
      tests/tests.py

121
.gitignore vendored

@ -0,0 +1,121 @@
*.md
*.MD
*.XML
*.xml
*.XSD
*.xsd
*.p7m
*.P7M
*.PEM
*.pem
*.xslt
*.XSLT
*.html
*.HTML
*.PDF
*.pdf
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
.static_storage/
.media/
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/

55
Makefile

@ -0,0 +1,55 @@
#!/usr/bin/env make
#
# Makefile
#
# Copyright (C) 2019 Franco Masotti <franco.masotti@live.com>
#
# This file is part of fattura-elettronica-reader.
#
# fattura-elettronica-reader is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# fattura-elettronica-reader is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>.
#
# Default goal: lint, build the documentation and run the unit tests.
default: pep doc test

# Make git use the hooks shipped in the repository's .githooks directory.
githook:
	git config core.hooksPath .githooks

# Reformat in place with yapf, then lint with flake8.
pep:
	yapf --style '{based_on_style: pep8; indent_width: 4}' -i fattura_elettronica_reader/*.py tests/*.py
	flake8 fattura_elettronica_reader/*.py --ignore=F401,E501 tests/*.py

# Build the HTML documentation through the Sphinx makefile in docs/.
doc:
	$(MAKE) -C docs html

install:
	pip install .

test:
	python setup.py test

uninstall:
	pip uninstall fattura_elettronica_reader

# Build both the source distribution and the wheel.
dist:
	python setup.py sdist
	python setup.py bdist_wheel

upload:
	twine upload dist/*

clean:
	rm -rf build dist *.egg-info

# None of these targets is a file to be built; "dist" in particular is also
# the name of a directory created by setup.py, so it must stay phony.
.PHONY: default githook pep doc install test uninstall dist upload clean

73
README.rst

@ -0,0 +1,73 @@
fattura-elettronica-reader
==========================
|license| |pyver| |gitter|
.. |license| image:: https://img.shields.io/pypi/l/fattura-elettronica-reader.svg?color=blue
:alt: PyPI - License
:target: https://raw.githubusercontent.com/frnmst/fattura-elettronica-reader/master/LICENSE.txt
.. |pyver| image:: https://img.shields.io/pypi/pyversions/fattura-elettronica-reader.svg
:alt: PyPI - Python Version
.. |gitter| image:: https://badges.gitter.im/fattura-elettronica-reader/community.svg
:alt: Gitter
:target: https://gitter.im/fattura-elettronica-reader/community
Validate, extract, and generate printables of electronic invoice files received
from the "Sistema di Interscambio".
NOTE
----
This software is in a pre-alpha stage: it is NOT ready for production use.
All contributions are welcome.
See http://frnmst.github.io/fattura-elettronica-reader/contributing.html#todo-and-fixme
Discussion here: https://gitter.im/fattura-elettronica-reader/community
Video
-----
TODO
Documentation
-------------
http://frnmst.github.io/fattura-elettronica-reader
API examples
------------
TODO
CLI help
--------
::
$ fattura_elettronica_reader --help
License
-------
Copyright (c) 2018 Enio Carboni - Italy
Copyright (C) 2019 frnmst (Franco Masotti) <franco.masotti@live.com>
fattura-elettronica-reader is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
fattura-elettronica-reader is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>.

20
docs/Makefile

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = fattura-elettronica-reader
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

44
docs/api.rst

@ -0,0 +1,44 @@
Developer Interface
===================
.. module:: fattura_elettronica_reader
Main Interface
--------------
Examples for the most relevant api functions can be viewed in the test
file. fattura_elettronica_reader's API uses `type hints`_ instead of assertions to check input and
output types.
.. _type hints: https://docs.python.org/3/library/typing.html
.. autofunction:: is_xml_file_conforming_to_schema
.. autofunction:: parse_xml_file
.. autofunction:: get_invoice_filename
.. autofunction:: is_invoice_file_signed
.. autofunction:: invoice_file_checksum_matches
.. autofunction:: get_remote_file
.. autofunction:: get_ca_certificates
.. autofunction:: is_invoice_file_authentic
.. autofunction:: remove_signature_from_invoice_file
.. autofunction:: extract_attachments_from_invoice_file
.. autofunction:: get_invoice_as_html
.. autofunction:: patch_invoice_schema_file
.. autofunction:: create_appdirs
.. autofunction:: define_appdirs_user_data_dir_file_path
.. autofunction:: define_appdirs_user_config_dir_file_path
.. autofunction:: write_configuration_file
.. autofunction:: load_configuration
.. autofunction:: pipeline
Exceptions
----------
.. autoexception:: InvoiceFileDoesNotHaveACoherentCryptographicalSignature
.. autoexception:: InvoiceFileChecksumFailed
.. autoexception:: InvoiceFileNotAuthentic
.. autoexception:: CannotExtractOriginalInvoiceFile
.. autoexception:: MissingTagInMetadataFile
.. autoexception:: XMLFileNotConformingToSchema
.. autoexception:: ExtractedAttachmentNotInExtensionWhitelist
.. autoexception:: ExtractedAttachmentNotInFileTypeWhitelist

181
docs/conf.py

@ -0,0 +1,181 @@
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('..'))
# -- Project information -----------------------------------------------------
project = 'fattura-elettronica-reader'
copyright = '2019, Franco Masotti'
author = 'Franco Masotti'
# The short X.Y version
version = '0.0.1'
# The full version, including alpha/beta/rc tags
release = '0.0.1'
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.coverage',
'sphinx.ext.imgmath',
'sphinx.ext.viewcode',
'sphinx.ext.githubpages',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The master toctree document.
master_doc = 'index'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'fattura-elettronica-readerdoc'
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'fattura-elettronica-reader.tex', 'fattura-elettronica-reader Documentation',
'Franco Masotti', 'manual'),
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'fattura-elettronica-reader', 'fattura-elettronica-reader Documentation',
[author], 1)
]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'fattura-elettronica-reader', 'fattura-elettronica-reader Documentation',
     author, 'fattura-elettronica-reader',
     # Description shown in the Texinfo dir menu entry (replaces the
     # sphinx-quickstart placeholder).
     'Validate, extract, and generate printables of electronic invoice files '
     'received from the "Sistema di Interscambio".',
     'Miscellaneous'),
]
# -- Options for Epub output -------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = project
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''
# A unique identification for the text.
#
# epub_uid = ''
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
# -- Extension configuration -------------------------------------------------

177
docs/considerations.rst

@ -0,0 +1,177 @@
Considerations
==============
Purpose
-------
This software aims at:
- correctness
- XML files are validated whenever possible
- integrity
- a checksum is performed on the invoice file
- security
- all cryptographical signature and certificates must be valid
- invoice attachments are examined before saving them permanently
- privacy
- works offline once you have downloaded the necessary files
Pipeline
--------
See the definition of pipeline:
- https://en.wikipedia.org/wiki/Pipeline_(software)
In this software, a pipeline is a sequence of steps that given the signed invoice file
and its metadata file makes it possible to obtain:
- various integrity and authenticity verifications
- the html output of the invoice file
- the extraction of embedded attachments
Description
```````````
Without entering in too much detail (you can read the source code), we can divide
the pipeline into three main steps:
=========== ===================================================
Step number Actions
=========== ===================================================
1 compare metadata file content with the invoice file
2 check invoice signature and signer's certificate
3 generate outputs
=========== ===================================================
Software dependencies
---------------------
OpenSSL
```````
Support
~~~~~~~
Due to the lack of support for most of the PKCS#7 functionality in
PyOpenSSL, all OpenSSL operations are performed using the ``subprocess``
module which calls the ``openssl`` binary installed on the system.
See also:
- https://stackoverflow.com/questions/45104923/pyopenssls-pkcs7-object-provide-very-little-information-how-can-i-get-the-sha1/45111623#45111623
- https://stackoverflow.com/questions/45782506/is-there-any-python-package-for-parsing-pkcs7?rq=1
Bug
~~~
There are problems with recent versions of OpenSSL concerning PKCS#7 file decoding:
- https://github.com/eniocarboni/p7m/issues/4
- https://github.com/eniocarboni/p7m/issues/7
- https://www.mail-archive.com/openssl-users@openssl.org/msg85901.html
A possible solution is to use an older system. If you really trust the file you can
disable signature checking with the appropriate option. I strongly discourage
this last solution since if you can't prove its authenticity the invoice has no legal
value.
lxml vs defusedxml
~~~~~~~~~~~~~~~~~~
I decided to use lxml because it supports XML stylesheets (XSLT), something that
defusedxml does not. At first, using defusedxml seemed the best bet because of
the increased security:
- https://github.com/tiran/defusedxml#python-xml-libraries
There is the possibility to use defusedxml anyway by simply
editing the API file like this:
::
import defusedxml.ElementTree as ET
instead of:
::
import lxml.etree as ET
You must also add defusedxml and re-install the requirements as described
in the contributing section.
::
defusedxml==0.5.0
Official documentation
----------------------
- this seems to be legal mumbo jumbo but there might be relevant information:
- https://www.fatturapa.gov.it/export/fatturazione/it/normativa/f-1.htm
- https://www.fatturapa.gov.it/export/fatturazione/it/normativa/f-2.htm
- https://www.fatturapa.gov.it/export/fatturazione/it/normativa/f-3.htm
XML schemas for document validations
````````````````````````````````````
Most of the XML schemas seem to be non-existent. Only the one for the
invoice file was found, and was reported by lxml as incorrect,
but according to the server the schema was last modified on
``Tue, 25 Jun 2019 10:16:31 GMT``, so they fixed the offending ``xsd`` typo:
- https://www.fatturapa.gov.it/export/fatturazione/sdi/fatturapa/v1.2.1/Schema_del_file_xml_FatturaPA_versione_1.2.1.xsd
Not having access to all schema files is a problem since there is no way to tell if
- the metadata file,
- the trusted list file,
- the XML stylesheet file
are correct and conforming to specifications.
If you find these files please let me know and/or open a pull request.
Downloading of the W3C file
~~~~~~~~~~~~~~~~~~~~~~~~~~~
The W3C schema file is a dependency for the invoice schema file and it needs to be downloaded
separately. For some reason the downloading of this file takes a few seconds.
Technical differences between "Fattura PA", "Fattura B2B" and other
```````````````````````````````````````````````````````````````````
Digital signature
~~~~~~~~~~~~~~~~~
Some websites say that digital signature of the "Fattura PA" is compulsory and
in other cases it is not.
If you find any official source please let me know and/or open a pull request.
Closing note
------------
Although this whole system has its merits, it has been put into production
with lots of missing pieces (see notes above). Given the importance of it I think this
is unacceptable. I will not make any more comments because I may risk legal actions...

116
docs/contributing.rst

@ -0,0 +1,116 @@
Contributing
============
Git branches
------------
What follows is a table of the git branches used in fattura-elettronica-reader's repository.
Please, do NOT open pull requests on the ``master``, ``dev`` or ``gh-pages`` branches.
Use ``bugfix-${fix_name}`` or ``newfeature-${new_feature_name}`` instead.
===================================== ==================================================== ==============================
Branch Description Update schedule
===================================== ==================================================== ==============================
``master`` the main branch every new release
``dev`` recent changes are merged here before a new release at will
``gh-pages`` contains the built documentation only every new release
``bugfix-${fix_name}`` a generic bug fix
``newfeature-${new_feature_name}`` a generic new feature
===================================== ==================================================== ==============================
Dependencies
------------
First of all install the software requirements from the ``requirements.txt`` file
in the repository's root:
::
$ pip install -r requirements.txt
Unit tests
----------
If you have changed parts of the source code you MUST take care of adding
the corresponding unit tests. Once you have done that run the following command
in a terminal:
::
$ python setup.py test
or simply:
::
$ make test
You can also run this command automatically before every git commit by running:
::
$ make githook
Python PEP compliancy
---------------------
To be able to lint and test for PEP compliancy you need to run:
::
$ make pep
Documentation
-------------
You can edit and rebuild all this documentation with:
::
$ make doc
TODO and FIXME
--------------
Go in the repository's root and then:
::
grep -e TODO -e FIXME -n */*.py
Important stuff
```````````````
- unit tests: I guess we should use the examples from the government's website,
if the license permits it.
- GUI: for example https://github.com/chriskiehl/Gooey
- i18n (both CLI and GUI): https://docs.python.org/3/library/gettext.html
Contribution Steps
------------------
1. clone the repository
2. install the requirements
3. write code
4. write unit tests
5. run tests
6. run PEP linter and check
7. update relevant documentation, if necessary
8. pull request

18
docs/copyright_license.rst

@ -0,0 +1,18 @@
Copyright and License
=====================
Copyright (c) 2018 Enio Carboni - Italy
Copyright (C) 2019 frnmst (Franco Masotti) <franco.masotti@live.com>
fattura-elettronica-reader is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
fattura-elettronica-reader is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>.

43
docs/examples.rst

@ -0,0 +1,43 @@
Examples
========
CLI
---
You want everything, you trust nothing
``````````````````````````````````````
::
$ fattura_elettronica_reader -A ${metadata_file}
which is the same as
::
$ fattura_elettronica_reader -H -a -o ${metadata_file}
You want everything, you trust everything
`````````````````````````````````````````
::
$ fattura_elettronica_reader -SsAkwW ${metadata_file}
which is the same as
::
$ fattura_elettronica_reader -H -a -o -S -s -w -W ${metadata_file}

25
docs/index.rst

@ -0,0 +1,25 @@
.. fattura-elettronica-reader documentation master file, created by
sphinx-quickstart on Tue Apr 23 16:24:44 2019.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to fattura-elettronica-reader's documentation!
======================================================
.. toctree::
:maxdepth: 2
:caption: Contents:
install
api
considerations
examples
contributing
copyright_license
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

41
docs/install.rst

@ -0,0 +1,41 @@
Installation
============
Run the following command in either a root or normal terminal (depending on
your Python setup) from the root directory of the project's cloned repository,
::
pip install .
or simply
::
make install
You can also install fattura_elettronica_reader via pip (i.e. without having to download the source
code):
::
pip install fattura_elettronica_reader
All the necessary dependencies should be installed automatically along with the
program.
Distribution packages
---------------------
- A ``PKGBUILD`` for Arch Linux like distributions is available under
the ``./packages/aur`` directory as well as on the AUR website.
Dependencies
------------
- Python 3.5

36
fattura_elettronica_reader/__init__.py

@ -0,0 +1,36 @@
#
# __init__.py
#
# Copyright (c) 2018 Enio Carboni - Italy
# Copyright (C) 2017-2019 Franco Masotti <franco.masotti@live.com>
#
# This file is part of fattura-elettronica-reader.
#
# fattura-elettronica-reader is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# fattura-elettronica-reader is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>.
#
"""Python discovery file."""
from .api import (
is_xml_file_conforming_to_schema, parse_xml_file, get_invoice_filename,
is_invoice_file_signed, invoice_file_checksum_matches, get_remote_file,
get_ca_certificates, is_invoice_file_authentic, remove_signature_from_invoice_file,
extract_attachments_from_invoice_file, get_invoice_as_html, patch_invoice_schema_file,
create_appdirs, define_appdirs_user_data_dir_file_path, define_appdirs_user_config_dir_file_path,
write_configuration_file, load_configuration, pipeline)
from .cli import (CliInterface)
from .exceptions import (InvoiceFileDoesNotHaveACoherentCryptographicalSignature,
InvoiceFileChecksumFailed, InvoiceFileNotAuthentic,
CannotExtractOriginalInvoiceFile, MissingTagInMetadataFile,
XMLFileNotConformingToSchema, ExtractedAttachmentNotInExtensionWhitelist,
ExtractedAttachmentNotInFileTypeWhitelist)

46
fattura_elettronica_reader/__main__.py

@ -0,0 +1,46 @@
#
# __main__.py
#
# Copyright (c) 2018 Enio Carboni - Italy
# Copyright (C) 2019 Franco Masotti <franco.masotti@live.com>
#
# This file is part of fattura-elettronica-reader.
#
# fattura-elettronica-reader is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# fattura-elettronica-reader is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>.
#
"""Call the CLI parser."""
import sys
import traceback
from .cli import CliInterface
def main(args=None):
    """Call the CLI interface and wait for the result.

    :param args: the list of command line arguments to parse. When ``None``
        (the default) it is forwarded as-is to the parser.
    :type args: list
    :returns: None. The function always terminates through ``sys.exit``
        with code ``0`` on success and ``1`` if any exception was raised.
    :rtype: None
    """
    retcode = 0
    try:
        ci = CliInterface()
        # Forward the caller-supplied arguments instead of silently
        # ignoring them.
        # NOTE(review): assumes ci.parser is an argparse parser, for which
        # parse_args(None) reads sys.argv[1:] as before -- confirm in cli.py.
        args = ci.parser.parse_args(args)
        result = args.func(args)
        if result is not None:
            print(result)
    except Exception:
        # Report the failure with a full traceback and a non-zero exit code.
        retcode = 1
        traceback.print_exc()
    sys.exit(retcode)


if __name__ == '__main__':
    main()

647
fattura_elettronica_reader/api.py

@ -0,0 +1,647 @@
#
# api.py
#
# Copyright (c) 2018 Enio Carboni - Italy
# Copyright (C) 2019 Franco Masotti <franco.masotti@live.com>
#
# This file is part of fattura-elettronica-reader.
#
# fattura-elettronica-reader is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# fattura-elettronica-reader is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with fattura-elettronica-reader. If not, see <http://www.gnu.org/licenses/>.
#
"""The main file."""
import subprocess
import shlex
import lxml.etree as ET
import hashlib
import requests
import base64
import pathlib
import tempfile
import shutil
import atomicwrites
import filetype
import appdirs
import configparser
from .exceptions import (InvoiceFileDoesNotHaveACoherentCryptographicalSignature,
InvoiceFileChecksumFailed, InvoiceFileNotAuthentic,
CannotExtractOriginalInvoiceFile,
MissingTagInMetadataFile, XMLFileNotConformingToSchema,
ExtractedAttachmentNotInExtensionWhitelist,
ExtractedAttachmentNotInFileTypeWhitelist)
from .constants import (XML, Paths, Downloads, Patch, File)
#######
# API #
#######
def is_xml_file_conforming_to_schema(xml_file: str, xml_schema_file: str) -> bool:
    r"""Validate an XML file against an XML schema file.

    :param xml_file: the path of the XML file to be validated.
    :param xml_schema_file: the path of the schema file.
    :type xml_file: str
    :type xml_schema_file: str
    :returns: the result of the schema validation: ``True`` if the
        XML file follows the schema, ``False`` otherwise.
    :rtype: bool
    :raises: an lxml or a built-in exception.
    """
    # The schema is loaded first, then the document to be checked.
    schema = ET.XMLSchema(etree=ET.parse(xml_schema_file))
    document = ET.parse(xml_file)
    return schema.validate(document)
def parse_xml_file(xml_file: str):
    r"""Parse an XML file and return the root of its tree.

    :param xml_file: the input XML file.
    :type xml_file: str
    :returns: the root element of the parsed XML tree.
    :rtype: ET.parse.getroot
    :raises: an lxml or a built-in exception.
    """
    return ET.parse(xml_file).getroot()
def get_invoice_filename(metadata_file_xml_root,
                         metadata_file_invoice_filename_xml_tag: str,
                         metadata_file_xml_namespace: str) -> str:
    r"""Return the file name of the invoice file.

    :param metadata_file_xml_root: the root of the metadata XML tree.
    :param metadata_file_invoice_filename_xml_tag: the tag name corresponding
        to the invoice filename.
    :param metadata_file_xml_namespace: the XML namespace of the metadata
        file. It is passed unchanged as the second argument of ``find``.
    :type metadata_file_xml_root: lxml.etree._Element
    :type metadata_file_invoice_filename_xml_tag: str
    :type metadata_file_xml_namespace: str
    :returns: the text content of the first element matching the tag.
    :rtype: str
    :raises: an lxml or a built-in exception. Accessing ``.text`` fails
        if ``find`` does not return an element.
    """
    filename_element = metadata_file_xml_root.find(
        metadata_file_invoice_filename_xml_tag, metadata_file_xml_namespace)
    return filename_element.text
def is_invoice_file_signed(invoice_file: str) -> bool:
    r"""Tell whether the invoice file carries a PKCS#7 signature.

    The check is delegated to the ``openssl pkcs7`` command, which is run
    on the file in DER form: a zero exit status means the file is signed.

    :param invoice_file: the path of the invoice file.
    :type invoice_file: str
    :returns: True if the file is signed, False otherwise.
    :rtype: bool
    :raises: a subprocess or a built-in exception.
    """
    # Quote the path so that it survives the split as a single argument.
    quoted_path = shlex.quote(invoice_file)
    argv = shlex.split(
        'openssl pkcs7 -print_certs -text -noout -inform DER -in {}'.format(
            quoted_path))
    completed = subprocess.run(argv)
    return completed.returncode == 0
def invoice_file_checksum_matches(metadata_file_xml_root, invoice_file: str,
                                  metadata_file_invoice_checksum_xml_tag: str,
                                  metadata_file_xml_namespace: str) -> tuple:
    r"""Check if the invoice checksum matches the one in the metadata file.

    :param metadata_file_xml_root: the root of the metadata XML tree.
    :param invoice_file: the path of the invoice file.
    :param metadata_file_invoice_checksum_xml_tag: the XML tag name
        corresponding to the invoice file checksum.
    :param metadata_file_xml_namespace: the XML namespace of the metadata
        file. It is passed unchanged as the second argument of ``find``.
    :type metadata_file_xml_root: lxml.etree._Element
    :type invoice_file: str
    :type metadata_file_invoice_checksum_xml_tag: str
    :type metadata_file_xml_namespace: str
    :returns: a ``(matches, expected_checksum)`` pair: ``matches`` is
        ``True`` if the SHA-256 hex digest of the invoice file is equal to
        the checksum stored in the metadata file, ``False`` otherwise.
    :rtype: tuple
    :raises: a hashlib, lxml or a built-in exception.
    """
    # Get the checksum from the metadata file.
    expected_checksum = metadata_file_xml_root.find(
        metadata_file_invoice_checksum_xml_tag,
        metadata_file_xml_namespace).text

    # Compute the checksum. The file is read in fixed-size chunks inside a
    # context manager so that the handle is always closed and large files
    # are not loaded in memory all at once.
    m = hashlib.sha256()
    with open(invoice_file, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            m.update(chunk)
    computed_checksum = m.hexdigest()

    return computed_checksum == expected_checksum, expected_checksum
def get_remote_file(destination: str, url: str, timeout: float = 60.0):
    r"""Download and save a remote file.

    :param destination: the local path of the downloaded file.
    :param url: the remote path of the file.
    :param timeout: the number of seconds passed to ``requests.get`` as
        its ``timeout`` argument, so that an unresponsive server cannot
        block the program forever.
    :type destination: str
    :type url: str
    :type timeout: float
    :returns: None
    :rtype: None
    :raises: a built-in exception or a requests error.

    .. note:: requests also checks that the url is in a valid form.
    """
    r = requests.get(url, timeout=timeout)
    # Raises on an unsuccessful HTTP status: nothing is written in that case.
    r.raise_for_status()
    # The destination is replaced only once the content is fully written.
    with atomicwrites.atomic_write(destination, mode='wb', overwrite=True) as f:
        f.write(r.content)
def get_ca_certificates(trusted_list_xml_root,
                        ca_certificate_pem_file: str,
                        trusted_list_file_xml_namespace: str,
                        trusted_list_file_xml_certificate_tag: str,
                        eol: str = '\n'):
    r"""Write the CA certificates file using the trusted list file.

    :param trusted_list_xml_root: the root of the trusted list XML tree.
    :param ca_certificate_pem_file: the destination file.
    :param trusted_list_file_xml_namespace: the XML namespace of the
        trusted list file.
    :param trusted_list_file_xml_certificate_tag: the XML tag name corresponding
        to the certificates in the trusted list file.
    :param eol: the end of line character to be used in the PEM file.
    :type trusted_list_xml_root: lxml.etree._Element
    :type ca_certificate_pem_file: str
    :type trusted_list_file_xml_namespace: str
    :type trusted_list_file_xml_certificate_tag: str
    :type eol: str
    :returns: None
    :rtype: None
    :raises: an atomicwrites, an lxml or a built-in exception.

    .. note:: See https://tools.ietf.org/html/rfc7468

    .. note:: certificate elements without any text are skipped and
        whitespace inside the base64 text is removed before wrapping it.
    """
    preeb = '-----BEGIN CERTIFICATE-----'
    posteb = '-----END CERTIFICATE-----'
    max_line_len = 64
    certificate_tag = ('{' + trusted_list_file_xml_namespace + '}'
                       + trusted_list_file_xml_certificate_tag)

    with atomicwrites.atomic_write(ca_certificate_pem_file, mode='w',
                                   overwrite=True) as f:
        # See https://lxml.de/tutorial.html#elementpath
        # for the exception that gets raised.
        for e in trusted_list_xml_root.iter(certificate_tag):
            # An empty element has ``text`` set to None. Line breaks or
            # indentation inside the element would otherwise end up in the
            # middle of the 64 character lines.
            base64text = ''.join((e.text or '').split())
            if not base64text:
                continue

            # This tries to follow RFC7468.
            # See https://tools.ietf.org/html/rfc7468#section-3
            base64lines = [
                base64text[i:i + max_line_len]
                for i in range(0, len(base64text), max_line_len)
            ]
            stricttextualmsg = eol.join([preeb] + base64lines
                                        + [posteb]) + eol
            f.write(stricttextualmsg)
def is_invoice_file_authentic(invoice_file: str,
                              ca_certificate_pem_file: str,
                              ignore_signature_check: bool = False,
                              ignore_signers_certificate_check: bool = False):
    r"""Verify the signed invoice file with ``openssl smime -verify``.

    :param invoice_file: the path of the signed invoice file.
    :param ca_certificate_pem_file: the certificates file in PEM format.
    :param ignore_signature_check: add ``-nosigs`` to the command so that
        the signature is not checked. Defaults to ``False``.
    :param ignore_signers_certificate_check: add ``-noverify`` to the command
        so that the signer's certificate is not checked.
        Defaults to ``False``.
    :type invoice_file: str
    :type ca_certificate_pem_file: str
    :type ignore_signature_check: bool
    :type ignore_signers_certificate_check: bool
    :returns: ``True`` if openssl exits with status 0, ``False`` otherwise.
    :rtype: bool
    :raises: a subprocess or built-in exception.
    """
    pieces = ['openssl', 'smime']
    if ignore_signature_check:
        pieces.append('-nosigs')
    pieces.append('-verify')
    if ignore_signers_certificate_check:
        pieces.append('-noverify')
    # The paths are quoted so that they survive the shlex.split below as
    # single arguments. The extracted content is discarded.
    pieces.extend([
        '-CAfile', shlex.quote(ca_certificate_pem_file),
        '-in', shlex.quote(invoice_file),
        '-inform', 'DER',
        '-out', '/dev/null',
    ])

    completed = subprocess.run(shlex.split(' '.join(pieces)))
    return completed.returncode == 0
def remove_signature_from_invoice_file(invoice_file: str,
                                       output_file: str) -> bool:
    r"""Extract the original invoice from the signed one using openssl.

    The signature and the signer's certificate are not checked here
    (``-nosigs`` and ``-noverify``): the command is only used to write
    the content of the signed file to ``output_file``.

    :param invoice_file: the path of the invoice file.
    :param output_file: the path of the destination file.
    :type invoice_file: str
    :type output_file: str
    :returns: ``True`` if openssl exits with status 0, ``False`` otherwise.
    :rtype: bool
    :raises: a subprocess or built-in exception.
    """
    source = shlex.quote(invoice_file)
    destination = shlex.quote(output_file)
    command = ' '.join([
        'openssl smime -nosigs -verify -noverify',
        '-in', source,
        '-inform DER',
        '-out', destination,
    ])

    completed = subprocess.run(shlex.split(command))
    return completed.returncode == 0
def extract_attachments_from_invoice_file(
        invoice_file_xml_root, invoice_file_xml_attachment_xpath: str,
        invoice_file_xml_attachment_tag: str,
        invoice_file_xml_attachment_filename_tag: str,
        invoice_file_text_encoding: str,
        ignore_attachment_extension_whitelist: bool = False,
        ignore_attachment_filetype_whitelist: bool = False,
        attachment_extension_whitelist: list = list(),
        attachment_filetype_whitelist: list = list()):
    r"""Extract, decode and save possible attachments within the invoice file.

    :param invoice_file_xml_root: the XML tree root of the invoice file.
    :param invoice_file_xml_attachment_xpath: the full path, from the XML root,
        corresponding to the attachments.
    :param invoice_file_xml_attachment_tag: the XML tag name corresponding to the
        attachment content.
    :param invoice_file_xml_attachment_filename_tag: the XML tag name
        corresponding to the attachment filename.
    :param invoice_file_text_encoding: the text encoding used for the
        invoice file.
    :param ignore_attachment_extension_whitelist: avoid checking file
        extensions. Defaults to ``False``.
    :param ignore_attachment_filetype_whitelist: avoid checking file types.
        Defaults to ``False``.
    :param attachment_extension_whitelist: the filename endings that are
        accepted. Defaults to ``list()``.
    :param attachment_filetype_whitelist: the MIME types that are accepted.
        Defaults to ``list()``.
    :type invoice_file_xml_root: lxml.etree._Element
    :type invoice_file_xml_attachment_xpath: str
    :type invoice_file_xml_attachment_tag: str
    :type invoice_file_xml_attachment_filename_tag: str
    :type invoice_file_text_encoding: str
    :type ignore_attachment_extension_whitelist: bool
    :type ignore_attachment_filetype_whitelist: bool
    :type attachment_extension_whitelist: list
    :type attachment_filetype_whitelist: list
    :returns: None
    :rtype: None
    :raises: ExtractedAttachmentNotInExtensionWhitelist,
        ExtractedAttachmentNotInFileTypeWhitelist, base64.binascii.Error,
        filetype, atomicwrites, or a built-in exception.

    .. note:: attachments are always written in the current working
        directory: any directory component of the filename stored in the
        invoice is discarded. ``ValueError`` is raised if no usable file
        name remains.
    """
    for at in invoice_file_xml_root.findall(invoice_file_xml_attachment_xpath):
        attachment = at.find(invoice_file_xml_attachment_tag).text
        attachment_filename = at.find(
            invoice_file_xml_attachment_filename_tag).text

        # The filename is read from the invoice, which is untrusted input.
        # Keep the last path component only so that values such as
        # '../../x' or '/etc/x' cannot overwrite files outside the current
        # working directory.
        attachment_dest_path = pathlib.PurePath(attachment_filename).name
        if attachment_dest_path in ('', '.', '..'):
            raise ValueError('invalid attachment filename: {!r}'.format(
                attachment_filename))

        if not ignore_attachment_extension_whitelist:
            if not attachment_dest_path.endswith(
                    tuple(attachment_extension_whitelist)):
                raise ExtractedAttachmentNotInExtensionWhitelist

        # b64decode accepts any bytes-like object. There should not be any
        # character encoding problems since base64 characters are represented
        # using the same character ids on UTF-8 and ASCII.
        # Just in case that there are alien characters in the base64 string
        # (sic, it happened!) we use validate=False as an option to skip them.
        decoded = base64.b64decode(
            attachment.encode(invoice_file_text_encoding), validate=False)

        if not ignore_attachment_filetype_whitelist:
            # See https://h2non.github.io/filetype.py/1.0.0/filetype.m.html#filetype.filetype.get_type
            # guess() returns None when the content is not recognized: an
            # unknown type cannot be in the whitelist.
            guessed = filetype.guess(decoded)
            if (guessed is None
                    or guessed.mime not in attachment_filetype_whitelist):
                raise ExtractedAttachmentNotInFileTypeWhitelist

        with atomicwrites.atomic_write(attachment_dest_path, mode='wb',
                                       overwrite=True) as f:
            f.write(decoded)
def get_invoice_as_html(
        invoice_file_xml_root, invoice_file_xml_stylesheet_root,
        html_output_file: str, invoice_file_text_encoding: str):
    r"""Apply the XSLT stylesheet to the invoice and save the result.

    :param invoice_file_xml_root: the XML tree root of the invoice file.
    :param invoice_file_xml_stylesheet_root: the XML tree root of the
        stylesheet file.
    :param html_output_file: the destination file.
    :param invoice_file_text_encoding: the text encoding used to decode
        the serialized result before writing it.
    :type invoice_file_xml_root: lxml.etree._Element
    :type invoice_file_xml_stylesheet_root: lxml.etree._Element
    :type html_output_file: str
    :type invoice_file_text_encoding: str
    :returns: None
    :rtype: None
    :raises: an lxml, atomicwrites, or a built-in exception.
    """
    stylesheet = ET.XSLT(invoice_file_xml_stylesheet_root)
    html_tree = stylesheet(invoice_file_xml_root)

    with atomicwrites.atomic_write(html_output_file, mode='w',
                                   overwrite=True) as f:
        serialized = ET.tostring(html_tree, pretty_print=True)
        f.write(serialized.decode(invoice_file_text_encoding))
def patch_invoice_schema_file(invoice_schema_file: str, offending_line: str,
                              fix_line: str):
    r"""Replace a known bad line of the schema file, in place.

    :param invoice_schema_file: the path of the schema file.
    :param offending_line: the line in the schema file that needs to be
        changed. It is compared with whole lines as read from the file,
        including the line terminator.
    :param fix_line: a string that replaces the offending line.
    :type invoice_schema_file: str
    :type offending_line: str
    :type fix_line: str
    :returns: None
    :rtype: None
    :raises: an atomicwrites, or a built-in exception.

    .. note:: this cannot be patched with lxml because an exception is raised:
        lxml.etree.XMLSyntaxError: Namespace prefix xsd on import is not defined, line 7, column 154

    .. note:: this sucks. A better solution needs to be found.
    """
    # Read everything first: the same path is rewritten right after.
    with open(invoice_schema_file, 'r') as f:
        patched = [
            fix_line if line == offending_line else line for line in f
        ]

    with atomicwrites.atomic_write(invoice_schema_file, mode='w',
                                   overwrite=True) as f:
        f.writelines(patched)
##############################
# Pipeline related functions #
##############################
def create_appdirs(program_name: str):
    r"""Make sure the user data and configuration directories exist.

    :param program_name: the name of the software.
    :type program_name: str
    :raises: a pathlib or a built-in exception.
    :returns: None
    :rtype: None

    .. note:: for security reasons the directories are requested with
        mode ``0o700``. Missing parents are created as well and already
        existing directories are left as they are.
    """
    for locate in (appdirs.user_data_dir, appdirs.user_config_dir):
        directory = pathlib.Path(locate(program_name))
        directory.mkdir(mode=0o700, parents=True, exist_ok=True)
def define_appdirs_user_data_dir_file_path(program_name: str,
                                           relative_path: str):
    r"""Build the path of a file placed in the user's data directory.

    :param program_name: the name of the software.
    :param relative_path: the relative path of the file, i.e: the file name.
    :type program_name: str
    :type relative_path: str
    :returns: the data directory joined with ``relative_path``.
    :rtype: str
    """
    data_directory = pathlib.Path(appdirs.user_data_dir(program_name))
    return str(data_directory.joinpath(relative_path))
def define_appdirs_user_config_dir_file_path(program_name: str,
                                             relative_path: str):
    r"""Build the path of a file placed in the user's configuration directory.

    :param program_name: the name of the software.
    :param relative_path: the relative path of the file, i.e: the file name.
    :type program_name: str
    :type relative_path: str
    :returns: the configuration directory joined with ``relative_path``.
    :rtype: str
    """
    config_directory = pathlib.Path(appdirs.user_config_dir(program_name))
    return str(config_directory.joinpath(relative_path))
def write_configuration_file(configuration_file: str):
    r"""Save a configuration file filled with the built-in default values.

    The values come from the ``XML``, ``Downloads`` and ``File`` constants
    and are grouped in three sections: ``metadata file``,
    ``trusted list file`` and ``invoice file``.

    :param configuration_file: the path of the configuration file.
    :type configuration_file: str
    :returns: None
    :rtype: None
    :raises: a configparser or a built-in exception.
    """
    metadata = XML['metadata file']
    trusted_list = XML['trusted list file']
    invoice = XML['invoice file']
    invoice_downloads = Downloads['invoice file']
    attachment = File['invoice']['attachment']

    defaults = {
        'metadata file': {
            'XML namespace': metadata['namespaces']['default'],
            'XML invoice checksum tag': metadata['tags']['invoice checksum'],
            'XML invoice filename tag': metadata['tags']['invoice filename'],
            'XML system id tag': metadata['tags']['system id'],
        },
        'trusted list file': {
            'XML namespace': trusted_list['namespaces']['default'],
            'XML certificate tag': trusted_list['tags']['certificate'],
            'download': Downloads['trusted list file']['default'],
        },
        'invoice file': {
            'XML namespace': invoice['namespaces']['default'],
            'XML attachment tag': invoice['tags']['attachment'],
            'XML attachment filename tag': invoice['tags']['attachment filename'],
            'XML attachment XPath': invoice['XPath']['attachment'],
            'text encoding': invoice['proprieties']['text encoding'],
            'XSD download': invoice_downloads['XSD']['default'],
            'W3C XSD download': invoice_downloads['XSD']['W3C Schema for XML Signatures'],
            'XSLT ordinaria download': invoice_downloads['XSLT']['ordinaria'],
            'XSLT PA download': invoice_downloads['XSLT']['PA'],
            'attachment extension whitelist': attachment['extension whitelist'],
            'attachment filetype whitelist': attachment['filetype whitelist'],
        },
    }

    config = configparser.ConfigParser()
    # Keep the option names as they are instead of lowercasing them.
    config.optionxform = str
    # Assigning a section with config[name] = {...} goes through read_dict
    # as well, so the sections are loaded in a single call here.
    config.read_dict(defaults)

    with open(configuration_file, 'w') as configfile:
        config.write(configfile)
def load_configuration(configuration_file: str):
r"""Attempt to load the configuration file.
:param configuration_file: the path of the configuration file.
:type configuration_file: str
:returns: the configuration.
:rtype: dict
:raises: a configparser or a built-in exception.
.. note: errors are not raised if the configuration file does not exist.
"""
config = configparser.ConfigParser()
config.optionxform = str
config.read(configuration_file)
configuration = dict()
configuration['metadata file']=dict()
configuration['trusted list file']=dict()
configuration['invoice file']=dict()
configuration['metadata file']['XML namespace'] = config.get('metadata file', 'XML namespace', fallback=XML['metadata file']['namespaces']['default'])
configuration['metadata file']['XML invoice checksum tag'] = config.get('metadata file', 'XML invoice checksum tag', fallback=XML['metadata file']['tags']['invoice checksum'])
configuration['metadata file']['XML invoice filename tag'] = config.get('metadata file', 'invoice filename tag', fallback=XML['metadata file']['tags']['invoice filename'])
configuration['metadata file']['XML system id tag'] = config.get('metadata file', 'XML system id tag', fallback=XML['metadata file']['tags']['system id'])
configuration['trusted list file']['XML namespace'] = config.get('trusted list file', 'XML namespace', fallback=XML['trusted list file']['namespaces']['default'])
configuration['trusted list file']['XML certificate tag'] = config.get('trusted list file', 'XML certificate tag', fallback=XML['trusted list file']['tags']['certificate'])
configuration['trusted list file']['download'] = config.get('trusted list file', 'download', fallback=Downloads['trusted list file']['default'])