Added checksum checks for asset files. Updated documentation.

This commit is contained in:
Franco Masotti 2020-11-24 18:08:17 +01:00
parent c628356544
commit 21d7f73f09
Signed by: frnmst
GPG Key ID: 24116ED85666780A
7 changed files with 86 additions and 8 deletions

View File

@ -28,8 +28,8 @@ output types.
.. autofunction:: define_appdirs_user_data_dir_file_path
.. autofunction:: define_appdirs_user_config_dir_file_path
.. autofunction:: write_configuration_file
.. autofunction:: load_configuration
.. autofunction:: assert_data_structure
.. autofunction:: asset_checksum_matches
.. autofunction:: pipeline
Exceptions
@ -43,3 +43,4 @@ Exceptions
.. autoexception:: XMLFileNotConformingToSchema
.. autoexception:: ExtractedAttachmentNotInExtensionWhitelist
.. autoexception:: ExtractedAttachmentNotInFileTypeWhitelist
.. autoexception:: AssetsChecksumDoesNotMatch

View File

@ -22,6 +22,21 @@ Public keys file https://eidas.agid.gov.i
.. warning:: fattura-elettronica-reader is guaranteed to work with these file versions only!
.. important:: The XML schema file for FatturaPA version 1.2.1 needs to be patched. fattura_elettronica_reader
computes the SHA-512 checksum of the patched version of the file, which is expected to be:
::
a1b02818f81ac91f35358260dd12e1bf4480e1545bb457caffa0d434200a1bd05bedd88df2d897969485a989dda78922850ebe978b92524778a37cb0afacba27
Some of these files are checked in the pipeline:
if a checksum does not match the one present in the source code, an exception is raised.
You can use the ``--ignore-assets-checksum`` option to override this behaviour.
Updates
-------

View File

@ -29,7 +29,7 @@ from .api import (
patch_invoice_schema_file, create_appdirs,
define_appdirs_user_data_dir_file_path,
define_appdirs_user_config_dir_file_path, write_configuration_file,
assert_data_structure, pipeline)
assert_data_structure, asset_checksum_matches, pipeline)
from .cli import (CliInterface)
from .exceptions import (P7MFileDoesNotHaveACoherentCryptographicalSignature,
InvoiceFileChecksumFailed, P7MFileNotAuthentic,
@ -37,4 +37,5 @@ from .exceptions import (P7MFileDoesNotHaveACoherentCryptographicalSignature,
MissingTagInMetadataFile,
XMLFileNotConformingToSchema,
ExtractedAttachmentNotInExtensionWhitelist,
ExtractedAttachmentNotInFileTypeWhitelist)
ExtractedAttachmentNotInFileTypeWhitelist,
AssetsChecksumDoesNotMatch)

View File

@ -40,7 +40,8 @@ from .exceptions import (P7MFileDoesNotHaveACoherentCryptographicalSignature,
MissingTagInMetadataFile,
XMLFileNotConformingToSchema,
ExtractedAttachmentNotInExtensionWhitelist,
ExtractedAttachmentNotInFileTypeWhitelist)
ExtractedAttachmentNotInFileTypeWhitelist,
AssetsChecksumDoesNotMatch)
from . import constants as const
#######
@ -219,7 +220,7 @@ def get_ca_certificates(trusted_list_xml_root: str,
def is_p7m_file_authentic(p7m_file: str,
ca_certificate_pem_file: str,
ignore_signature_check: bool = False,
ignore_signers_certificate_check: bool = False):
ignore_signers_certificate_check: bool = False) -> bool:
r"""Check authenticity of the invoice file on various levels.
:param p7m_file: the path of the signed invoice file.
@ -436,7 +437,7 @@ def define_appdirs_user_data_dir_file_path(program_name: str,
def define_appdirs_user_config_dir_file_path(program_name: str,
relative_path: str):
relative_path: str) -> str:
r"""Get the full path of the input file in the user's cofiguration directory.
:param program_name: the name of the software.
@ -527,9 +528,11 @@ def assert_data_structure(source: str, file_type: str, data: dict):
if 'patched' not in data:
raise ValueError
if'configuration file' not in data:
if 'configuration file' not in data:
raise ValueError
if'write default configuration file' not in data:
if 'write default configuration file' not in data:
raise ValueError
if 'ignore assets checksum' not in data:
raise ValueError
if not isinstance(data['patched'], bool):
raise TypeError
@ -537,6 +540,8 @@ def assert_data_structure(source: str, file_type: str, data: dict):
raise TypeError
if not isinstance(data['write default configuration file'], bool):
raise TypeError
if not isinstance(data['ignore assets checksum'], bool):
raise TypeError
if source == 'invoice':
if 'extract attachments' not in data:
@ -660,6 +665,24 @@ def assert_data_structure(source: str, file_type: str, data: dict):
raise ValueError
def asset_checksum_matches(file: str) -> bool:
    r"""Check that the asset file is the expected one.

    The SHA-512 checksum of the file content is compared with the expected
    value stored in ``const.Checksum`` under the file's base name.

    :param file: the file name that needs to be checked.
    :type file: str
    :returns: ``True`` if the computed checksum is equal to the expected one,
         ``False`` otherwise.
    :rtype: bool
    :raises: a built-in exception.
    """
    m = hashlib.sha512()

    # Use a context manager so that the file is always closed, even if
    # reading fails. Read in chunks to avoid loading the whole asset in memory.
    with open(file, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            m.update(chunk)

    # The expected checksums are looked up by file name only, not by the
    # full path.
    return m.hexdigest() == const.Checksum[pathlib.Path(file).name]
def pipeline(source: str, file_type: str, data: dict):
r"""Run the pipeline.
@ -699,6 +722,7 @@ def pipeline(source: str, file_type: str, data: dict):
w3c_schema_file_for_xml_signatures = define_appdirs_user_data_dir_file_path(
project_name,
const.Paths['invoice file']['XSD']['W3C Schema for XML Signatures'])
if source == 'invoice':
invoice_schema_file = define_appdirs_user_data_dir_file_path(
project_name, const.Paths['invoice file']['XSD']['default'])
@ -754,6 +778,10 @@ def pipeline(source: str, file_type: str, data: dict):
get_remote_file(trusted_list_file,
config['trusted list file']['download'])
if not data['ignore assets checksum']:
if not asset_checksum_matches(trusted_list_file):
raise AssetsChecksumDoesNotMatch("Run the program with the '--ignore-assets-checksum' option, contact the developer or open a pull request. Have a look at https://frnmst.github.io/fattura-elettronica-reader/assets.html")
trusted_list_xml_root = parse_xml_file(trusted_list_file)
get_ca_certificates(trusted_list_xml_root, ca_certificate_pem_file,
@ -786,6 +814,11 @@ def pipeline(source: str, file_type: str, data: dict):
const.Patch['invoice file']['XSD']['line'][0]['offending'],
const.Patch['invoice file']['XSD']['line'][0]['fix'])
# Verify the checksum of the patched file.
if not data['ignore assets checksum']:
if not asset_checksum_matches(invoice_schema_file):
raise AssetsChecksumDoesNotMatch("Run the program with the '--ignore-assets-checksum' option, contact the developer or open a pull request. Have a look at https://frnmst.github.io/fattura-elettronica-reader/assets.html")
# Create a temporary directory to store the original XML invoice file.
with tempfile.TemporaryDirectory() as tmpdirname:
# file_to_consider_original is the path of the non-signed p7m file. signed files
@ -856,6 +889,11 @@ def pipeline(source: str, file_type: str, data: dict):
config['invoice file']['XSLT ' +
data['invoice xslt type'] +
' download'])
if not data['ignore assets checksum']:
if not asset_checksum_matches(invoice_xslt_file):
raise AssetsChecksumDoesNotMatch("Run the program with the '--ignore-assets-checksum' option, contact the developer or open a pull request. Have a look at https://frnmst.github.io/fattura-elettronica-reader/assets.html")
invoice_xslt_root = parse_xml_file(invoice_xslt_file)
html_output = file_to_consider + '.html'
get_invoice_as_html(invoice_root, invoice_xslt_root,

View File

@ -52,6 +52,8 @@ class CliToApi():
args.configuration_file,
'write default configuration file':
args.write_default_configuration_file,
'ignore assets checksum':
args.ignore_assets_checksum,
}
# Prepare the data structure.
@ -322,6 +324,11 @@ class CliInterface():
action='store_true',
help='write the default configuration file')
parser.add_argument('-k',
'--ignore-assets-checksum',
action='store_true',
help='avoid running checksums for the downloadable assets')
parser.add_argument('-v',
'--version',
action='version',

View File

@ -127,5 +127,17 @@ File['invoice']['attachment'] = {
'filetype whitelist': ['application/pdf']
}
# Expected SHA-512 checksums of the asset files. The keys are the
# corresponding entries of ``Paths``.
Checksum = {
    Paths['invoice file']['XSLT']['PA']:
    '301db9da3c0715c0ab5db22c561bfb2812fea3cef150ff4a2124fe6141ebb3cb1c898d7ca3c931f716eff3b7b1946ebc86ca8bdd6d7561979f2f3a0cb95ff560',
    Paths['invoice file']['XSLT']['ordinaria']:
    '849c4b50956b9e9eaccbbbffb04c1f345ff4abdc0dd191a14c54d48092c661984b1fcdb910c4c92291e158a62ecbb1c588d94e6bd6479e61ff6376746154df6c',
    # This is the checksum of the patched schema file. The checksum of the
    # original, non-patched, file is
    # 2a7c3f2913ee390c167e41ae5618c303b481f548f9b2a8d60dddc36804ddd3ebf7cb5003e5cc6996480c67d085b82b438aff7cc0f74d7c104225449785cb575b
    Paths['invoice file']['XSD']['default']:
    'a1b02818f81ac91f35358260dd12e1bf4480e1545bb457caffa0d434200a1bd05bedd88df2d897969485a989dda78922850ebe978b92524778a37cb0afacba27',
    Paths['trusted list file']:
    '09c2cf39120a6ca869083a659e8861aae8d677957cbbd89be1286f0bb5ef5cbb33a17b15637e6245662e2a4fa60a26efa8fd1101bed2c4dcdff0bbbcbc7811fe',
}
if __name__ == '__main__':
pass

View File

@ -52,3 +52,7 @@ class ExtractedAttachmentNotInExtensionWhitelist(Exception):
class ExtractedAttachmentNotInFileTypeWhitelist(Exception):
    """An extracted attachment has a file type that is not in the whitelist."""
class AssetsChecksumDoesNotMatch(Exception):
    """The checksum of an asset file does not match the expected one.

    A downloaded file that differs from the expected one might cause this
    program to malfunction.
    """