/* Scraped file-viewer header: 188 lines, 5.3 KiB (Raw / Normal View / History). */

#ifndef HASH_H
#define HASH_H
2017-11-12 22:28:52 +01:00
#include "git-compat-util.h"
#if defined(SHA1_PPC)
#include "ppc/sha1.h"
#elif defined(SHA1_APPLE)
#include <CommonCrypto/CommonDigest.h>
#elif defined(SHA1_OPENSSL)
#include <openssl/sha.h>
/*
 * Blame annotation for the SHA1_DC branch below
 * (commit date: 2017-03-16 23:09:12 +01:00).
 *
 * Makefile: add DC_SHA1 knob
 *
 * This knob lets you use the sha1dc implementation from:
 *
 *   https://github.com/cr-marcstevens/sha1collisiondetection
 *
 * which can detect certain types of collision attacks (even when we only
 * see half of the colliding pair). So it mitigates any attack which
 * consists of getting the "good" half of a collision into a trusted
 * repository, and then later replacing it with the "bad" half. The "good"
 * half is rejected by the victim's version of Git (and even if they run an
 * old version of Git, any sha1dc-enabled git will complain loudly if it
 * ever has to interact with the object).
 *
 * The big downside is that it's slower than either the openssl or
 * block-sha1 implementations. Here are some timings based off of
 * linux.git:
 *
 *   - compute sha1 over whole packfile
 *       sha1dc:   3.580s
 *       blk-sha1: 2.046s (-43%)
 *       openssl:  1.335s (-62%)
 *
 *   - rev-list --all --objects
 *       sha1dc:   33.512s
 *       blk-sha1: 33.514s (+0.0%)
 *       openssl:  33.650s (+0.4%)
 *
 *   - git log --no-merges -10000 -p
 *       sha1dc:   8.124s
 *       blk-sha1: 7.986s (-1.6%)
 *       openssl:  8.203s (+0.9%)
 *
 *   - index-pack --verify
 *       sha1dc:   4m19s
 *       blk-sha1: 2m57s (-32%)
 *       openssl:  2m19s (-42%)
 *
 * So overall the sha1 computation with collision detection is about 1.75x
 * slower than block-sha1, and 2.7x slower than openssl. But of course most
 * operations do more than just sha1. Normal object access isn't really
 * slowed at all (both the +/- changes there are well within the run-to-run
 * noise); any changes are drowned out by the other work Git is doing.
 *
 * The most-affected operation is `index-pack --verify`, which is
 * essentially just computing the sha1 on every object. This is similar to
 * the `index-pack` invocation that the receiver of a push or fetch would
 * perform. So clearly there's some extra CPU load here.
 *
 * There will also be some latency for the user, though keep in mind that
 * such an operation will generally be network bound (this is about a 1.2GB
 * packfile). Some of that extra CPU is "free" in the sense that we use it
 * while the pack is streaming in anyway. But most of it comes during the
 * delta-resolution phase, after the whole pack has been received. So we
 * can imagine that for this (quite large) push, the user might have to
 * wait an extra 100 seconds over openssl (which is what we use now). If we
 * assume they can push to us at 20Mbit/s, that's 480s for a 1.2GB pack,
 * which is only 20% slower.
 *
 * Signed-off-by: Jeff King
 * Signed-off-by: Junio C Hamano
 */
#elif defined(SHA1_DC)
#include "sha1dc_git.h"
#else /* SHA1_BLK */
#include "block-sha1/sha1.h"
#endif

/*
 * Select the SHA-256 backend independently of the SHA-1 choice above.
 * NOTE(review): the "#endif" above and the "#else"/"#endif" below were
 * missing from this copy; without them the SHA-256 selection was nested
 * inside the SHA1_BLK fallback and the conditionals never closed.
 */
#if defined(SHA256_GCRYPT)
#include "sha256/gcrypt.h"
#elif defined(SHA256_OPENSSL)
#include <openssl/sha.h>
#else
#include "sha256/block/sha256.h"
#endif
#ifndef platform_SHA_CTX
/*
 * platform's underlying implementation of SHA-1; could be OpenSSL,
 * blk_SHA, Apple CommonCrypto, etc...  Note that the relevant
 * SHA-1 header may have already defined platform_SHA_CTX for our
 * own implementations like block-sha1 and ppc-sha1, so we list
 * the default for OpenSSL compatible SHA-1 implementations here.
 */
#define platform_SHA_CTX	SHA_CTX
#define platform_SHA1_Init	SHA1_Init
#define platform_SHA1_Update	SHA1_Update
#define platform_SHA1_Final	SHA1_Final
#endif

/*
 * The git_* SHA-1 names resolve to whichever platform_* implementation
 * is in effect at this point (header-provided or the defaults above).
 * They are defined unconditionally, outside the #ifndef.
 */
#define git_SHA_CTX		platform_SHA_CTX
#define git_SHA1_Init		platform_SHA1_Init
#define git_SHA1_Update		platform_SHA1_Update
#define git_SHA1_Final		platform_SHA1_Final
/*
 * Default to OpenSSL-compatible SHA-256 names unless the included
 * SHA-256 header already defined platform_SHA256_CTX.
 */
#ifndef platform_SHA256_CTX
#define platform_SHA256_CTX	SHA256_CTX
#define platform_SHA256_Init	SHA256_Init
#define platform_SHA256_Update	SHA256_Update
#define platform_SHA256_Final	SHA256_Final
#endif

/*
 * The git_* SHA-256 names resolve to whichever platform_* implementation
 * is in effect at this point.  They are defined unconditionally, outside
 * the #ifndef.
 */
#define git_SHA256_CTX		platform_SHA256_CTX
#define git_SHA256_Init		platform_SHA256_Init
#define git_SHA256_Update	platform_SHA256_Update
#define git_SHA256_Final	platform_SHA256_Final
/* Use the platform's SHA-256 context clone routine when it provides one. */
#ifdef platform_SHA256_Clone
#define git_SHA256_Clone	platform_SHA256_Clone
#endif

/*
 * Route SHA-1 updates through the chunked wrapper.
 * NOTE(review): the SHA1_MAX_BLOCK_SIZE guard is restored from upstream Git
 * (the build knob that limits how much data is passed to one SHA1_Update
 * call); this copy had lost both the guard and the preceding #endif.
 * Confirm against the build configuration.
 */
#ifdef SHA1_MAX_BLOCK_SIZE
#include "compat/sha1-chunked.h"
#undef git_SHA1_Update
#define git_SHA1_Update		git_SHA1_Update_Chunked
#endif
/*
 * Copy the SHA-1 hashing state from src into dst.
 *
 * The copy is a plain byte-wise memcpy of the whole context object, so
 * dst and src must not overlap.
 */
static inline void git_SHA1_Clone(git_SHA_CTX *dst, const git_SHA_CTX *src)
{
	memcpy(dst, src, sizeof(*dst));
}
/*
 * Copy the SHA-256 hashing state from src into dst with a byte-wise
 * memcpy of the whole context object; dst and src must not overlap.
 *
 * Skipped when git_SHA256_Clone is already a macro alias for a platform
 * clone routine: defining the function then would expand to a definition
 * of the platform's own function name.
 */
#ifndef git_SHA256_Clone
static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *src)
{
	memcpy(dst, src, sizeof(*dst));
}
#endif
/*
 * Note that these constants are suitable for indexing the hash_algos array and
 * comparing against each other, but are otherwise arbitrary, so they should not
 * be exposed to the user or serialized to disk.  To know whether a
 * git_hash_algo struct points to some usable hash function, test the format_id
 * field for being non-zero.  Use the name field for user-visible situations and
 * the format_id field for fixed-length fields on disk.
 *
 * NOTE(review): the GIT_HASH_UNKNOWN and GIT_HASH_NALGOS definitions were
 * missing from this copy (only their comments survived); the values below
 * follow from the surviving constants and comments -- confirm.
 */
/* An unknown hash function. */
#define GIT_HASH_UNKNOWN 0
/* SHA-1 */
#define GIT_HASH_SHA1 1
/* SHA-256 */
#define GIT_HASH_SHA256 2
/* Number of algorithms supported (including unknown). */
#define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1)
/*
 * A suitably aligned type for stack allocations of hash contexts.
 * As a union it is large enough for either algorithm's context.
 */
union git_hash_ctx {
	git_SHA_CTX sha1;
	git_SHA256_CTX sha256;
};
typedef union git_hash_ctx git_hash_ctx;
/*
 * Function-pointer types for the per-algorithm operations, all of which
 * work on the algorithm-agnostic git_hash_ctx.
 */

/* Initialize ctx. */
typedef void (*git_hash_init_fn)(git_hash_ctx *ctx);
/* Clone the context src into dst. */
typedef void (*git_hash_clone_fn)(git_hash_ctx *dst, const git_hash_ctx *src);
/* Feed len bytes starting at in into ctx. */
typedef void (*git_hash_update_fn)(git_hash_ctx *ctx, const void *in, size_t len);
/*
 * Finalize ctx and store the result in hash.
 * NOTE(review): presumably hash must have room for the algorithm's raw
 * digest size -- confirm against the implementations.
 */
typedef void (*git_hash_final_fn)(unsigned char *hash, git_hash_ctx *ctx);
struct git_hash_algo {
* The name of the algorithm, as appears in the config file and in
* messages.
const char *name;
/* A four-byte version identifier, used in pack indices. */
uint32_t format_id;
/* The length of the hash in binary. */
size_t rawsz;
/* The length of the hash in hex characters. */
size_t hexsz;
/* The block size of the hash. */
size_t blksz;
2017-11-12 22:28:52 +01:00
/* The hash initialization function. */
git_hash_init_fn init_fn;
/* The hash context cloning function. */
git_hash_clone_fn clone_fn;
2017-11-12 22:28:52 +01:00
/* The hash update function. */
git_hash_update_fn update_fn;
/* The hash finalization function. */
git_hash_final_fn final_fn;
/* The OID of the empty tree. */
const struct object_id *empty_tree;
/* The OID of the empty blob. */
const struct object_id *empty_blob;
/* Table of algorithm descriptions, indexed by the GIT_HASH_* constants. */
extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS];

/*
 * Return a GIT_HASH_* constant based on the name.  Returns GIT_HASH_UNKNOWN if
 * the name doesn't match a known algorithm.
 */
int hash_algo_by_name(const char *name);
/* Identical, except based on the format ID. */
int hash_algo_by_id(uint32_t format_id);
/* Identical, except based on the length. */
int hash_algo_by_length(int len);
/* Identical, except for a pointer to struct git_hash_algo. */
static inline int hash_algo_by_ptr(const struct git_hash_algo *p)
return p - hash_algos;
/*
 * NOTE(review): GIT_SHA1_HEXSZ and the three GIT_MAX_* definitions were
 * missing from this copy (only their comments survived).  They are restored
 * by analogy with GIT_SHA256_HEXSZ and by taking SHA-256 as the largest
 * supported hash -- confirm.
 */

/* The length in bytes and in hex digits of an object name (SHA-1 value). */
#define GIT_SHA1_RAWSZ 20
#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ)
/* The block size of SHA-1. */
#define GIT_SHA1_BLKSZ 64

/* The length in bytes and in hex digits of an object name (SHA-256 value). */
#define GIT_SHA256_RAWSZ 32
#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ)
/* The block size of SHA-256. */
#define GIT_SHA256_BLKSZ 64

/* The length in bytes and in hex digits of the largest possible hash value. */
#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ
#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ
/* The largest possible block size for any supported hash. */
#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ
/*
 * An object name in binary form.  The buffer is GIT_MAX_RAWSZ bytes, i.e.
 * sized for the largest possible hash value.
 */
struct object_id {
	unsigned char hash[GIT_MAX_RAWSZ];
};

/* Shorthand for the hash_algo member of the_repository. */
#define the_hash_algo the_repository->hash_algo