Browse Source

Merge branch 'mc/clean-smudge-with-llp64'

The clean/smudge conversion code path has been prepared to better
work on platforms where ulong is narrower than size_t.

* mc/clean-smudge-with-llp64:
  clean/smudge: allow clean filters to process extremely large files
  odb: guard against data loss checking out a huge file
  git-compat-util: introduce more size_t helpers
  odb: teach read_blob_entry to use size_t
  t1051: introduce a smudge filter test for extremely large files
  test-lib: add prerequisite for 64-bit platforms
  test-tool genzeros: generate large amounts of data more efficiently
  test-genzeros: allow more than 2G zeros in Windows
pull/1061/head
Junio C Hamano 9 months ago
parent
commit
f9ba6acaa9
  1. 2
      convert.c
  2. 6
      delta.h
  3. 8
      entry.c
  4. 2
      entry.h
  5. 25
      git-compat-util.h
  6. 6
      object-file.c
  7. 6
      packfile.c
  8. 2
      parallel-checkout.c
  9. 21
      t/helper/test-genzeros.c
  10. 26
      t/t1051-large-conversion.sh
  11. 4
      t/test-lib.sh

2
convert.c

@ -613,7 +613,7 @@ static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf,
struct filter_params {
const char *src;
unsigned long size;
size_t size;
int fd;
const char *cmd;
const char *path;

6
delta.h

@ -90,15 +90,15 @@ static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
const unsigned char *top)
{
const unsigned char *data = *datap;
unsigned long cmd, size = 0;
size_t cmd, size = 0;
int i = 0;
do {
cmd = *data++;
size |= (cmd & 0x7f) << i;
size |= st_left_shift(cmd & 0x7f, i);
i += 7;
} while (cmd & 0x80 && data < top);
*datap = data;
return size;
return cast_size_t_to_ulong(size);
}
#endif

8
entry.c

@ -82,11 +82,13 @@ static int create_file(const char *path, unsigned int mode)
return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
}
void *read_blob_entry(const struct cache_entry *ce, unsigned long *size)
void *read_blob_entry(const struct cache_entry *ce, size_t *size)
{
enum object_type type;
void *blob_data = read_object_file(&ce->oid, &type, size);
unsigned long ul;
void *blob_data = read_object_file(&ce->oid, &type, &ul);
*size = ul;
if (blob_data) {
if (type == OBJ_BLOB)
return blob_data;
@ -271,7 +273,7 @@ static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca
int fd, ret, fstat_done = 0;
char *new_blob;
struct strbuf buf = STRBUF_INIT;
unsigned long size;
size_t size;
ssize_t wrote;
size_t newsize = 0;
struct stat st;

2
entry.h

@ -52,7 +52,7 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts,
*/
void unlink_entry(const struct cache_entry *ce);
void *read_blob_entry(const struct cache_entry *ce, unsigned long *size);
void *read_blob_entry(const struct cache_entry *ce, size_t *size);
int fstat_checkout_output(int fd, const struct checkout *state, struct stat *st);
void update_ce_after_write(const struct checkout *state, struct cache_entry *ce,
struct stat *st);

25
git-compat-util.h

@ -113,6 +113,14 @@
#define unsigned_mult_overflows(a, b) \
((a) && (b) > maximum_unsigned_value_of_type(a) / (a))
/*
* Returns true if the left shift of "a" by "shift" bits will
* overflow. The type of "a" must be unsigned.
*/
#define unsigned_left_shift_overflows(a, shift) \
((shift) < bitsizeof(a) && \
(a) > maximum_unsigned_value_of_type(a) >> (shift))
#ifdef __GNUC__
#define TYPEOF(x) (__typeof__(x))
#else
@ -862,6 +870,23 @@ static inline size_t st_sub(size_t a, size_t b)
return a - b;
}
static inline size_t st_left_shift(size_t a, unsigned shift)
{
if (unsigned_left_shift_overflows(a, shift))
die("size_t overflow: %"PRIuMAX" << %u",
(uintmax_t)a, shift);
return a << shift;
}
static inline unsigned long cast_size_t_to_ulong(size_t a)
{
if (a != (unsigned long)a)
die("object too large to read on this platform: %"
PRIuMAX" is cut off to %lu",
(uintmax_t)a, (unsigned long)a);
return (unsigned long)a;
}
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
# define xalloca(size) (alloca(size))

6
object-file.c

@ -1306,7 +1306,7 @@ static void *unpack_loose_rest(git_zstream *stream,
int parse_loose_header(const char *hdr, struct object_info *oi)
{
const char *type_buf = hdr;
unsigned long size;
size_t size;
int type, type_len = 0;
/*
@ -1341,12 +1341,12 @@ int parse_loose_header(const char *hdr, struct object_info *oi)
if (c > 9)
break;
hdr++;
size = size * 10 + c;
size = st_add(st_mult(size, 10), c);
}
}
if (oi->sizep)
*oi->sizep = size;
*oi->sizep = cast_size_t_to_ulong(size);
/*
* The length must be followed by a zero byte

6
packfile.c

@ -1060,7 +1060,7 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
unsigned long len, enum object_type *type, unsigned long *sizep)
{
unsigned shift;
unsigned long size, c;
size_t size, c;
unsigned long used = 0;
c = buf[used++];
@ -1074,10 +1074,10 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
break;
}
c = buf[used++];
size += (c & 0x7f) << shift;
size = st_add(size, st_left_shift(c & 0x7f, shift));
shift += 7;
}
*sizep = size;
*sizep = cast_size_t_to_ulong(size);
return used;
}

2
parallel-checkout.c

@ -261,7 +261,7 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd,
struct stream_filter *filter;
struct strbuf buf = STRBUF_INIT;
char *blob;
unsigned long size;
size_t size;
ssize_t wrote;
/* Sanity check */

21
t/helper/test-genzeros.c

@ -3,18 +3,31 @@
int cmd__genzeros(int argc, const char **argv)
{
long count;
/* static, so that it is NUL-initialized */
static const char zeros[256 * 1024];
intmax_t count;
ssize_t n;
if (argc > 2) {
fprintf(stderr, "usage: %s [<count>]\n", argv[0]);
return 1;
}
count = argc > 1 ? strtol(argv[1], NULL, 0) : -1L;
count = argc > 1 ? strtoimax(argv[1], NULL, 0) : -1;
while (count < 0 || count--) {
if (putchar(0) == EOF)
/* Writing out individual NUL bytes is slow... */
while (count < 0)
if (write(1, zeros, ARRAY_SIZE(zeros)) < 0)
return -1;
while (count > 0) {
n = write(1, zeros, count < ARRAY_SIZE(zeros) ?
count : ARRAY_SIZE(zeros));
if (n < 0)
return -1;
count -= n;
}
return 0;

26
t/t1051-large-conversion.sh

@ -83,4 +83,30 @@ test_expect_success 'ident converts on output' '
test_cmp small.clean large.clean
'
# This smudge filter prepends 5GB of zeros to the file it checks out. This
# ensures that smudging doesn't mangle large files on 64-bit Windows.
test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
'files over 4GB convert on output' '
test_commit test small "a small file" &&
small_size=$(test_file_size small) &&
test_config filter.makelarge.smudge \
"test-tool genzeros $((5*1024*1024*1024)) && cat" &&
echo "small filter=makelarge" >.gitattributes &&
rm small &&
git checkout -- small &&
size=$(test_file_size small) &&
test "$size" -eq $((5 * 1024 * 1024 * 1024 + $small_size))
'
# This clean filter writes down the size of input it receives. By checking against
# the actual size, we ensure that cleaning doesn't mangle large files on 64-bit Windows.
test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
'files over 4GB convert on input' '
test-tool genzeros $((5*1024*1024*1024)) >big &&
test_config filter.checklarge.clean "wc -c >big.size" &&
echo "big filter=checklarge" >.gitattributes &&
git add big &&
test $(test_file_size big) -eq $(cat big.size)
'
test_done

4
t/test-lib.sh

@ -1734,6 +1734,10 @@ build_option () {
sed -ne "s/^$1: //p"
}
test_lazy_prereq SIZE_T_IS_64BIT '
test 8 -eq "$(build_option sizeof-size_t)"
'
test_lazy_prereq LONG_IS_64BIT '
test 8 -le "$(build_option sizeof-long)"
'

Loading…
Cancel
Save