Browse Source

Merge branch 'tb/cruft-packs'

A mechanism to pack unreachable objects into a "cruft pack",
instead of ejecting them into loose form to be reclaimed later, has
been introduced.

* tb/cruft-packs:
  sha1-file.c: don't freshen cruft packs
  builtin/gc.c: conditionally avoid pruning objects via loose
  builtin/repack.c: add cruft packs to MIDX during geometric repack
  builtin/repack.c: use named flags for existing_packs
  builtin/repack.c: allow configuring cruft pack generation
  builtin/repack.c: support generating a cruft pack
  builtin/pack-objects.c: --cruft with expiration
  reachable: report precise timestamps from objects in cruft packs
  reachable: add options to add_unseen_recent_objects_to_traversal
  builtin/pack-objects.c: --cruft without expiration
  builtin/pack-objects.c: return from create_object_entry()
  t/helper: add 'pack-mtimes' test-tool
  pack-mtimes: support writing pack .mtimes files
  chunk-format.h: extract oid_version()
  pack-write: pass 'struct packing_data' to 'stage_tmp_packfiles'
  pack-mtimes: support reading .mtimes files
  Documentation/technical: add cruft-packs.txt
pull/1274/head
Junio C Hamano 2 months ago
parent
commit
a50036da1a
  1. 1
      Documentation/Makefile
  2. 21
      Documentation/config/gc.txt
  3. 9
      Documentation/config/repack.txt
  4. 5
      Documentation/git-gc.txt
  5. 30
      Documentation/git-pack-objects.txt
  6. 11
      Documentation/git-repack.txt
  7. 123
      Documentation/technical/cruft-packs.txt
  8. 19
      Documentation/technical/pack-format.txt
  9. 2
      Makefile
  10. 10
      builtin/gc.c
  11. 304
      builtin/pack-objects.c
  12. 185
      builtin/repack.c
  13. 2
      bulk-checkin.c
  14. 12
      chunk-format.c
  15. 3
      chunk-format.h
  16. 18
      commit-graph.c
  17. 18
      midx.c
  18. 4
      object-file.c
  19. 12
      object-store.h
  20. 129
      pack-mtimes.c
  21. 26
      pack-mtimes.h
  22. 6
      pack-objects.c
  23. 25
      pack-objects.h
  24. 93
      pack-write.c
  25. 4
      pack.h
  26. 19
      packfile.c
  27. 58
      reachable.c
  28. 9
      reachable.h
  29. 56
      t/helper/test-pack-mtimes.c
  30. 1
      t/helper/test-tool.c
  31. 1
      t/helper/test-tool.h
  32. 739
      t/t5329-pack-objects-cruft.sh

1
Documentation/Makefile

@ -95,6 +95,7 @@ TECH_DOCS += MyFirstObjectWalk
TECH_DOCS += SubmittingPatches
TECH_DOCS += ToolsForGit
TECH_DOCS += technical/bundle-format
TECH_DOCS += technical/cruft-packs
TECH_DOCS += technical/hash-function-transition
TECH_DOCS += technical/http-protocol
TECH_DOCS += technical/index-format

21
Documentation/config/gc.txt

@ -81,14 +81,21 @@ gc.packRefs::
to enable it within all non-bare repos or it can be set to a
boolean value. The default is `true`.
gc.cruftPacks::
Store unreachable objects in a cruft pack (see
linkgit:git-repack[1]) instead of as loose objects. The default
is `false`.
gc.pruneExpire::
When 'git gc' is run, it will call 'prune --expire 2.weeks.ago'.
Override the grace period with this config variable. The value
"now" may be used to disable this grace period and always prune
unreachable objects immediately, or "never" may be used to
suppress pruning. This feature helps prevent corruption when
'git gc' runs concurrently with another process writing to the
repository; see the "NOTES" section of linkgit:git-gc[1].
When 'git gc' is run, it will call 'prune --expire 2.weeks.ago'
(and 'repack --cruft --cruft-expiration 2.weeks.ago' if using
cruft packs via `gc.cruftPacks` or `--cruft`). Override the
grace period with this config variable. The value "now" may be
used to disable this grace period and always prune unreachable
objects immediately, or "never" may be used to suppress pruning.
This feature helps prevent corruption when 'git gc' runs
concurrently with another process writing to the repository; see
the "NOTES" section of linkgit:git-gc[1].
gc.worktreePruneExpire::
When 'git gc' is run, it calls

9
Documentation/config/repack.txt

@ -30,3 +30,12 @@ repack.updateServerInfo::
If set to false, linkgit:git-repack[1] will not run
linkgit:git-update-server-info[1]. Defaults to true. Can be overridden
when true by the `-n` option of linkgit:git-repack[1].
repack.cruftWindow::
repack.cruftWindowMemory::
repack.cruftDepth::
repack.cruftThreads::
Parameters used by linkgit:git-pack-objects[1] when generating
a cruft pack and the respective parameters are not given over
the command line. See similarly named `pack.*` configuration
variables for defaults and meaning.

5
Documentation/git-gc.txt

@ -54,6 +54,11 @@ other housekeeping tasks (e.g. rerere, working trees, reflog...) will
be performed as well.
--cruft::
When expiring unreachable objects, pack them separately into a
cruft pack instead of storing them as loose objects.
--prune=<date>::
Prune loose objects older than date (default is 2 weeks ago,
overridable by the config variable `gc.pruneExpire`).

30
Documentation/git-pack-objects.txt

@ -13,6 +13,7 @@ SYNOPSIS
[--no-reuse-delta] [--delta-base-offset] [--non-empty]
[--local] [--incremental] [--window=<n>] [--depth=<n>]
[--revs [--unpacked | --all]] [--keep-pack=<pack-name>]
[--cruft] [--cruft-expiration=<time>]
[--stdout [--filter=<filter-spec>] | <base-name>]
[--shallow] [--keep-true-parents] [--[no-]sparse] < <object-list>
@ -95,6 +96,35 @@ base-name::
Incompatible with `--revs`, or options that imply `--revs` (such as
`--all`), with the exception of `--unpacked`, which is compatible.
--cruft::
Packs unreachable objects into a separate "cruft" pack, denoted
by the existence of a `.mtimes` file. Typically used by `git
repack --cruft`. Callers provide a list of pack names and
indicate which packs will remain in the repository, along with
which packs will be deleted (indicated by the `-` prefix). The
contents of the cruft pack are all objects not contained in the
surviving packs which have not exceeded the grace period (see
`--cruft-expiration` below), or which have exceeded the grace
period, but are reachable from another object which hasn't.
+
When the input lists a pack containing all reachable objects (and lists
all other packs as pending deletion), the corresponding cruft pack will
contain all unreachable objects (with mtime newer than the
`--cruft-expiration`) along with any unreachable objects whose mtime is
older than the `--cruft-expiration`, but are reachable from an
unreachable object whose mtime is newer than the `--cruft-expiration`.
+
Incompatible with `--unpack-unreachable`, `--keep-unreachable`,
`--pack-loose-unreachable`, `--stdin-packs`, as well as any other
options which imply `--revs`. Also incompatible with `--max-pack-size`;
when this option is set, the maximum pack size is not inferred from
`pack.packSizeLimit`.
--cruft-expiration=<approxidate>::
If specified, objects are eliminated from the cruft pack if they
have an mtime older than `<approxidate>`. If unspecified (and
given `--cruft`), then no objects are eliminated.
--window=<n>::
--depth=<n>::
These two options affect how the objects contained in

11
Documentation/git-repack.txt

@ -63,6 +63,17 @@ to the new separate pack will be written.
Also run 'git prune-packed' to remove redundant
loose object files.
--cruft::
Same as `-a`, unless `-d` is used. Then any unreachable objects
are packed into a separate cruft pack. Unreachable objects can
be pruned using the normal expiry rules with the next `git gc`
invocation (see linkgit:git-gc[1]). Incompatible with `-k`.
--cruft-expiration=<approxidate>::
Expire unreachable objects older than `<approxidate>`
immediately instead of waiting for the next `git gc` invocation.
Only useful with `--cruft -d`.
-l::
Pass the `--local` option to 'git pack-objects'. See
linkgit:git-pack-objects[1].

123
Documentation/technical/cruft-packs.txt

@ -0,0 +1,123 @@
= Cruft packs
The cruft packs feature offers an alternative to Git's traditional mechanism of
removing unreachable objects. This document provides an overview of Git's
pruning mechanism, and how a cruft pack can be used instead to accomplish the
same.
== Background
To remove unreachable objects from your repository, Git offers `git repack -Ad`
(see linkgit:git-repack[1]). Quoting from the documentation:
[quote]
[...] unreachable objects in a previous pack become loose, unpacked objects,
instead of being left in the old pack. [...] loose unreachable objects will be
pruned according to normal expiry rules with the next 'git gc' invocation.
Unreachable objects aren't removed immediately, since doing so could race with
an incoming push which may reference an object which is about to be deleted.
Instead, those unreachable objects are stored as loose objects and stay that way
until they are older than the expiration window, at which point they are removed
by linkgit:git-prune[1].
Git must store these unreachable objects loose in order to keep track of their
per-object mtimes. If these unreachable objects were written into one big pack,
then either freshening that pack (because an object contained within it was
re-written) or creating a new pack of unreachable objects would cause the pack's
mtime to get updated, and the objects within it would never leave the expiration
window. Instead, objects are stored loose in order to keep track of the
individual object mtimes and avoid a situation where all cruft objects are
freshened at once.
This can lead to undesirable situations when a repository contains many
unreachable objects which have not yet left the grace period. Having large
directories in the shards of `.git/objects` can lead to decreased performance in
the repository. But given enough unreachable objects, this can lead to inode
starvation and degrade the performance of the whole system. Since we
can never pack those objects, these repositories often take up a large amount of
disk space, since we can only zlib compress them, but not store them in delta
chains.
== Cruft packs
A cruft pack eliminates the need for storing unreachable objects in a loose
state by including the per-object mtimes in a separate file alongside a single
pack containing all loose objects.
A cruft pack is written by `git repack --cruft` when generating a new pack,
using linkgit:git-pack-objects[1]'s `--cruft` option. Note that `git repack --cruft`
is a classic all-into-one repack, meaning that everything in the resulting pack is
reachable, and everything else is unreachable. Once written, the `--cruft`
option instructs `git repack` to generate another pack containing only objects
not packed in the previous step (which equates to packing all unreachable
objects together). This progresses as follows:
1. Enumerate every object, marking any object which is (a) not contained in a
kept-pack, and (b) whose mtime is within the grace period as a traversal
tip.
2. Perform a reachability traversal based on the tips gathered in the previous
step, adding every object along the way to the pack.
3. Write the pack out, along with a `.mtimes` file that records the per-object
timestamps.
This mode is invoked internally by linkgit:git-repack[1] when instructed to
write a cruft pack. Crucially, the set of in-core kept packs is exactly the set
of packs which will not be deleted by the repack; in other words, they contain
all of the repository's reachable objects.
When a repository already has a cruft pack, `git repack --cruft` typically only
adds objects to it. An exception to this is when `git repack` is given the
`--cruft-expiration` option, which allows the generated cruft pack to omit
expired objects instead of waiting for linkgit:git-gc[1] to expire those objects
later on.
It is linkgit:git-gc[1] that is typically responsible for removing expired
unreachable objects.
== Caution for mixed-version environments
Repositories that have cruft packs in them will continue to work with any older
version of Git. Note, however, that previous versions of Git which do not
understand the `.mtimes` file will use the cruft pack's mtime as the mtime for
all of the objects in it. In other words, do not expect older (pre-cruft pack)
versions of Git to interpret or even read the contents of the `.mtimes` file.
Note that having mixed versions of Git GC-ing the same repository can lead to
unreachable objects never being completely pruned. This can happen under the
following circumstances:
- An older version of Git running GC explodes the contents of an existing
cruft pack loose, using the cruft pack's mtime.
- A newer version running GC collects those loose objects into a cruft pack,
where the `.mtimes` file reflects the loose objects' actual mtimes, but the
cruft pack mtime is "now".
Repeating this process will lead to unreachable objects not getting pruned as a
result of repeatedly resetting the objects' mtimes to the present time.
If you are GC-ing repositories in a mixed version environment, consider omitting
the `--cruft` option when using linkgit:git-repack[1] and linkgit:git-gc[1], and
leaving the `gc.cruftPacks` configuration unset until all writers understand
cruft packs.
== Alternatives
Notable alternatives to this design include:
- The location of the per-object mtime data, and
- Storing unreachable objects in multiple cruft packs.
On the location of mtime data, a new auxiliary file tied to the pack was chosen
to avoid complicating the `.idx` format. If the `.idx` format were ever to gain
support for optional chunks of data, it may make sense to consolidate the
`.mtimes` format into the `.idx` itself.
Storing unreachable objects among multiple cruft packs (e.g., creating a new
cruft pack during each repacking operation including only unreachable objects
which aren't already stored in an earlier cruft pack) is significantly more
complicated to construct, and so isn't pursued here. The obvious drawback to
the current implementation is that the entire cruft pack must be re-written from
scratch.

19
Documentation/technical/pack-format.txt

@ -294,6 +294,25 @@ Pack file entry: <+
All 4-byte numbers are in network order.
== pack-*.mtimes files have the format:
All 4-byte numbers are in network byte order.
- A 4-byte magic number '0x4d544d45' ('MTME').
- A 4-byte version identifier (= 1).
- A 4-byte hash function identifier (= 1 for SHA-1, 2 for SHA-256).
- A table of 4-byte unsigned integers. The ith value is the
modification time (mtime) of the ith object in the corresponding
pack by lexicographic (index) order. The mtimes count standard
epoch seconds.
- A trailer, containing a checksum of the corresponding packfile,
and a checksum of all of the above (each having length according
to the specified hash function).
== multi-pack-index (MIDX) files have the following format:
The multi-pack-index files refer to multiple pack-files and loose objects.

2
Makefile

@ -740,6 +740,7 @@ TEST_BUILTINS_OBJS += test-oid-array.o
TEST_BUILTINS_OBJS += test-oidmap.o
TEST_BUILTINS_OBJS += test-oidtree.o
TEST_BUILTINS_OBJS += test-online-cpus.o
TEST_BUILTINS_OBJS += test-pack-mtimes.o
TEST_BUILTINS_OBJS += test-parse-options.o
TEST_BUILTINS_OBJS += test-parse-pathspec-file.o
TEST_BUILTINS_OBJS += test-partial-clone.o
@ -996,6 +997,7 @@ LIB_OBJS += oidtree.o
LIB_OBJS += pack-bitmap-write.o
LIB_OBJS += pack-bitmap.o
LIB_OBJS += pack-check.o
LIB_OBJS += pack-mtimes.o
LIB_OBJS += pack-objects.o
LIB_OBJS += pack-revindex.o
LIB_OBJS += pack-write.o

10
builtin/gc.c

@ -42,6 +42,7 @@ static const char * const builtin_gc_usage[] = {
static int pack_refs = 1;
static int prune_reflogs = 1;
static int cruft_packs = 0;
static int aggressive_depth = 50;
static int aggressive_window = 250;
static int gc_auto_threshold = 6700;
@ -152,6 +153,7 @@ static void gc_config(void)
git_config_get_int("gc.auto", &gc_auto_threshold);
git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit);
git_config_get_bool("gc.autodetach", &detach_auto);
git_config_get_bool("gc.cruftpacks", &cruft_packs);
git_config_get_expiry("gc.pruneexpire", &prune_expire);
git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire);
git_config_get_expiry("gc.logexpiry", &gc_log_expire);
@ -331,7 +333,11 @@ static void add_repack_all_option(struct string_list *keep_pack)
{
if (prune_expire && !strcmp(prune_expire, "now"))
strvec_push(&repack, "-a");
else {
else if (cruft_packs) {
strvec_push(&repack, "--cruft");
if (prune_expire)
strvec_pushf(&repack, "--cruft-expiration=%s", prune_expire);
} else {
strvec_push(&repack, "-A");
if (prune_expire)
strvec_pushf(&repack, "--unpack-unreachable=%s", prune_expire);
@ -551,6 +557,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
{ OPTION_STRING, 0, "prune", &prune_expire, N_("date"),
N_("prune unreferenced objects"),
PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire },
OPT_BOOL(0, "cruft", &cruft_packs, N_("pack unreferenced objects separately")),
OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")),
OPT_BOOL_F(0, "auto", &auto_gc, N_("enable auto-gc mode"),
PARSE_OPT_NOCOMPLETE),
@ -670,6 +677,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
die(FAILED_RUN, repack.v[0]);
if (prune_expire) {
/* run `git prune` even if using cruft packs */
strvec_push(&prune, prune_expire);
if (quiet)
strvec_push(&prune, "--no-progress");

304
builtin/pack-objects.c

@ -36,6 +36,7 @@
#include "trace2.h"
#include "shallow.h"
#include "promisor-remote.h"
#include "pack-mtimes.h"
/*
* Objects we are going to pack are collected in the `to_pack` structure.
@ -194,6 +195,8 @@ static int reuse_delta = 1, reuse_object = 1;
static int keep_unreachable, unpack_unreachable, include_tag;
static timestamp_t unpack_unreachable_expiration;
static int pack_loose_unreachable;
static int cruft;
static timestamp_t cruft_expiration;
static int local;
static int have_non_local_packs;
static int incremental;
@ -1260,9 +1263,13 @@ static void write_pack_file(void)
&to_pack, written_list, nr_written);
}
if (cruft)
pack_idx_opts.flags |= WRITE_MTIMES;
stage_tmp_packfiles(&tmpname, pack_tmp_name,
written_list, nr_written,
&pack_idx_opts, hash, &idx_tmp_name);
&to_pack, &pack_idx_opts, hash,
&idx_tmp_name);
if (write_bitmap_index) {
size_t tmpname_len = tmpname.len;
@ -1521,13 +1528,13 @@ static int want_object_in_pack(const struct object_id *oid,
return 1;
}
static void create_object_entry(const struct object_id *oid,
enum object_type type,
uint32_t hash,
int exclude,
int no_try_delta,
struct packed_git *found_pack,
off_t found_offset)
static struct object_entry *create_object_entry(const struct object_id *oid,
enum object_type type,
uint32_t hash,
int exclude,
int no_try_delta,
struct packed_git *found_pack,
off_t found_offset)
{
struct object_entry *entry;
@ -1544,6 +1551,8 @@ static void create_object_entry(const struct object_id *oid,
}
entry->no_try_delta = no_try_delta;
return entry;
}
static const char no_closure_warning[] = N_(
@ -3403,6 +3412,217 @@ static void read_packs_list_from_stdin(void)
string_list_clear(&exclude_packs, 0);
}
/*
 * Record `oid` as a member of the cruft pack being built and keep track
 * of the newest mtime seen for it.
 *
 * If the object is already present in `to_pack`, only its name hash and
 * no_try_delta hint are refreshed, and only when `name` is non-NULL.
 * Otherwise a new entry is created with create_object_entry(), unless
 * want_object_in_pack() rejects the object or it is a blob for which no
 * pack was found and which does not exist as a loose object.
 *
 * In either case the entry's cruft mtime is raised to `mtime` when
 * `mtime` is larger than the value currently recorded for the entry.
 */
static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
struct packed_git *pack, off_t offset,
const char *name, uint32_t mtime)
{
struct object_entry *entry;
/* every call counts as one "seen" object for the progress meter */
display_progress(progress_state, ++nr_seen);
entry = packlist_find(&to_pack, oid);
if (entry) {
/* already queued: only refresh the name-derived hints */
if (name) {
entry->hash = pack_name_hash(name);
entry->no_try_delta = no_try_delta(name);
}
} else {
/* want_object_in_pack() may update `pack` and `offset` */
if (!want_object_in_pack(oid, 0, &pack, &offset))
return;
if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) {
/*
 * If a traversed tree has a missing blob then we want
 * to avoid adding that missing object to our pack.
 *
 * This only applies to missing blobs, not trees,
 * because the traversal needs to parse sub-trees but
 * not blobs.
 *
 * Note that this check is only performed when the
 * object could not be found in a pack, so in effect it
 * ensures that non-tip blobs which do not exist in any
 * pack do exist as loose objects.
 */
return;
}
entry = create_object_entry(oid, type, pack_name_hash(name),
0, name && no_try_delta(name),
pack, offset);
}
/* only ever move the recorded mtime forward */
if (mtime > oe_cruft_mtime(&to_pack, entry))
oe_set_cruft_mtime(&to_pack, entry, mtime);
return;
}
static void show_cruft_object(struct object *obj, const char *name, void *data)
{
	const struct object_id *oid = &obj->oid;

	/*
	 * An object that was not recorded before the traversal must be
	 * at least as old as the expiration value, so that value is used
	 * as its mtime instead of looking up the exact one. This can
	 * move the mtime forward from the real one, but the object will
	 * still be "too old" the next time we run with the same
	 * expiration.
	 *
	 * For an object that is already in the packing list this call
	 * is a noop, apart from possibly setting the name hash.
	 */
	add_cruft_object_entry(oid, obj->type, NULL, 0, name, cruft_expiration);
}
/* Commit callback for the cruft traversal: handle the commit as a nameless object. */
static void show_cruft_commit(struct commit *commit, void *data)
{
	struct object *obj = (struct object*)commit;

	show_cruft_object(obj, NULL, data);
}
/*
 * Traversal filter: returns 0 for objects found in a pack that is marked
 * as kept in-core, and 1 for every other object.
 */
static int cruft_include_check_obj(struct object *obj, void *data)
{
	if (has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS))
		return 0;
	return 1;
}
/* Commit flavour of cruft_include_check_obj(); applies the same filter. */
static int cruft_include_check(struct commit *commit, void *data)
{
	struct object *obj = (struct object*)commit;

	return cruft_include_check_obj(obj, data);
}
/*
 * Callback handed to add_unseen_recent_objects_to_traversal(): records
 * the object, without a name, together with the mtime reported for it.
 */
static void set_cruft_mtime(const struct object *object,
			    struct packed_git *pack,
			    off_t offset, time_t mtime)
{
	const struct object_id *oid = &object->oid;

	add_cruft_object_entry(oid, object->type, pack, offset, NULL, mtime);
}
static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
{
struct string_list_item *item = NULL;
for_each_string_list_item(item, packs) {
struct packed_git *p = item->util;
if (!p)
die(_("could not find pack '%s'"), item->string);
p->pack_keep_in_core = keep;
}
}
static void add_unreachable_loose_objects(void);
static void add_objects_in_unpacked_packs(void);
/*
 * Collect cruft objects without any expiration: first the objects from
 * add_objects_in_unpacked_packs(), then those from
 * add_unreachable_loose_objects(). Both calls run under a single
 * "Enumerating cruft objects" progress meter when progress is enabled.
 */
static void enumerate_cruft_objects(void)
{
if (progress)
progress_state = start_progress(_("Enumerating cruft objects"), 0);
add_objects_in_unpacked_packs();
add_unreachable_loose_objects();
stop_progress(&progress_state);
}
/*
 * Collect cruft objects when an expiration is in effect.
 *
 * Works in two phases:
 *
 *  1. add_unseen_recent_objects_to_traversal() is called with
 *     `cruft_expiration` and the set_cruft_mtime() callback to seed the
 *     traversal and record object mtimes.
 *
 *  2. The seeded tips are walked with traverse_commit_list(); everything
 *     shown is passed to show_cruft_commit() / show_cruft_object().
 *
 * Between the phases the in-core keep flag is cleared on all packs and
 * set again only on `fresh_packs`.
 *
 * Dies if seeding the traversal or setting up the revision walk fails.
 */
static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs)
{
struct packed_git *p;
struct rev_info revs;
int ret;
repo_init_revisions(the_repository, &revs, NULL);
/* walk every object type, not just commits */
revs.tag_objects = 1;
revs.tree_objects = 1;
revs.blob_objects = 1;
/* filter out objects that live in packs marked as kept in-core */
revs.include_check = cruft_include_check;
revs.include_check_obj = cruft_include_check_obj;
revs.ignore_missing_links = 1;
if (progress)
progress_state = start_progress(_("Enumerating cruft objects"), 0);
ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration,
set_cruft_mtime, 1);
stop_progress(&progress_state);
if (ret)
die(_("unable to add cruft objects"));
/*
 * Re-mark only the fresh packs as kept so that objects in
 * unknown packs do not halt the reachability traversal early.
 */
for (p = get_all_packs(the_repository); p; p = p->next)
p->pack_keep_in_core = 0;
mark_pack_kept_in_core(fresh_packs, 1);
if (prepare_revision_walk(&revs))
die(_("revision walk setup failed"));
if (progress)
progress_state = start_progress(_("Traversing cruft objects"), 0);
/* restart the counter for the second progress meter */
nr_seen = 0;
traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL);
stop_progress(&progress_state);
}
/*
 * Read the pack list for a cruft pack from stdin and collect the objects
 * that belong in it.
 *
 * Each non-empty input line names a pack. A line starting with '-'
 * names a pack to be discarded (the '-' is not part of the name); any
 * other line names a "fresh" pack. Fresh packs get their in-core keep
 * flag set, discarded packs get it cleared, and packs that appear in
 * neither list are marked as kept for the time being.
 *
 * Objects are then collected with
 * enumerate_and_traverse_cruft_objects() when `cruft_expiration` is
 * non-zero, and with enumerate_cruft_objects() otherwise.
 */
static void read_cruft_objects(void)
{
struct strbuf buf = STRBUF_INIT;
struct string_list discard_packs = STRING_LIST_INIT_DUP;
struct string_list fresh_packs = STRING_LIST_INIT_DUP;
struct packed_git *p;
ignore_packed_keep_in_core = 1;
while (strbuf_getline(&buf, stdin) != EOF) {
if (!buf.len)
continue;
if (*buf.buf == '-')
string_list_append(&discard_packs, buf.buf + 1);
else
string_list_append(&fresh_packs, buf.buf);
strbuf_reset(&buf);
}
/* string_list_lookup() below is used on these lists, so sort them first */
string_list_sort(&discard_packs);
string_list_sort(&fresh_packs);
/* attach each known pack to the list item that names it */
for (p = get_all_packs(the_repository); p; p = p->next) {
const char *pack_name = pack_basename(p);
struct string_list_item *item;
item = string_list_lookup(&fresh_packs, pack_name);
if (!item)
item = string_list_lookup(&discard_packs, pack_name);
if (item) {
item->util = p;
} else {
/*
 * This pack wasn't mentioned in either the "fresh" or
 * "discard" list, so the caller didn't know about it.
 *
 * Mark it as kept so that its objects are ignored by
 * add_unseen_recent_objects_to_traversal(). We'll
 * unmark it before starting the traversal so it doesn't
 * halt the traversal early.
 */
p->pack_keep_in_core = 1;
}
}
/* dies if a listed pack name did not match any known pack */
mark_pack_kept_in_core(&fresh_packs, 1);
mark_pack_kept_in_core(&discard_packs, 0);
if (cruft_expiration)
enumerate_and_traverse_cruft_objects(&fresh_packs);
else
enumerate_cruft_objects();
strbuf_release(&buf);
string_list_clear(&discard_packs, 0);
string_list_clear(&fresh_packs, 0);
}
static void read_object_list_from_stdin(void)
{
char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
@ -3535,7 +3755,24 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
uint32_t pos,
void *_data)
{
add_object_entry(oid, OBJ_NONE, "", 0);
if (cruft) {
off_t offset;
time_t mtime;
if (pack->is_cruft) {
if (load_pack_mtimes(pack) < 0)
die(_("could not load cruft pack .mtimes"));
mtime = nth_packed_mtime(pack, pos);
} else {
mtime = pack->mtime;
}
offset = nth_packed_object_offset(pack, pos);
add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
NULL, mtime);
} else {
add_object_entry(oid, OBJ_NONE, "", 0);
}
return 0;
}
@ -3559,7 +3796,19 @@ static int add_loose_object(const struct object_id *oid, const char *path,
return 0;
}
add_object_entry(oid, type, "", 0);
if (cruft) {
struct stat st;
if (stat(path, &st) < 0) {
if (errno == ENOENT)
return 0;
return error_errno("unable to stat %s", oid_to_hex(oid));
}
add_cruft_object_entry(oid, type, NULL, 0, NULL,
st.st_mtime);
} else {
add_object_entry(oid, type, "", 0);
}
return 0;
}
@ -3799,7 +4048,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
if (unpack_unreachable_expiration) {
revs->ignore_missing_links = 1;
if (add_unseen_recent_objects_to_traversal(revs,
unpack_unreachable_expiration))
unpack_unreachable_expiration, NULL, 0))
die(_("unable to add recent objects"));
if (prepare_revision_walk(revs))
die(_("revision walk setup failed"));
@ -3876,6 +4125,20 @@ static int option_parse_unpack_unreachable(const struct option *opt,
return 0;
}
/*
 * parse-options callback for `--cruft-expiration`.
 *
 * The negated form turns cruft mode off and resets the expiration to 0.
 * The positive form turns cruft mode on, and sets the expiration from
 * `arg` (parsed with approxidate()) when an argument was given.
 * Always returns 0.
 */
static int option_parse_cruft_expiration(const struct option *opt,
					 const char *arg, int unset)
{
	if (unset) {
		cruft = 0;
		cruft_expiration = 0;
		return 0;
	}

	cruft = 1;
	if (arg)
		cruft_expiration = approxidate(arg);
	return 0;
}
struct po_filter_data {
unsigned have_revs:1;
struct rev_info revs;
@ -3965,6 +4228,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"),
N_("unpack unreachable objects newer than <time>"),
PARSE_OPT_OPTARG, option_parse_unpack_unreachable),
OPT_BOOL(0, "cruft", &cruft, N_("create a cruft pack")),
OPT_CALLBACK_F(0, "cruft-expiration", NULL, N_("time"),
N_("expire cruft objects older than <time>"),
PARSE_OPT_OPTARG, option_parse_cruft_expiration),
OPT_BOOL(0, "sparse", &sparse,
N_("use the sparse reachability algorithm")),
OPT_BOOL(0, "thin", &thin,
@ -4091,7 +4358,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (!HAVE_THREADS && delta_search_threads != 1)
warning(_("no threads support, ignoring --threads"));
if (!pack_to_stdout && !pack_size_limit)
if (!pack_to_stdout && !pack_size_limit && !cruft)
pack_size_limit = pack_size_limit_cfg;
if (pack_to_stdout && pack_size_limit)
die(_("--max-pack-size cannot be used to build a pack for transfer"));
@ -4118,6 +4385,15 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (stdin_packs && use_internal_rev_list)
die(_("cannot use internal rev list with --stdin-packs"));
if (cruft) {
if (use_internal_rev_list)
die(_("cannot use internal rev list with --cruft"));
if (stdin_packs)
die(_("cannot use --stdin-packs with --cruft"));
if (pack_size_limit)
die(_("cannot use --max-pack-size with --cruft"));
}
/*
* "soft" reasons not to use bitmaps - for on-disk repack by default we want
*
@ -4174,7 +4450,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
the_repository);
prepare_packing_data(the_repository, &to_pack);
if (progress)
if (progress && !cruft)
progress_state = start_progress(_("Enumerating objects"), 0);
if (stdin_packs) {
/* avoids adding objects in excluded packs */
@ -4182,6 +4458,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
read_packs_list_from_stdin();
if (rev_list_unpacked)
add_unreachable_loose_objects();
} else if (cruft) {
read_cruft_objects();
} else if (!use_internal_rev_list) {
read_object_list_from_stdin();
} else if (pfd.have_revs) {

185
builtin/repack.c

@ -18,12 +18,21 @@
#include "pack-bitmap.h"
#include "refs.h"
#define ALL_INTO_ONE 1
#define LOOSEN_UNREACHABLE 2
#define PACK_CRUFT 4
#define DELETE_PACK 1
#define CRUFT_PACK 2
static int pack_everything;
static int delta_base_offset = 1;
static int pack_kept_objects = -1;
static int write_bitmaps = -1;
static int use_delta_islands;
static int run_update_server_info = 1;
static char *packdir, *packtmp_name, *packtmp;
static char *cruft_expiration;
static const char *const git_repack_usage[] = {
N_("git repack [<options>]"),
@ -35,9 +44,21 @@ static const char incremental_bitmap_conflict_error[] = N_(
"--no-write-bitmap-index or disable the pack.writebitmaps configuration."
);
/*
 * Settings for one pack-objects invocation, as consumed by
 * prepare_pack_objects().
 *
 * For the cruft pack, repack_config() can fill in `window`,
 * `window_memory`, `depth` and `threads` from the repack.cruftWindow,
 * repack.cruftWindowMemory, repack.cruftDepth and repack.cruftThreads
 * configuration variables.
 */
struct pack_objects_args {
const char *window;
const char *window_memory;
const char *depth;
const char *threads;
const char *max_pack_size;
int no_reuse_delta;
int no_reuse_object;
int quiet;
int local;
};
static int repack_config(const char *var, const char *value, void *cb)
{
struct pack_objects_args *cruft_po_args = cb;
if (!strcmp(var, "repack.usedeltabaseoffset")) {
delta_base_offset = git_config_bool(var, value);
return 0;
@ -59,6 +80,14 @@ static int repack_config(const char *var, const char *value, void *cb)
run_update_server_info = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "repack.cruftwindow"))
return git_config_string(&cruft_po_args->window, var, value);
if (!strcmp(var, "repack.cruftwindowmemory"))
return git_config_string(&cruft_po_args->window_memory, var, value);
if (!strcmp(var, "repack.cruftdepth"))
return git_config_string(&cruft_po_args->depth, var, value);
if (!strcmp(var, "repack.cruftthreads"))
return git_config_string(&cruft_po_args->threads, var, value);
return git_default_config(var, value, cb);
}
@ -131,10 +160,15 @@ static void collect_pack_filenames(struct string_list *fname_nonkept_list,
fname = xmemdupz(e->d_name, len);
if ((extra_keep->nr > 0 && i < extra_keep->nr) ||
(file_exists(mkpath("%s/%s.keep", packdir, fname))))
(file_exists(mkpath("%s/%s.keep", packdir, fname)))) {
string_list_append_nodup(fname_kept_list, fname);
else
string_list_append_nodup(fname_nonkept_list, fname);
} else {
struct string_list_item *item;
item = string_list_append_nodup(fname_nonkept_list,
fname);
if (file_exists(mkpath("%s/%s.mtimes", packdir, fname)))
item->util = (void*)(uintptr_t)CRUFT_PACK;
}
}
closedir(dir);
@ -153,18 +187,6 @@ static void remove_redundant_pack(const char *dir_name, const char *base_name)
strbuf_release(&buf);
}
struct pack_objects_args {
const char *window;
const char *window_memory;
const char *depth;
const char *threads;
const char *max_pack_size;
int no_reuse_delta;
int no_reuse_object;
int quiet;
int local;
};
static void prepare_pack_objects(struct child_process *cmd,
const struct pack_objects_args *args)
{
@ -219,6 +241,7 @@ static struct {
} exts[] = {
{".pack"},
{".rev", 1},
{".mtimes", 1},
{".bitmap", 1},
{".promisor", 1},
{".idx"},
@ -306,9 +329,6 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
die(_("could not finish pack-objects to repack promisor objects"));
}
#define ALL_INTO_ONE 1
#define LOOSEN_UNREACHABLE 2
struct pack_geometry {
struct packed_git **pack;
uint32_t pack_nr, pack_alloc;
@ -366,6 +386,8 @@ static void init_pack_geometry(struct pack_geometry **geometry_p,
if (string_list_has_string(existing_kept_packs, buf.buf))
continue;
}
if (p->is_cruft)
continue;
ALLOC_GROW(geometry->pack,
geometry->pack_nr + 1,
@ -572,9 +594,20 @@ static void midx_included_packs(struct string_list *include,
string_list_insert(include, strbuf_detach(&buf, NULL));
}
for_each_string_list_item(item, existing_nonkept_packs) {
if (!((uintptr_t)item->util & CRUFT_PACK)) {
/*
* no need to check DELETE_PACK, since we're not
* doing an ALL_INTO_ONE repack
*/
continue;
}
string_list_insert(include, xstrfmt("%s.idx", item->string));
}
} else {
for_each_string_list_item(item, existing_nonkept_packs) {
if (item->util)
if ((uintptr_t)item->util & DELETE_PACK)
continue;
string_list_insert(include, xstrfmt("%s.idx", item->string));
}
@ -628,6 +661,67 @@ static int write_midx_included_packs(struct string_list *include,
return finish_command(&cmd);
}
static int write_cruft_pack(const struct pack_objects_args *args,
const char *pack_prefix,
struct string_list *names,
struct string_list *existing_packs,
struct string_list *existing_kept_packs)
{
struct child_process cmd = CHILD_PROCESS_INIT;
struct strbuf line = STRBUF_INIT;
struct string_list_item *item;
FILE *in, *out;
int ret;
prepare_pack_objects(&cmd, args);
strvec_push(&cmd.args, "--cruft");
if (cruft_expiration)
strvec_pushf(&cmd.args, "--cruft-expiration=%s",
cruft_expiration);
strvec_push(&cmd.args, "--honor-pack-keep");
strvec_push(&cmd.args, "--non-empty");
strvec_push(&cmd.args, "--max-pack-size=0");
cmd.in = -1;
ret = start_command(&cmd);
if (ret)
return ret;
/*
* names has a confusing double use: it both provides the list
* of just-written new packs, and accepts the name of the cruft
* pack we are writing.
*
* By the time it is read here, it contains only the pack(s)
* that were just written, which is exactly the set of packs we
* want to consider kept.
*/
in = xfdopen(cmd.in, "w");
for_each_string_list_item(item, names)
fprintf(in, "%s-%s.pack\n", pack_prefix, item->string);
for_each_string_list_item(item, existing_packs)
fprintf(in, "-%s.pack\n", item->string);
for_each_string_list_item(item, existing_kept_packs)
fprintf(in, "%s.pack\n", item->string);
fclose(in);
out = xfdopen(cmd.out, "r");
while (strbuf_getline_lf(&line, out) != EOF) {
if (line.len != the_hash_algo->hexsz)
die(_("repack: Expecting full hex object ID lines only "
"from pack-objects."));
string_list_append(names, line.buf);
}
fclose(out);
strbuf_release(&line);
return finish_command(&cmd);
}
int cmd_repack(int argc, const char **argv, const char *prefix)
{
struct child_process cmd = CHILD_PROCESS_INIT;
@ -644,12 +738,12 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
int show_progress;
/* variables to be filled by option parsing */
int pack_everything = 0;
int delete_redundant = 0;
const char *unpack_unreachable = NULL;
int keep_unreachable = 0;
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
struct pack_objects_args po_args = {NULL};
struct pack_objects_args cruft_po_args = {NULL};
int geometric_factor = 0;
int write_midx = 0;
@ -659,6 +753,11 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
OPT_BIT('A', NULL, &pack_everything,
N_("same as -a, and turn unreachable objects loose"),
LOOSEN_UNREACHABLE | ALL_INTO_ONE),
OPT_BIT(0, "cruft", &pack_everything,
N_("same as -a, pack unreachable cruft objects separately"),
PACK_CRUFT),
OPT_STRING(0, "cruft-expiration", &cruft_expiration, N_("approxidate"),
N_("with -C, expire objects older than this")),
OPT_BOOL('d', NULL, &delete_redundant,
N_("remove redundant packs, and run git-prune-packed")),
OPT_BOOL('f', NULL, &po_args.no_reuse_delta,
@ -699,7 +798,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
OPT_END()
};
git_config(repack_config, NULL);
git_config(repack_config, &cruft_po_args);
argc = parse_options(argc, argv, prefix, builtin_repack_options,
git_repack_usage, 0);
@ -711,6 +810,15 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE)))
die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "-A");
if (pack_everything & PACK_CRUFT) {
pack_everything |= ALL_INTO_ONE;
if (unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE))
die(_("options '%s' and '%s' cannot be used together"), "--cruft", "-A");
if (keep_unreachable)
die(_("options '%s' and '%s' cannot be used together"), "--cruft", "-k");
}
if (write_bitmaps < 0) {
if (!write_midx &&
(!(pack_everything & ALL_INTO_ONE) || !is_bare_repository()))
@ -794,7 +902,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (pack_everything & ALL_INTO_ONE) {
repack_promisor_objects(&po_args, &names);
if (existing_nonkept_packs.nr && delete_redundant) {
if (existing_nonkept_packs.nr && delete_redundant &&
!(pack_everything & PACK_CRUFT)) {
for_each_string_list_item(item, &names) {
strvec_pushf(&cmd.args, "--keep-pack=%s-%s.pack",
packtmp_name, item->string);
@ -856,6 +965,33 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (!names.nr && !po_args.quiet)
printf_ln(_("Nothing new to pack."));
if (pack_everything & PACK_CRUFT) {
const char *pack_prefix;
if (!skip_prefix(packtmp, packdir, &pack_prefix))
die(_("pack prefix %s does not begin with objdir %s"),
packtmp, packdir);
if (*pack_prefix == '/')
pack_prefix++;
if (!cruft_po_args.window)
cruft_po_args.window = po_args.window;
if (!cruft_po_args.window_memory)
cruft_po_args.window_memory = po_args.window_memory;
if (!cruft_po_args.depth)
cruft_po_args.depth = po_args.depth;
if (!cruft_po_args.threads)
cruft_po_args.threads = po_args.threads;
cruft_po_args.local = po_args.local;
cruft_po_args.quiet = po_args.quiet;
ret = write_cruft_pack(&cruft_po_args, pack_prefix, &names,
&existing_nonkept_packs,
&existing_kept_packs);
if (ret)
return ret;
}
string_list_sort(&names);
for_each_string_list_item(item, &names) {
@ -910,7 +1046,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
* was given) and that we will actually delete this pack
* (if `-d` was given).
*/
item->util = (void*)(intptr_t)!string_list_has_string(&names, sha1);
if (!string_list_has_string(&names, sha1))
item->util = (void*)(uintptr_t)((size_t)item->util | DELETE_PACK);
}
}
@ -934,7 +1071,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (delete_redundant) {
int opts = 0;
for_each_string_list_item(item, &existing_nonkept_packs) {
if (!item->util)
if (!((uintptr_t)item->util & DELETE_PACK))
continue;
remove_redundant_pack(packdir, item->string);
}

2
bulk-checkin.c

@ -38,7 +38,7 @@ static void finish_tmp_packfile(struct strbuf *basename,
char *idx_tmp_name = NULL;
stage_tmp_packfiles(basename, pack_tmp_name, written_list, nr_written,
pack_idx_opts, hash, &idx_tmp_name);
NULL, pack_idx_opts, hash, &idx_tmp_name);
rename_tmp_packfile_idx(basename, &idx_tmp_name);
free(idx_tmp_name);

12
chunk-format.c

@ -181,3 +181,15 @@ int read_chunk(struct chunkfile *cf,
return CHUNK_NOT_FOUND;
}
/*
 * Translate a hash algorithm into the one-byte version number used for it:
 * 1 for GIT_HASH_SHA1, 2 for GIT_HASH_SHA256. Any other algorithm is a
 * fatal error reported through die().
 */
uint8_t oid_version(const struct git_hash_algo *algop)
{
	uint8_t version = 0;

	switch (hash_algo_by_ptr(algop)) {
	case GIT_HASH_SHA1:
		version = 1;
		break;
	case GIT_HASH_SHA256:
		version = 2;
		break;
	default:
		die(_("invalid hash version"));
	}

	return version;
}

3
chunk-format.h

@ -2,6 +2,7 @@
#define CHUNK_FORMAT_H
#include "git-compat-util.h"
#include "hash.h"
struct hashfile;
struct chunkfile;
@ -65,4 +66,6 @@ int read_chunk(struct chunkfile *cf,
chunk_read_fn fn,
void *data);
uint8_t oid_version(const struct git_hash_algo *algop);
#endif

18
commit-graph.c

@ -193,18 +193,6 @@ char *get_commit_graph_chain_filename(struct object_directory *odb)
return xstrfmt("%s/info/commit-graphs/commit-graph-chain", odb->path);
}
/*
 * File-local helper: version byte for the_hash_algo, 1 for SHA-1 and
 * 2 for SHA-256; dies on any other algorithm.
 */
static uint8_t oid_version(void)
{
switch (hash_algo_by_ptr(the_hash_algo)) {
case GIT_HASH_SHA1:
return 1;
case GIT_HASH_SHA256:
return 2;
default:
die(_("invalid hash version"));
}
}
static struct commit_graph *alloc_commit_graph(void)
{
struct commit_graph *g = xcalloc(1, sizeof(*g));
@ -365,9 +353,9 @@ struct commit_graph *parse_commit_graph(struct repository *r,
}
hash_version = *(unsigned char*)(data + 5);
if (hash_version != oid_version()) {
if (hash_version != oid_version(the_hash_algo)) {
error(_("commit-graph hash version %X does not match version %X"),
hash_version, oid_version());
hash_version, oid_version(the_hash_algo));
return NULL;
}
@ -1924,7 +1912,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
hashwrite_be32(f, GRAPH_SIGNATURE);
hashwrite_u8(f, GRAPH_VERSION);
hashwrite_u8(f, oid_version());
hashwrite_u8(f, oid_version(the_hash_algo));
hashwrite_u8(f, get_num_chunks(cf));
hashwrite_u8(f, ctx->num_commit_graphs_after - 1);

18
midx.c

@ -41,18 +41,6 @@
#define PACK_EXPIRED UINT_MAX
/*
 * File-local helper: version byte for the_hash_algo, 1 for SHA-1 and
 * 2 for SHA-256; dies on any other algorithm.
 */
static uint8_t oid_version(void)
{
switch (hash_algo_by_ptr(the_hash_algo)) {
case GIT_HASH_SHA1:
return 1;
case GIT_HASH_SHA256:
return 2;
default:
die(_("invalid hash version"));
}
}
const unsigned char *get_midx_checksum(struct multi_pack_index *m)
{
return m->data + m->data_len - the_hash_algo->rawsz;
@ -134,9 +122,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
m->version);
hash_version = m->data[MIDX_BYTE_HASH_VERSION];
if (hash_version != oid_version()) {
if (hash_version != oid_version(the_hash_algo)) {
error(_("multi-pack-index hash version %u does not match version %u"),
hash_version, oid_version());
hash_version, oid_version(the_hash_algo));
goto cleanup_fail;
}
m->hash_len = the_hash_algo->rawsz;
@ -420,7 +408,7 @@ static size_t write_midx_header(struct hashfile *f,
{
hashwrite_be32(f, MIDX_SIGNATURE);
hashwrite_u8(f, MIDX_VERSION);
hashwrite_u8(f, oid_version());
hashwrite_u8(f, oid_version(the_hash_algo));
hashwrite_u8(f, num_chunks);
hashwrite_u8(f, 0); /* unused */
hashwrite_be32(f, num_packs);

4
object-file.c

@ -997,7 +997,7 @@ int has_loose_object_nonlocal(const struct object_id *oid)
return check_and_freshen_nonlocal(oid, 0);
}
static int has_loose_object(const struct object_id *oid)
int has_loose_object(const struct object_id *oid)
{
return check_and_freshen(oid, 0);
}
@ -2040,6 +2040,8 @@ static int freshen_packed_object(const struct object_id *oid)
struct pack_entry e;
if (!find_pack_entry(the_repository, oid, &e))
return 0;
if (e.p->is_cruft)
return 0;
if (e.p->freshened)
return 1;
if (!freshen_file(e.p->pack_name))

12
object-store.h

@ -115,12 +115,20 @@ struct packed_git {
freshened:1,
do_not_close:1,
pack_promisor:1,
multi_pack_index:1;
multi_pack_index:1,
is_cruft:1;
unsigned char hash[GIT_MAX_RAWSZ];
struct revindex_entry *revindex;
const uint32_t *revindex_data;
const uint32_t *revindex_map;
size_t revindex_size;
/*
* mtimes_map points at the beginning of the memory mapped region of
* this pack's corresponding .mtimes file, and mtimes_size is the size
* of that .mtimes file
*/
const uint32_t *mtimes_map;
size_t mtimes_size;
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */