builtin/pack-objects.c: --cruft without expiration
Teach `pack-objects` how to generate a cruft pack when no objects are
dropped (i.e., `--cruft-expiration=never`). Later patches will teach
`pack-objects` how to generate a cruft pack that prunes objects.
When generating a cruft pack which does not prune objects, we want to
collect all unreachable objects into a single pack (noting and updating
their mtimes as we accumulate them). Ordinary use will pass the result
of a `git repack -A` as a kept pack, so when this patch says "kept
pack", readers should think "reachable objects".
Generating a non-expiring cruft pack works as follows:
- Callers provide a list of every pack they know about, and indicate
which packs are about to be removed.
- All packs which are going to be removed (we'll call these the
redundant ones) are marked as kept in-core.
Any packs the caller did not mention (but are known to the
`pack-objects` process) are also marked as kept in-core. Packs not
mentioned by the caller are assumed to be unknown to them, i.e.,
they entered the repository after the caller decided which packs
should be kept and which should be discarded.
Since we do not want to include objects in these "unknown" packs
(because we don't know which of their objects are or aren't
reachable), these are also marked as kept in-core.
- Then, we enumerate all objects in the repository, and add them to
our packing list if they do not appear in an in-core kept pack.
This results in a new cruft pack which contains all known objects that
aren't included in the kept packs. When the kept pack is the result of
`git repack -A`, the resulting pack contains all unreachable objects.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-05-21 01:17:52 +02:00
|
|
|
#!/bin/sh

# One-line summary of what this script exercises.
# NOTE(review): presumably read by the sourced test-lib.sh -- confirm.
test_description="cruft pack related pack-objects tests"

# Load the test harness from the current directory.
. ./test-lib.sh

# Relative paths to the object store and its pack directory; the tests
# below use them after changing into a freshly initialized repository.
objdir=".git/objects"
packdir="${objdir}/pack"
|
|
|
|
|
|
|
|
# basic_cruft_pack_tests <expire>
#
# Registers six cruft-pack tests.  Every test passes <expire> to
# `git pack-objects --cruft --cruft-expiration=`, so the same scenarios
# can be run once per expiration policy.
#
# Arguments: $1 - value handed to --cruft-expiration
# Globals:   objdir, packdir (read); expire (written)
#
# `expire` is a plain (global) assignment on purpose: the test bodies are
# single-quoted, so "$expire" inside them is not expanded here but only
# when the body itself is run.
basic_cruft_pack_tests () {
	expire="$1"

	test_expect_success "unreachable loose objects are packed (expire $expire)" '
		git init repo &&
		test_when_finished "rm -fr repo" &&
		(
			cd repo &&

			test_commit base &&
			git repack -Ad &&
			test_commit loose &&

			# give two of the loose objects distinct mtimes
			test-tool chmtime +2000 "$objdir/$(test_oid_to_path \
				$(git rev-parse loose:loose.t))" &&
			test-tool chmtime +1000 "$objdir/$(test_oid_to_path \
				$(git rev-parse loose^{tree}))" &&

			# Record "<oid> <mtime>" for every object in base..loose.
			# The list goes through a file instead of a pipe so that
			# a failing rev-list is noticed, and the loop leaves the
			# subshell on the first error instead of hiding it.
			git rev-list --objects --no-object-names base..loose >objects &&
			while read oid
			do
				path="$objdir/$(test_oid_to_path "$oid")" &&
				mtime="$(test-tool chmtime --get "$path")" &&
				printf "%s %d\n" "$oid" "$mtime" ||
				exit 1
			done <objects >expect.raw &&
			sort -k1 <expect.raw >expect &&

			keep="$(basename "$(ls $packdir/pack-*.pack)")" &&
			cruft="$(echo $keep | git pack-objects --cruft \
				--cruft-expiration="$expire" $packdir/pack)" &&
			test-tool pack-mtimes "pack-$cruft.mtimes" >actual &&

			test_cmp expect actual
		)
	'

	test_expect_success "unreachable packed objects are packed (expire $expire)" '
		git init repo &&
		test_when_finished "rm -fr repo" &&
		(
			cd repo &&

			test_commit packed &&
			git repack -Ad &&
			test_commit other &&

			# move the objects of "other" out of loose storage and
			# into a pack of their own
			git rev-list --objects --no-object-names packed.. >objects &&
			keep="$(basename "$(ls $packdir/pack-*.pack)")" &&
			other="$(git pack-objects --delta-base-offset \
				$packdir/pack <objects)" &&
			git prune-packed &&

			# adjust the mtime of that pack and remember the result
			test-tool chmtime --get -100 "$packdir/pack-$other.pack" >expect &&

			cruft="$(git pack-objects --cruft --cruft-expiration="$expire" $packdir/pack <<-EOF
			$keep
			-pack-$other.pack
			EOF
			)" &&
			test-tool pack-mtimes "pack-$cruft.mtimes" >actual.raw &&

			# only the set of distinct mtimes is compared
			cut -d" " -f2 <actual.raw | sort -u >actual &&

			test_cmp expect actual
		)
	'

	test_expect_success "unreachable cruft objects are repacked (expire $expire)" '
		git init repo &&
		test_when_finished "rm -fr repo" &&
		(
			cd repo &&

			test_commit packed &&
			git repack -Ad &&
			test_commit other &&

			git rev-list --objects --no-object-names packed.. >objects &&
			keep="$(basename "$(ls $packdir/pack-*.pack)")" &&

			# first cruft pack, built from the loose objects
			cruft_a="$(echo $keep | git pack-objects --cruft --cruft-expiration="$expire" $packdir/pack)" &&
			git prune-packed &&
			# second cruft pack, built with the first one listed
			# with a leading "-"
			cruft_b="$(git pack-objects --cruft --cruft-expiration="$expire" $packdir/pack <<-EOF
			$keep
			-pack-$cruft_a.pack
			EOF
			)" &&

			test-tool pack-mtimes "pack-$cruft_a.mtimes" >expect.raw &&
			test-tool pack-mtimes "pack-$cruft_b.mtimes" >actual.raw &&

			sort <expect.raw >expect &&
			sort <actual.raw >actual &&

			test_cmp expect actual
		)
	'

	test_expect_success "multiple cruft packs (expire $expire)" '
		git init repo &&
		test_when_finished "rm -fr repo" &&
		(
			cd repo &&

			test_commit reachable &&
			git repack -Ad &&
			keep="$(basename "$(ls $packdir/pack-*.pack)")" &&

			test_commit cruft &&
			loose="$objdir/$(test_oid_to_path $(git rev-parse cruft))" &&

			# generate three copies of the cruft object in different
			# cruft packs, each with a unique mtime:
			#   - one expired (1000 seconds ago)
			#   - two non-expired (one 1000 seconds in the future,
			#     one 1500 seconds in the future)
			test-tool chmtime =-1000 "$loose" &&
			git pack-objects --cruft $packdir/pack-A <<-EOF &&
			$keep
			EOF
			test-tool chmtime =+1000 "$loose" &&
			git pack-objects --cruft $packdir/pack-B <<-EOF &&
			$keep
			-$(basename $(ls $packdir/pack-A-*.pack))
			EOF
			test-tool chmtime =+1500 "$loose" &&
			git pack-objects --cruft $packdir/pack-C <<-EOF &&
			$keep
			-$(basename $(ls $packdir/pack-A-*.pack))
			-$(basename $(ls $packdir/pack-B-*.pack))
			EOF

			# ensure the resulting cruft pack takes the most recent
			# mtime among all copies
			cruft="$(git pack-objects --cruft \
				--cruft-expiration="$expire" \
				$packdir/pack <<-EOF
			$keep
			-$(basename $(ls $packdir/pack-A-*.pack))
			-$(basename $(ls $packdir/pack-B-*.pack))
			-$(basename $(ls $packdir/pack-C-*.pack))
			EOF
			)" &&

			test-tool pack-mtimes "$(basename $(ls $packdir/pack-C-*.mtimes))" >expect.raw &&
			test-tool pack-mtimes "pack-$cruft.mtimes" >actual.raw &&

			sort expect.raw >expect &&
			sort actual.raw >actual &&
			test_cmp expect actual
		)
	'

	test_expect_success "cruft packs tolerate missing trees (expire $expire)" '
		git init repo &&
		test_when_finished "rm -fr repo" &&
		(
			cd repo &&

			test_commit reachable &&
			test_commit cruft &&

			tree="$(git rev-parse cruft^{tree})" &&

			# make the "cruft" commit unreachable
			git reset --hard reachable &&
			git tag -d cruft &&
			git reflog expire --all --expire=all &&

			# remove the unreachable tree, but leave the commit
			# which has it as its root tree intact
			rm -fr "$objdir/$(test_oid_to_path "$tree")" &&

			git repack -Ad &&
			basename $(ls $packdir/pack-*.pack) >in &&
			# succeeding at all is the assertion here
			git pack-objects --cruft --cruft-expiration="$expire" \
				$packdir/pack <in
		)
	'

	test_expect_success "cruft packs tolerate missing blobs (expire $expire)" '
		git init repo &&
		test_when_finished "rm -fr repo" &&
		(
			cd repo &&

			test_commit reachable &&
			test_commit cruft &&

			blob="$(git rev-parse cruft:cruft.t)" &&

			# make the "cruft" commit unreachable
			git reset --hard reachable &&
			git tag -d cruft &&
			git reflog expire --all --expire=all &&

			# remove the unreachable blob, but leave the commit (and
			# the root tree of that commit) intact
			rm -fr "$objdir/$(test_oid_to_path "$blob")" &&

			git repack -Ad &&
			basename $(ls $packdir/pack-*.pack) >in &&
			# succeeding at all is the assertion here
			git pack-objects --cruft --cruft-expiration="$expire" \
				$packdir/pack <in
		)
	'
}
|
|
|
|
|
|
|
|
# Run the shared battery with --cruft-expiration=never.
basic_cruft_pack_tests never
|
builtin/pack-objects.c: --cruft with expiration
In a previous patch, pack-objects learned how to generate a cruft pack
so long as no objects are dropped.
This patch teaches pack-objects to handle the case where a non-never
`--cruft-expiration` value is passed. This case is slightly more
complicated than before, because we want pack-objects to save
unreachable objects which would have been pruned when there is another
recent (i.e., non-prunable) unreachable object which reaches them.
We'll call these objects "unreachable but reachable-from-recent".
Here is how pack-objects handles `--cruft-expiration`:
- Instead of adding all objects outside of the kept pack(s) into the
packing list, only handle the ones whose mtime is within the grace
period.
- Construct a reachability traversal whose tips are the
unreachable-but-recent objects.
- Then, walk along that traversal, stopping if we reach an object in
the kept pack. At each step along the traversal, we add the object
we are visiting to the packing list.
In the majority of these cases, any object we visit in this traversal
will already be in our packing list. But we will sometimes encounter
reachable-from-recent cruft objects, which we want to retain even if
they aged out of the grace period.
The most subtle point of this process is that we actually don't need to
bother to update the rescued object's mtime. Even though we will write
an .mtimes file with a value that is older than the expiration window,
it will continue to survive cruft repacks so long as any objects which
reach it haven't aged out.
That is, a future repack will also exclude that object from the initial
packing list, only to discover it later on when doing the reachability
traversal.
Finally, stopping early once an object is found in a kept pack is safe
to do because the kept packs ordinarily represent which packs will
survive after repacking. Assuming that it _isn't_ safe to halt a
traversal early would mean that there is some ancestor object which is
missing, which implies repository corruption (i.e., the complete set of
reachable objects isn't present).
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-05-21 01:18:00 +02:00
|
|
|
# Run the same battery again with --cruft-expiration=2.weeks.ago.
basic_cruft_pack_tests 2.weeks.ago
|
|
|
|
|
|
|
|
# An annotated tag that is newer than the expiration cutoff must keep the
# (older) objects it points at in the cruft pack.
test_expect_success 'cruft tags rescue tagged objects' '
	git init repo &&
	test_when_finished "rm -fr repo" &&
	(
		cd repo &&

		test_commit packed &&
		git repack -Ad &&

		test_commit tagged &&
		git tag -a annotated -m tag &&

		# shift every object in packed..HEAD back by 1000 seconds;
		# leave the subshell on the first failure, since otherwise
		# only the status of the last iteration would be checked
		git rev-list --objects --no-object-names packed.. >objects &&
		while read oid
		do
			test-tool chmtime -1000 \
				"$objdir/$(test_oid_to_path $oid)" || exit 1
		done <objects &&

		# the tag object itself is shifted back by only 500 seconds
		test-tool chmtime -500 \
			"$objdir/$(test_oid_to_path $(git rev-parse annotated))" &&

		keep="$(basename "$(ls $packdir/pack-*.pack)")" &&
		cruft="$(echo $keep | git pack-objects --cruft \
			--cruft-expiration=750.seconds.ago \
			$packdir/pack)" &&
		test-tool pack-mtimes "pack-$cruft.mtimes" >actual.raw &&
		cut -f1 -d" " <actual.raw | sort >actual &&

		# expect the listed objects plus the tag object
		(
			cat objects &&
			git rev-parse annotated
		) >expect.raw &&
		sort <expect.raw >expect &&

		test_cmp expect actual
	)
'
|
|
|
|
|
|
|
|
# A commit that is newer than the expiration cutoff must keep its (older)
# parent commit and the trees/blobs below them in the cruft pack.
test_expect_success 'cruft commits rescue parents, trees' '
	git init repo &&
	test_when_finished "rm -fr repo" &&
	(
		cd repo &&

		test_commit packed &&
		git repack -Ad &&

		test_commit old &&
		test_commit new &&

		# shift every object in packed..new back by 1000 seconds;
		# leave the subshell on the first failure, since otherwise
		# only the status of the last iteration would be checked
		git rev-list --objects --no-object-names packed..new >objects &&
		while read object
		do
			test-tool chmtime -1000 \
				"$objdir/$(test_oid_to_path $object)" || exit 1
		done <objects &&
		# then move the HEAD commit forward again by 500 seconds
		test-tool chmtime +500 "$objdir/$(test_oid_to_path \
			$(git rev-parse HEAD))" &&

		keep="$(basename "$(ls $packdir/pack-*.pack)")" &&
		cruft="$(echo $keep | git pack-objects --cruft \
			--cruft-expiration=750.seconds.ago \
			$packdir/pack)" &&
		test-tool pack-mtimes "pack-$cruft.mtimes" >actual.raw &&

		# every listed object is expected in the cruft pack
		cut -d" " -f1 <actual.raw | sort >actual &&
		sort <objects >expect &&

		test_cmp expect actual
	)
'
|
|
|
|
|
|
|
|
# A tree that is newer than the expiration cutoff must keep its sub-trees
# and blobs in the cruft pack, while the older commit, root tree and
# top-level blob are left out.
test_expect_success 'cruft trees rescue sub-trees, blobs' '
	git init repo &&
	test_when_finished "rm -fr repo" &&
	(
		cd repo &&

		test_commit packed &&
		git repack -Ad &&

		mkdir -p dir/sub &&
		echo foo >foo &&
		echo bar >dir/bar &&
		echo baz >dir/sub/baz &&

		test_tick &&
		git add . &&
		git commit -m "pruned" &&

		# shift everything except the "dir" tree back by 1000 seconds
		for rev in HEAD HEAD^{tree} HEAD:foo \
			HEAD:dir/bar HEAD:dir/sub HEAD:dir/sub/baz
		do
			test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse $rev))" ||
			exit 1
		done &&
		# the "dir" tree is shifted back by only 500 seconds
		test-tool chmtime -500 "$objdir/$(test_oid_to_path $(git rev-parse HEAD:dir))" &&

		kept_pack="$(basename "$(ls $packdir/pack-*.pack)")" &&
		cruft_hash="$(echo $kept_pack | git pack-objects --cruft \
			--cruft-expiration=750.seconds.ago \
			$packdir/pack)" &&
		test-tool pack-mtimes "pack-$cruft_hash.mtimes" >actual.raw &&
		cut -f1 -d" " <actual.raw | sort >actual &&

		# only "dir" and what it contains is expected
		git rev-parse HEAD:dir HEAD:dir/bar HEAD:dir/sub HEAD:dir/sub/baz >expect.raw &&
		sort <expect.raw >expect &&

		test_cmp expect actual
	)
'
|
|
|
|
|
|
|
|
# When every unreachable object is older than the expiration cutoff, the
# resulting cruft pack must not list any object.
test_expect_success 'expired objects are pruned' '
	git init repo &&
	test_when_finished "rm -fr repo" &&
	(
		cd repo &&

		test_commit packed &&
		git repack -Ad &&

		test_commit pruned &&

		# shift every object in packed..pruned back by 1000 seconds;
		# leave the subshell on the first failure, since otherwise
		# only the status of the last iteration would be checked
		git rev-list --objects --no-object-names packed..pruned >objects &&
		while read object
		do
			test-tool chmtime -1000 \
				"$objdir/$(test_oid_to_path $object)" || exit 1
		done <objects &&

		keep="$(basename "$(ls $packdir/pack-*.pack)")" &&
		cruft="$(echo $keep | git pack-objects --cruft \
			--cruft-expiration=750.seconds.ago \
			$packdir/pack)" &&

		test-tool pack-mtimes "pack-$cruft.mtimes" >actual &&
		test_must_be_empty actual
	)
'
|
builtin/pack-objects.c: --cruft without expiration
Teach `pack-objects` how to generate a cruft pack when no objects are
dropped (i.e., `--cruft-expiration=never`). Later patches will teach
`pack-objects` how to generate a cruft pack that prunes objects.
When generating a cruft pack which does not prune objects, we want to
collect all unreachable objects into a single pack (noting and updating
their mtimes as we accumulate them). Ordinary use will pass the result
of a `git repack -A` as a kept pack, so when this patch says "kept
pack", readers should think "reachable objects".
Generating a non-expiring cruft pack works as follows:
- Callers provide a list of every pack they know about, and indicate
which packs are about to be removed.
- All packs which are going to be removed (we'll call these the
redundant ones) are marked as kept in-core.
Any packs the caller did not mention (but are known to the
`pack-objects` process) are also marked as kept in-core. Packs not
mentioned by the caller are assumed to be unknown to them, i.e.,
they entered the repository after the caller decided which packs
should be kept and which should be discarded.
Since we do not want to include objects in these "unknown" packs
(because we don't know which of their objects are or aren't
reachable), these are also marked as kept in-core.
- Then, we enumerate all objects in the repository, and add them to
our packing list if they do not appear in an in-core kept pack.
This results in a new cruft pack which contains all known objects that
aren't included in the kept packs. When the kept pack is the result of
`git repack -A`, the resulting pack contains all unreachable objects.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-05-21 01:17:52 +02:00
|
|
|
|
|
|
|
# NOTE(review): test_done is not defined in this file; presumably it comes
# from the sourced test-lib.sh and finishes the test run -- confirm.
test_done
|