2007-09-11 05:03:25 +02:00
|
|
|
/*
|
|
|
|
* "git fetch"
|
|
|
|
*/
|
|
|
|
#include "cache.h"
|
2017-06-14 20:07:36 +02:00
|
|
|
#include "config.h"
|
2017-12-12 20:53:52 +01:00
|
|
|
#include "repository.h"
|
2007-09-11 05:03:25 +02:00
|
|
|
#include "refs.h"
|
2018-05-17 00:57:48 +02:00
|
|
|
#include "refspec.h"
|
2018-05-16 01:42:15 +02:00
|
|
|
#include "object-store.h"
|
2019-09-15 23:18:02 +02:00
|
|
|
#include "oidset.h"
|
2007-09-11 05:03:25 +02:00
|
|
|
#include "commit.h"
|
|
|
|
#include "builtin.h"
|
2008-07-21 20:03:49 +02:00
|
|
|
#include "string-list.h"
|
2007-09-11 05:03:25 +02:00
|
|
|
#include "remote.h"
|
|
|
|
#include "transport.h"
|
2007-11-11 08:29:47 +01:00
|
|
|
#include "run-command.h"
|
2007-12-04 08:25:47 +01:00
|
|
|
#include "parse-options.h"
|
chain kill signals for cleanup functions
If a piece of code wanted to do some cleanup before exiting
(e.g., cleaning up a lockfile or a tempfile), our usual
strategy was to install a signal handler that did something
like this:
do_cleanup(); /* actual work */
signal(signo, SIG_DFL); /* restore previous behavior */
raise(signo); /* deliver signal, killing ourselves */
For a single handler, this works fine. However, if we want
to clean up two _different_ things, we run into a problem.
The most recently installed handler will run, but when it
removes itself as a handler, it doesn't put back the first
handler.
This patch introduces sigchain, a tiny library for handling
a stack of signal handlers. You sigchain_push each handler,
and use sigchain_pop to restore whoever was before you in
the stack.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-01-22 07:02:35 +01:00
|
|
|
#include "sigchain.h"
|
2015-08-18 02:22:00 +02:00
|
|
|
#include "submodule-config.h"
|
2010-11-12 13:54:52 +01:00
|
|
|
#include "submodule.h"
|
2011-09-03 01:33:22 +02:00
|
|
|
#include "connected.h"
|
2020-07-28 22:23:39 +02:00
|
|
|
#include "strvec.h"
|
2016-07-01 18:03:30 +02:00
|
|
|
#include "utf8.h"
|
2017-08-19 00:20:21 +02:00
|
|
|
#include "packfile.h"
|
2017-12-08 16:58:44 +01:00
|
|
|
#include "list-objects-filter-options.h"
|
2018-07-20 18:33:04 +02:00
|
|
|
#include "commit-reach.h"
|
2019-08-19 11:11:20 +02:00
|
|
|
#include "branch.h"
|
2019-06-25 15:40:31 +02:00
|
|
|
#include "promisor-remote.h"
|
2019-09-03 04:22:02 +02:00
|
|
|
#include "commit-graph.h"
|
2020-04-30 21:48:50 +02:00
|
|
|
#include "shallow.h"
|
2021-12-01 23:15:44 +01:00
|
|
|
#include "worktree.h"
|
2007-09-11 05:03:25 +02:00
|
|
|
|
2019-06-18 22:25:27 +02:00
|
|
|
/*
 * 10 * 1000 ms = 10 seconds. Threshold before warning the user that the
 * forced-updates check is slow.
 * NOTE(review): presumably compared against forced_updates_ms — the
 * comparison site is not in this chunk; confirm.
 */
#define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
|
|
|
|
|
2007-12-04 08:25:47 +01:00
|
|
|
/*
 * Usage strings printed by "git fetch -h" and on option-parsing errors.
 * Each N_()-marked entry is translated by parse-options before display;
 * the list is NULL-terminated.
 */
static const char * const builtin_fetch_usage[] = {
	N_("git fetch [<options>] [<repository> [<refspec>...]]"),
	N_("git fetch [<options>] <group>"),
	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
	N_("git fetch --all [<options>]"),
	NULL
};
|
2007-09-11 05:03:25 +02:00
|
|
|
|
2007-12-04 08:25:47 +01:00
|
|
|
/*
 * Tag-following mode, stored in the file-scope "tags" variable.
 * Set to TAGS_SET by -t/--tags and to TAGS_UNSET by -n (see the
 * OPT_SET_INT entries in builtin_fetch_options).
 */
enum {
	TAGS_UNSET = 0,		/* -n: do not fetch tags */
	TAGS_DEFAULT = 1,	/* neither option given; use default behavior */
	TAGS_SET = 2		/* -t: fetch all tags and associated objects */
};
|
|
|
|
|
2013-07-13 11:36:24 +02:00
|
|
|
/*
 * File-scope state for "git fetch": values of -1 generally mean
 * "unspecified" (neither config nor command line set them).
 */
static int fetch_prune_config = -1; /* unspecified */
static int fetch_show_forced_updates = 1;	/* fetch.showForcedUpdates */
static uint64_t forced_updates_ms = 0;	/* NOTE(review): likely elapsed time of the forced-updates check — confirm at use site */
static int prefetch = 0;	/* --prefetch: place refs within refs/prefetch/ */
static int prune = -1; /* unspecified */
#define PRUNE_BY_DEFAULT 0 /* do we prune by default? */
static int fetch_prune_tags_config = -1; /* unspecified */
static int prune_tags = -1; /* unspecified */
#define PRUNE_TAGS_BY_DEFAULT 0 /* do we prune tags by default? */
/* boolean command-line flags (see builtin_fetch_options) */
static int all, append, dry_run, force, keep, multiple, update_head_ok;
static int write_fetch_head = 1;	/* whether to write .git/FETCH_HEAD */
static int verbosity, deepen_relative, set_upstream;
static int progress = -1;	/* unspecified; tri-state progress reporting */
static int enable_auto_gc = 1;
static int tags = TAGS_DEFAULT, unshallow, update_shallow, deepen;
/* -1 = unspecified; see fetch.parallel / submodule.fetchJobs config */
static int max_jobs = -1, submodule_fetch_jobs_config = -1;
static int fetch_parallel_config = 1;
static int atomic_fetch;	/* --atomic: all-or-nothing ref updates */
static enum transport_family family;
static const char *depth;
static const char *deepen_since;
static const char *upload_pack;
static struct string_list deepen_not = STRING_LIST_INIT_NODUP;
static struct strbuf default_rla = STRBUF_INIT;
static struct transport *gtransport;
/*
 * Separate transport for the backfill fetch of auto-followed tags; a
 * "taken-over" transport cannot be reused for a second request.
 */
static struct transport *gsecondary;
static const char *submodule_prefix = "";
static int recurse_submodules = RECURSE_SUBMODULES_DEFAULT;
static int recurse_submodules_default = RECURSE_SUBMODULES_ON_DEMAND;
static int shown_url = 0;
static struct refspec refmap = REFSPEC_INIT_FETCH;	/* --refmap values */
static struct list_objects_filter_options filter_options;
static struct string_list server_options = STRING_LIST_INIT_DUP;
static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
static int fetch_write_commit_graph = -1; /* unspecified */
static int stdin_refspecs = 0;	/* read additional refspecs from stdin */
static int negotiate_only;	/* negotiate without downloading a packfile */
|
2007-09-14 09:31:25 +02:00
|
|
|
|
2013-07-13 11:36:24 +02:00
|
|
|
static int git_fetch_config(const char *k, const char *v, void *cb)
|
|
|
|
{
|
|
|
|
if (!strcmp(k, "fetch.prune")) {
|
|
|
|
fetch_prune_config = git_config_bool(k, v);
|
|
|
|
return 0;
|
|
|
|
}
|
2017-06-01 02:30:50 +02:00
|
|
|
|
fetch: add a --prune-tags option and fetch.pruneTags config
Add a --prune-tags option to git-fetch, along with fetch.pruneTags
config option and a -P shorthand (-p is --prune). This allows for
doing any of:
git fetch -p -P
git fetch --prune --prune-tags
git fetch -p -P origin
git fetch --prune --prune-tags origin
Or simply:
git config fetch.prune true &&
git config fetch.pruneTags true &&
git fetch
Instead of the much more verbose:
git fetch --prune origin 'refs/tags/*:refs/tags/*' '+refs/heads/*:refs/remotes/origin/*'
Before this feature it was painful to support the use-case of pulling
from a repo which is having both its branches *and* tags deleted
regularly, and have our local references to reflect upstream.
At work we create deployment tags in the repo for each rollout, and
there's *lots* of those, so they're archived within weeks for
performance reasons.
Without this change it's hard to centrally configure such repos in
/etc/gitconfig (on servers that are only used for working with
them). You need to set fetch.prune=true globally, and then for each
repo:
git -C {} config --replace-all remote.origin.fetch "refs/tags/*:refs/tags/*" "^\+*refs/tags/\*:refs/tags/\*$"
Now I can simply set fetch.pruneTags=true in /etc/gitconfig as well,
and users running "git pull" will automatically get the pruning
semantics I want.
Even though "git remote" has corresponding "prune" and "update
--prune" subcommands I'm intentionally not adding a corresponding
prune-tags or "update --prune --prune-tags" mode to that command.
It's advertised (as noted in my recent "git remote doc: correct
dangerous lies about what prune does") as only modifying remote
tracking references, whereas any --prune-tags option is always going
to modify what from the user's perspective is a local copy of the tag,
since there's no such thing as a remote tracking tag.
Ideally add_prune_tags_to_fetch_refspec() would be something that
would use ALLOC_GROW() to grow the 'fetch` member of the 'remote'
struct. Instead I'm realloc-ing remote->fetch and adding the
tag_refspec to the end.
The reason is that parse_{fetch,push}_refspec which allocate the
refspec (ultimately remote->fetch) struct are called many places that
don't have access to a 'remote' struct. It would be hard to change all
their callsites to be amenable to carry around the bookkeeping
variables required for dynamic allocation.
All the other callers of the API first incrementally construct the
string version of the refspec in remote->fetch_refspec via
add_fetch_refspec(), before finally calling parse_fetch_refspec() via
some variation of remote_get().
It's less of a pain to deal with the one special case that needs to
modify already constructed refspecs than to chase down and change all
the other callsites. The API I'm adding is intentionally not
generalized because if we add more of these we'd probably want to
re-visit how this is done.
See my "Re: [BUG] git remote prune removes local tags, depending on
fetch config" (87po6ahx87.fsf@evledraar.gmail.com;
https://public-inbox.org/git/87po6ahx87.fsf@evledraar.gmail.com/) for
more background info.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-09 21:32:15 +01:00
|
|
|
if (!strcmp(k, "fetch.prunetags")) {
|
|
|
|
fetch_prune_tags_config = git_config_bool(k, v);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-06-18 22:25:26 +02:00
|
|
|
if (!strcmp(k, "fetch.showforcedupdates")) {
|
|
|
|
fetch_show_forced_updates = git_config_bool(k, v);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-06-01 02:30:50 +02:00
|
|
|
if (!strcmp(k, "submodule.recurse")) {
|
|
|
|
int r = git_config_bool(k, v) ?
|
|
|
|
RECURSE_SUBMODULES_ON : RECURSE_SUBMODULES_OFF;
|
|
|
|
recurse_submodules = r;
|
|
|
|
}
|
|
|
|
|
2017-08-02 21:49:18 +02:00
|
|
|
if (!strcmp(k, "submodule.fetchjobs")) {
|
2019-10-05 20:46:40 +02:00
|
|
|
submodule_fetch_jobs_config = parse_submodule_fetchjobs(k, v);
|
2017-08-02 21:49:18 +02:00
|
|
|
return 0;
|
2017-08-02 21:49:19 +02:00
|
|
|
} else if (!strcmp(k, "fetch.recursesubmodules")) {
|
|
|
|
recurse_submodules = parse_fetch_recurse_submodules_arg(k, v);
|
|
|
|
return 0;
|
2017-08-02 21:49:18 +02:00
|
|
|
}
|
|
|
|
|
2019-10-05 20:46:40 +02:00
|
|
|
if (!strcmp(k, "fetch.parallel")) {
|
|
|
|
fetch_parallel_config = git_config_int(k, v);
|
|
|
|
if (fetch_parallel_config < 0)
|
|
|
|
die(_("fetch.parallel cannot be negative"));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
fetch: load all default config at startup
When we start the git-fetch program, we call git_config to
load all config, but our callback only processes the
fetch.prune option; we do not chain to git_default_config at
all.
This means that we may not load some core configuration
which will have an effect. For instance, we do not load
core.logAllRefUpdates, which impacts whether or not we
create reflogs in a bare repository.
Note that I said "may" above. It gets even more exciting. If
we have to transfer actual objects as part of the fetch,
then we call fetch_pack as part of the same process. That
function loads its own config, which does chain to
git_default_config, impacting global variables which are
used by the rest of fetch. But if the fetch is a pure ref
update (e.g., a new ref which is a copy of an old one), we
skip fetch_pack entirely. So we get inconsistent results
depending on whether or not we have actual objects to
transfer or not!
Let's just load the core config at the start of fetch, so we
know we have it (we may also load it again as part of
fetch_pack, but that's OK; it's designed to be idempotent).
Our tests check both cases (with and without a pack). We
also check similar behavior for push for good measure, but
it already works as expected.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-04 14:11:19 +01:00
|
|
|
return git_default_config(k, v, cb);
|
2013-07-13 11:36:24 +02:00
|
|
|
}
|
|
|
|
|
2014-05-30 00:21:31 +02:00
|
|
|
static int parse_refmap_arg(const struct option *opt, const char *arg, int unset)
|
|
|
|
{
|
assert NOARG/NONEG behavior of parse-options callbacks
When we define a parse-options callback, the flags we put in the option
struct must match what the callback expects. For example, a callback
which does not handle the "unset" parameter should only be used with
PARSE_OPT_NONEG. But since the callback and the option struct are not
defined next to each other, it's easy to get this wrong (as earlier
patches in this series show).
Fortunately, the compiler can help us here: compiling with
-Wunused-parameters can show us which callbacks ignore their "unset"
parameters (and likewise, ones that ignore "arg" expect to be triggered
with PARSE_OPT_NOARG).
But after we've inspected a callback and determined that all of its
callers use the right flags, what do we do next? We'd like to silence
the compiler warning, but do so in a way that will catch any wrong calls
in the future.
We can do that by actually checking those variables and asserting that
they match our expectations. Because this is such a common pattern,
we'll introduce some helper macros. The resulting messages aren't
as descriptive as we could make them, but the file/line information from
BUG() is enough to identify the problem (and anyway, the point is that
these should never be seen).
Each of the annotated callbacks in this patch triggers
-Wunused-parameters, and was manually inspected to make sure all callers
use the correct options (so none of these BUGs should be triggerable).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-05 07:45:42 +01:00
|
|
|
BUG_ON_OPT_NEG(unset);
|
|
|
|
|
2014-05-30 00:21:31 +02:00
|
|
|
/*
|
|
|
|
* "git fetch --refmap='' origin foo"
|
|
|
|
* can be used to tell the command not to store anywhere
|
|
|
|
*/
|
2018-05-17 00:58:05 +02:00
|
|
|
refspec_append(&refmap, arg);
|
|
|
|
|
2014-05-30 00:21:31 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-12-04 08:25:47 +01:00
|
|
|
static struct option builtin_fetch_options[] = {
|
2008-11-15 01:14:24 +01:00
|
|
|
OPT__VERBOSITY(&verbosity),
|
2013-08-03 13:51:19 +02:00
|
|
|
OPT_BOOL(0, "all", &all,
|
|
|
|
N_("fetch from all remotes")),
|
2019-08-19 11:11:20 +02:00
|
|
|
OPT_BOOL(0, "set-upstream", &set_upstream,
|
|
|
|
N_("set upstream for git pull/fetch")),
|
2013-08-03 13:51:19 +02:00
|
|
|
OPT_BOOL('a', "append", &append,
|
|
|
|
N_("append to .git/FETCH_HEAD instead of overwriting")),
|
fetch: implement support for atomic reference updates
When executing a fetch, then git will currently allocate one reference
transaction per reference update and directly commit it. This means that
fetches are non-atomic: even if some of the reference updates fail,
others may still succeed and modify local references.
This is fine in many scenarios, but this strategy has its downsides.
- The view of remote references may be inconsistent and may show a
bastardized state of the remote repository.
- Batching together updates may improve performance in certain
scenarios. While the impact probably isn't as pronounced with loose
references, the upcoming reftable backend may benefit as it needs to
write less files in case the update is batched.
- The reference-update hook is currently being executed twice per
updated reference. While this doesn't matter when there is no such
hook, we have seen severe performance regressions when doing a
git-fetch(1) with reference-transaction hook when the remote
repository has hundreds of thousands of references.
Similar to `git push --atomic`, this commit thus introduces atomic
fetches. Instead of allocating one reference transaction per updated
reference, it causes us to only allocate a single transaction and commit
it as soon as all updates were received. If locking of any reference
fails, then we abort the complete transaction and don't update any
reference, which gives us an all-or-nothing fetch.
Note that this may not completely fix the first of above downsides, as
the consistent view also depends on the server-side. If the server
doesn't have a consistent view of its own references during the
reference negotiation phase, then the client would get the same
inconsistent view the server has. This is a separate problem though and,
if it actually exists, can be fixed at a later point.
This commit also changes the way we write FETCH_HEAD in case `--atomic`
is passed. Instead of writing changes as we go, we need to accumulate
all changes first and only commit them at the end when we know that all
reference updates succeeded. Ideally, we'd just do so via a temporary
file so that we don't need to carry all updates in-memory. This isn't
trivially doable though considering the `--append` mode, where we do not
truncate the file but simply append to it. And given that we support
concurrent processes appending to FETCH_HEAD at the same time without
any loss of data, seeding the temporary file with current contents of
FETCH_HEAD initially and then doing a rename wouldn't work either. So
this commit implements the simple strategy of buffering all changes and
appending them to the file on commit.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-12 13:27:52 +01:00
|
|
|
OPT_BOOL(0, "atomic", &atomic_fetch,
|
|
|
|
N_("use atomic transaction to update references")),
|
2012-08-20 14:32:09 +02:00
|
|
|
OPT_STRING(0, "upload-pack", &upload_pack, N_("path"),
|
|
|
|
N_("path to upload pack on remote end")),
|
2018-08-31 22:09:56 +02:00
|
|
|
OPT__FORCE(&force, N_("force overwrite of local reference"), 0),
|
2013-08-03 13:51:19 +02:00
|
|
|
OPT_BOOL('m', "multiple", &multiple,
|
|
|
|
N_("fetch from multiple remotes")),
|
2007-12-04 08:25:47 +01:00
|
|
|
OPT_SET_INT('t', "tags", &tags,
|
2012-08-20 14:32:09 +02:00
|
|
|
N_("fetch all tags and associated objects"), TAGS_SET),
|
2008-03-13 08:13:15 +01:00
|
|
|
OPT_SET_INT('n', NULL, &tags,
|
2012-08-20 14:32:09 +02:00
|
|
|
N_("do not fetch all tags (--no-tags)"), TAGS_UNSET),
|
2019-10-05 20:46:40 +02:00
|
|
|
OPT_INTEGER('j', "jobs", &max_jobs,
|
2015-12-16 01:04:12 +01:00
|
|
|
N_("number of submodules fetched in parallel")),
|
2021-04-16 14:49:57 +02:00
|
|
|
OPT_BOOL(0, "prefetch", &prefetch,
|
|
|
|
N_("modify the refspec to place all refs within refs/prefetch/")),
|
2013-08-03 13:51:19 +02:00
|
|
|
OPT_BOOL('p', "prune", &prune,
|
|
|
|
N_("prune remote-tracking branches no longer on remote")),
|
fetch: add a --prune-tags option and fetch.pruneTags config
Add a --prune-tags option to git-fetch, along with fetch.pruneTags
config option and a -P shorthand (-p is --prune). This allows for
doing any of:
git fetch -p -P
git fetch --prune --prune-tags
git fetch -p -P origin
git fetch --prune --prune-tags origin
Or simply:
git config fetch.prune true &&
git config fetch.pruneTags true &&
git fetch
Instead of the much more verbose:
git fetch --prune origin 'refs/tags/*:refs/tags/*' '+refs/heads/*:refs/remotes/origin/*'
Before this feature it was painful to support the use-case of pulling
from a repo which is having both its branches *and* tags deleted
regularly, and have our local references to reflect upstream.
At work we create deployment tags in the repo for each rollout, and
there's *lots* of those, so they're archived within weeks for
performance reasons.
Without this change it's hard to centrally configure such repos in
/etc/gitconfig (on servers that are only used for working with
them). You need to set fetch.prune=true globally, and then for each
repo:
git -C {} config --replace-all remote.origin.fetch "refs/tags/*:refs/tags/*" "^\+*refs/tags/\*:refs/tags/\*$"
Now I can simply set fetch.pruneTags=true in /etc/gitconfig as well,
and users running "git pull" will automatically get the pruning
semantics I want.
Even though "git remote" has corresponding "prune" and "update
--prune" subcommands I'm intentionally not adding a corresponding
prune-tags or "update --prune --prune-tags" mode to that command.
It's advertised (as noted in my recent "git remote doc: correct
dangerous lies about what prune does") as only modifying remote
tracking references, whereas any --prune-tags option is always going
to modify what from the user's perspective is a local copy of the tag,
since there's no such thing as a remote tracking tag.
Ideally add_prune_tags_to_fetch_refspec() would be something that
would use ALLOC_GROW() to grow the 'fetch` member of the 'remote'
struct. Instead I'm realloc-ing remote->fetch and adding the
tag_refspec to the end.
The reason is that parse_{fetch,push}_refspec which allocate the
refspec (ultimately remote->fetch) struct are called many places that
don't have access to a 'remote' struct. It would be hard to change all
their callsites to be amenable to carry around the bookkeeping
variables required for dynamic allocation.
All the other callers of the API first incrementally construct the
string version of the refspec in remote->fetch_refspec via
add_fetch_refspec(), before finally calling parse_fetch_refspec() via
some variation of remote_get().
It's less of a pain to deal with the one special case that needs to
modify already constructed refspecs than to chase down and change all
the other callsites. The API I'm adding is intentionally not
generalized because if we add more of these we'd probably want to
re-visit how this is done.
See my "Re: [BUG] git remote prune removes local tags, depending on
fetch config" (87po6ahx87.fsf@evledraar.gmail.com;
https://public-inbox.org/git/87po6ahx87.fsf@evledraar.gmail.com/) for
more background info.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-09 21:32:15 +01:00
|
|
|
OPT_BOOL('P', "prune-tags", &prune_tags,
|
|
|
|
N_("prune local tags no longer on remote and clobber changed tags")),
|
Use OPT_CALLBACK and OPT_CALLBACK_F
In the codebase, there are many options which use OPTION_CALLBACK in a
plain ol' struct definition. However, we have the OPT_CALLBACK and
OPT_CALLBACK_F macros which are meant to abstract these plain struct
definitions away. These macros are useful as they semantically signal to
developers that these are just normal callback option with nothing fancy
happening.
Replace plain struct definitions of OPTION_CALLBACK with OPT_CALLBACK or
OPT_CALLBACK_F where applicable. The heavy lifting was done using the
following (disgusting) shell script:
#!/bin/sh
do_replacement () {
tr '\n' '\r' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\s*0,\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK(\1,\2,\3,\4,\5,\6)/g' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK_F(\1,\2,\3,\4,\5,\6,\7)/g' |
tr '\r' '\n'
}
for f in $(git ls-files \*.c)
do
do_replacement <"$f" >"$f.tmp"
mv "$f.tmp" "$f"
done
The result was manually inspected and then reformatted to match the
style of the surrounding code. Finally, using
`git grep OPTION_CALLBACK \*.c`, leftover results which were not handled
by the script were manually transformed.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-28 10:36:28 +02:00
|
|
|
OPT_CALLBACK_F(0, "recurse-submodules", &recurse_submodules, N_("on-demand"),
|
2012-08-20 14:32:09 +02:00
|
|
|
N_("control recursive fetching of submodules"),
|
Use OPT_CALLBACK and OPT_CALLBACK_F
In the codebase, there are many options which use OPTION_CALLBACK in a
plain ol' struct definition. However, we have the OPT_CALLBACK and
OPT_CALLBACK_F macros which are meant to abstract these plain struct
definitions away. These macros are useful as they semantically signal to
developers that these are just normal callback option with nothing fancy
happening.
Replace plain struct definitions of OPTION_CALLBACK with OPT_CALLBACK or
OPT_CALLBACK_F where applicable. The heavy lifting was done using the
following (disgusting) shell script:
#!/bin/sh
do_replacement () {
tr '\n' '\r' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\s*0,\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK(\1,\2,\3,\4,\5,\6)/g' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK_F(\1,\2,\3,\4,\5,\6,\7)/g' |
tr '\r' '\n'
}
for f in $(git ls-files \*.c)
do
do_replacement <"$f" >"$f.tmp"
mv "$f.tmp" "$f"
done
The result was manually inspected and then reformatted to match the
style of the surrounding code. Finally, using
`git grep OPTION_CALLBACK \*.c`, leftover results which were not handled
by the script were manually transformed.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-28 10:36:28 +02:00
|
|
|
PARSE_OPT_OPTARG, option_fetch_parse_recurse_submodules),
|
2013-08-03 13:51:19 +02:00
|
|
|
OPT_BOOL(0, "dry-run", &dry_run,
|
|
|
|
N_("dry run")),
|
2020-08-18 16:25:22 +02:00
|
|
|
OPT_BOOL(0, "write-fetch-head", &write_fetch_head,
|
|
|
|
N_("write fetched references to the FETCH_HEAD file")),
|
2013-08-03 13:51:19 +02:00
|
|
|
OPT_BOOL('k', "keep", &keep, N_("keep downloaded pack")),
|
|
|
|
OPT_BOOL('u', "update-head-ok", &update_head_ok,
|
2012-08-20 14:32:09 +02:00
|
|
|
N_("allow updating of HEAD ref")),
|
|
|
|
OPT_BOOL(0, "progress", &progress, N_("force progress reporting")),
|
|
|
|
OPT_STRING(0, "depth", &depth, N_("depth"),
|
|
|
|
N_("deepen history of shallow clone")),
|
2016-06-12 12:53:59 +02:00
|
|
|
OPT_STRING(0, "shallow-since", &deepen_since, N_("time"),
|
|
|
|
N_("deepen history of shallow repository based on time")),
|
2016-06-12 12:54:04 +02:00
|
|
|
OPT_STRING_LIST(0, "shallow-exclude", &deepen_not, N_("revision"),
|
2016-12-04 23:03:59 +01:00
|
|
|
N_("deepen history of shallow clone, excluding rev")),
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, --depth argument is always relative with the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server..
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 12:54:09 +02:00
|
|
|
OPT_INTEGER(0, "deepen", &deepen_relative,
|
|
|
|
N_("deepen history of shallow clone")),
|
2018-05-20 17:42:58 +02:00
|
|
|
OPT_SET_INT_F(0, "unshallow", &unshallow,
|
|
|
|
N_("convert to a complete repository"),
|
|
|
|
1, PARSE_OPT_NONEG),
|
2012-08-20 14:32:09 +02:00
|
|
|
{ OPTION_STRING, 0, "submodule-prefix", &submodule_prefix, N_("dir"),
|
|
|
|
N_("prepend this to submodule path output"), PARSE_OPT_HIDDEN },
|
Use OPT_CALLBACK and OPT_CALLBACK_F
In the codebase, there are many options which use OPTION_CALLBACK in a
plain ol' struct definition. However, we have the OPT_CALLBACK and
OPT_CALLBACK_F macros which are meant to abstract these plain struct
definitions away. These macros are useful as they semantically signal to
developers that these are just normal callback option with nothing fancy
happening.
Replace plain struct definitions of OPTION_CALLBACK with OPT_CALLBACK or
OPT_CALLBACK_F where applicable. The heavy lifting was done using the
following (disgusting) shell script:
#!/bin/sh
do_replacement () {
tr '\n' '\r' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\s*0,\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK(\1,\2,\3,\4,\5,\6)/g' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK_F(\1,\2,\3,\4,\5,\6,\7)/g' |
tr '\r' '\n'
}
for f in $(git ls-files \*.c)
do
do_replacement <"$f" >"$f.tmp"
mv "$f.tmp" "$f"
done
The result was manually inspected and then reformatted to match the
style of the surrounding code. Finally, using
`git grep OPTION_CALLBACK \*.c`, leftover results which were not handled
by the script were manually transformed.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-28 10:36:28 +02:00
|
|
|
OPT_CALLBACK_F(0, "recurse-submodules-default",
|
2017-06-23 21:13:01 +02:00
|
|
|
&recurse_submodules_default, N_("on-demand"),
|
|
|
|
N_("default for recursive fetching of submodules "
|
|
|
|
"(lower priority than config files)"),
|
Use OPT_CALLBACK and OPT_CALLBACK_F
In the codebase, there are many options which use OPTION_CALLBACK in a
plain ol' struct definition. However, we have the OPT_CALLBACK and
OPT_CALLBACK_F macros which are meant to abstract these plain struct
definitions away. These macros are useful as they semantically signal to
developers that these are just normal callback option with nothing fancy
happening.
Replace plain struct definitions of OPTION_CALLBACK with OPT_CALLBACK or
OPT_CALLBACK_F where applicable. The heavy lifting was done using the
following (disgusting) shell script:
#!/bin/sh
do_replacement () {
tr '\n' '\r' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\s*0,\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK(\1,\2,\3,\4,\5,\6)/g' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK_F(\1,\2,\3,\4,\5,\6,\7)/g' |
tr '\r' '\n'
}
for f in $(git ls-files \*.c)
do
do_replacement <"$f" >"$f.tmp"
mv "$f.tmp" "$f"
done
The result was manually inspected and then reformatted to match the
style of the surrounding code. Finally, using
`git grep OPTION_CALLBACK \*.c`, leftover results which were not handled
by the script were manually transformed.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-28 10:36:28 +02:00
|
|
|
PARSE_OPT_HIDDEN, option_fetch_parse_recurse_submodules),
|
2013-12-05 14:02:42 +01:00
|
|
|
OPT_BOOL(0, "update-shallow", &update_shallow,
|
|
|
|
N_("accept refs that update .git/shallow")),
|
Use OPT_CALLBACK and OPT_CALLBACK_F
In the codebase, there are many options which use OPTION_CALLBACK in a
plain ol' struct definition. However, we have the OPT_CALLBACK and
OPT_CALLBACK_F macros which are meant to abstract these plain struct
definitions away. These macros are useful as they semantically signal to
developers that these are just normal callback option with nothing fancy
happening.
Replace plain struct definitions of OPTION_CALLBACK with OPT_CALLBACK or
OPT_CALLBACK_F where applicable. The heavy lifting was done using the
following (disgusting) shell script:
#!/bin/sh
do_replacement () {
tr '\n' '\r' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\s*0,\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK(\1,\2,\3,\4,\5,\6)/g' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK_F(\1,\2,\3,\4,\5,\6,\7)/g' |
tr '\r' '\n'
}
for f in $(git ls-files \*.c)
do
do_replacement <"$f" >"$f.tmp"
mv "$f.tmp" "$f"
done
The result was manually inspected and then reformatted to match the
style of the surrounding code. Finally, using
`git grep OPTION_CALLBACK \*.c`, leftover results which were not handled
by the script were manually transformed.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-28 10:36:28 +02:00
|
|
|
OPT_CALLBACK_F(0, "refmap", NULL, N_("refmap"),
|
|
|
|
N_("specify fetch refmap"), PARSE_OPT_NONEG, parse_refmap_arg),
|
2018-04-24 00:46:24 +02:00
|
|
|
OPT_STRING_LIST('o', "server-option", &server_options, N_("server-specific"), N_("option to transmit")),
|
2016-02-03 05:09:14 +01:00
|
|
|
OPT_SET_INT('4', "ipv4", &family, N_("use IPv4 addresses only"),
|
|
|
|
TRANSPORT_FAMILY_IPV4),
|
|
|
|
OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
|
|
|
|
TRANSPORT_FAMILY_IPV6),
|
2018-07-03 00:39:44 +02:00
|
|
|
OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"),
|
|
|
|
N_("report that we have only objects reachable from this object")),
|
fetch: teach independent negotiation (no packfile)
Currently, the packfile negotiation step within a Git fetch cannot be
done independent of sending the packfile, even though there is at least
one application wherein this is useful. Therefore, make it possible for
this negotiation step to be done independently. A subsequent commit will
use this for one such application - push negotiation.
This feature is for protocol v2 only. (An implementation for protocol v0
would require a separate implementation in the fetch, transport, and
transport helper code.)
In the protocol, the main hindrance towards independent negotiation is
that the server can unilaterally decide to send the packfile. This is
solved by a "wait-for-done" argument: the server will then wait for the
client to say "done". In practice, the client will never say it; instead
it will cease requests once it is satisfied.
In the client, the main change lies in the transport and transport
helper code. fetch_refs_via_pack() performs everything needed - protocol
version and capability checks, and the negotiation itself.
There are 2 code paths that do not go through fetch_refs_via_pack() that
needed to be individually excluded: the bundle transport (excluded
through requiring smart_options, which the bundle transport doesn't
support) and transport helpers that do not support takeover. If or when
we support independent negotiation for protocol v0, we will need to
modify these 2 code paths to support it. But for now, report failure if
independent negotiation is requested in these cases.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-04 23:16:01 +02:00
|
|
|
OPT_BOOL(0, "negotiate-only", &negotiate_only,
|
|
|
|
N_("do not fetch a packfile; instead, print ancestors of negotiation tips")),
|
2017-12-08 16:58:44 +01:00
|
|
|
OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
|
2020-09-17 20:11:44 +02:00
|
|
|
OPT_BOOL(0, "auto-maintenance", &enable_auto_gc,
|
|
|
|
N_("run 'maintenance --auto' after fetching")),
|
2019-06-19 11:46:30 +02:00
|
|
|
OPT_BOOL(0, "auto-gc", &enable_auto_gc,
|
2020-09-17 20:11:44 +02:00
|
|
|
N_("run 'maintenance --auto' after fetching")),
|
2019-06-18 22:25:26 +02:00
|
|
|
OPT_BOOL(0, "show-forced-updates", &fetch_show_forced_updates,
|
|
|
|
N_("check for forced-updates on all updated branches")),
|
2019-11-03 01:21:56 +01:00
|
|
|
OPT_BOOL(0, "write-commit-graph", &fetch_write_commit_graph,
|
|
|
|
N_("write the commit-graph after fetching")),
|
2020-08-18 06:01:32 +02:00
|
|
|
OPT_BOOL(0, "stdin", &stdin_refspecs,
|
|
|
|
N_("accept refspecs from stdin")),
|
2007-12-04 08:25:47 +01:00
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
fetch: fix deadlock when cleaning up lockfiles in async signals
When fetching packfiles, we write a bunch of lockfiles for the packfiles
we're writing into the repository. In order to not leave behind any
cruft in case we exit or receive a signal, we register both an exit
handler as well as signal handlers for common signals like SIGINT. These
handlers will then unlink the locks and free the data structure tracking
them. We have observed a deadlock in this logic though:
(gdb) bt
#0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969
#2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#3 0x0000000000662ab1 in string_list_clear ()
#4 0x000000000044f5bc in unlock_pack_on_signal ()
#5 <signal handler called>
#6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024
#7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#8 0x000000000065afd5 in strbuf_release ()
#9 0x000000000066ddb9 in delete_tempfile ()
#10 0x0000000000610d0b in files_transaction_cleanup.isra ()
#11 0x0000000000611718 in files_transaction_abort ()
#12 0x000000000060d2ef in ref_transaction_abort ()
#13 0x000000000060d441 in ref_transaction_prepare ()
#14 0x000000000060e0b5 in ref_transaction_commit ()
#15 0x00000000004511c2 in fetch_and_consume_refs ()
#16 0x000000000045279a in cmd_fetch ()
#17 0x0000000000407c48 in handle_builtin ()
#18 0x0000000000408df2 in cmd_main ()
#19 0x00000000004078b5 in main ()
The process was killed with a signal, which caused the signal handler to
kick in and try to free the data structures after we have unlinked the
locks. It then deadlocks while calling free(3P).
The root cause of this is that it is not allowed to call certain
functions in async-signal handlers, as specified by signal-safety(7).
Next to most I/O functions, this list of disallowed functions also
includes memory-handling functions like malloc(3P) and free(3P) because
they may not be reentrant. As a result, if we execute such functions in
the signal handler, then they may operate on inconsistent state and fail
in unexpected ways.
Fix this bug by not calling non-async-signal-safe functions when running
in the signal handler. We're about to re-raise the signal anyway and
will thus exit, so it's not much of a problem to keep the string list of
lockfiles untouched. Note that it's fine though to call unlink(2), so
we'll still clean up the lockfiles correctly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 11:55:47 +01:00
|
|
|
/*
 * Drop the packfile lockfiles held by the primary transport and, if a
 * secondary transport was created (used to backfill tags when the
 * "take-over" hack prevents reusing the primary one), by that secondary
 * transport as well.
 *
 * This may run inside an async signal handler (see
 * unlock_pack_on_signal); in that case the caller passes
 * TRANSPORT_UNLOCK_PACK_IN_SIGNAL_HANDLER in "flags" so that
 * transport_unlock_pack() restricts itself to async-signal-safe work
 * (unlink(2)) and does not free the bookkeeping data structures.
 */
static void unlock_pack(unsigned int flags)
{
	if (gtransport)
		transport_unlock_pack(gtransport, flags);
	if (gsecondary)
		transport_unlock_pack(gsecondary, flags);
}
|
|
|
|
|
|
|
|
/*
 * atexit(3) handler: we are not in a signal context here, so a full
 * unlock (flags == 0) that is free to release memory is fine.
 */
static void unlock_pack_atexit(void)
{
	unlock_pack(0);
}
|
|
|
|
|
|
|
|
/*
 * Signal handler: unlink the pack lockfiles, then re-deliver the signal
 * so the process still terminates with the original signal disposition.
 *
 * Only async-signal-safe functions may be called here (signal-safety(7));
 * notably malloc(3P)/free(3P) are off-limits since they may deadlock if
 * the signal interrupted an allocation. Hence the
 * TRANSPORT_UNLOCK_PACK_IN_SIGNAL_HANDLER flag, which limits cleanup to
 * unlink(2) and intentionally leaks the lockfile bookkeeping — we are
 * about to die anyway.
 */
static void unlock_pack_on_signal(int signo)
{
	unlock_pack(TRANSPORT_UNLOCK_PACK_IN_SIGNAL_HANDLER);
	/* restore the previous handler before re-raising */
	sigchain_pop(signo);
	raise(signo);
}
|
2007-09-11 05:03:25 +02:00
|
|
|
|
2007-09-18 10:54:53 +02:00
|
|
|
static void add_merge_config(struct ref **head,
|
2007-10-30 02:05:40 +01:00
|
|
|
const struct ref *remote_refs,
|
2007-09-18 10:54:53 +02:00
|
|
|
struct branch *branch,
|
|
|
|
struct ref ***tail)
|
2007-09-11 05:03:25 +02:00
|
|
|
{
|
2007-09-18 10:54:53 +02:00
|
|
|
int i;
|
2007-09-11 05:03:25 +02:00
|
|
|
|
2007-09-18 10:54:53 +02:00
|
|
|
for (i = 0; i < branch->merge_nr; i++) {
|
|
|
|
struct ref *rm, **old_tail = *tail;
|
2018-05-17 00:57:49 +02:00
|
|
|
struct refspec_item refspec;
|
2007-09-18 10:54:53 +02:00
|
|
|
|
|
|
|
for (rm = *head; rm; rm = rm->next) {
|
|
|
|
if (branch_merge_matches(branch, i, rm->name)) {
|
2013-05-11 18:15:59 +02:00
|
|
|
rm->fetch_head_status = FETCH_HEAD_MERGE;
|
2007-09-18 10:54:53 +02:00
|
|
|
break;
|
|
|
|
}
|
2007-09-11 05:03:25 +02:00
|
|
|
}
|
2007-09-18 10:54:53 +02:00
|
|
|
if (rm)
|
|
|
|
continue;
|
|
|
|
|
2007-10-27 08:09:48 +02:00
|
|
|
/*
|
2010-11-02 16:31:23 +01:00
|
|
|
* Not fetched to a remote-tracking branch? We need to fetch
|
2007-09-18 10:54:53 +02:00
|
|
|
* it anyway to allow this branch's "branch.$name.merge"
|
2008-09-09 12:28:30 +02:00
|
|
|
* to be honored by 'git pull', but we do not have to
|
2007-10-27 08:09:48 +02:00
|
|
|
* fail if branch.$name.merge is misconfigured to point
|
|
|
|
* at a nonexisting branch. If we were indeed called by
|
2008-09-09 12:28:30 +02:00
|
|
|
* 'git pull', it will notice the misconfiguration because
|
2007-10-27 08:09:48 +02:00
|
|
|
* there is no entry in the resulting FETCH_HEAD marked
|
|
|
|
* for merging.
|
2007-09-18 10:54:53 +02:00
|
|
|
*/
|
2010-03-12 23:27:33 +01:00
|
|
|
memset(&refspec, 0, sizeof(refspec));
|
2007-09-18 10:54:53 +02:00
|
|
|
refspec.src = branch->merge[i]->src;
|
2007-10-27 08:09:48 +02:00
|
|
|
get_fetch_map(remote_refs, &refspec, tail, 1);
|
2007-09-18 10:54:53 +02:00
|
|
|
for (rm = *old_tail; rm; rm = rm->next)
|
2013-05-11 18:15:59 +02:00
|
|
|
rm->fetch_head_status = FETCH_HEAD_MERGE;
|
2007-09-11 05:03:25 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-09-15 23:18:02 +02:00
|
|
|
static void create_fetch_oidset(struct ref **head, struct oidset *out)
|
2013-10-30 06:32:55 +01:00
|
|
|
{
|
|
|
|
struct ref *rm = *head;
|
|
|
|
while (rm) {
|
2019-09-15 23:18:02 +02:00
|
|
|
oidset_insert(out, &rm->old_oid);
|
2013-10-30 06:32:55 +01:00
|
|
|
rm = rm->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-25 22:25:04 +02:00
|
|
|
/*
 * An entry of a "refname -> object ID" hashmap (set up by
 * refname_hash_init() and populated by refname_hash_add() below).
 */
struct refname_hash_entry {
	struct hashmap_entry ent;
	/* object ID the ref pointed at when it was added to the map */
	struct object_id oid;
	/*
	 * Set to 1 (see clear_item()) to mark the entry as disqualified
	 * instead of removing it from the map.
	 */
	int ignore;
	/* NUL-terminated ref name, allocated inline via FLEX_ALLOC_MEM */
	char refname[FLEX_ARRAY];
};
|
|
|
|
|
|
|
|
static int refname_hash_entry_cmp(const void *hashmap_cmp_fn_data,
|
2019-10-07 01:30:37 +02:00
|
|
|
const struct hashmap_entry *eptr,
|
|
|
|
const struct hashmap_entry *entry_or_key,
|
2018-09-25 22:25:04 +02:00
|
|
|
const void *keydata)
|
|
|
|
{
|
2019-10-07 01:30:37 +02:00
|
|
|
const struct refname_hash_entry *e1, *e2;
|
2018-09-25 22:25:04 +02:00
|
|
|
|
2019-10-07 01:30:37 +02:00
|
|
|
e1 = container_of(eptr, const struct refname_hash_entry, ent);
|
|
|
|
e2 = container_of(entry_or_key, const struct refname_hash_entry, ent);
|
2018-09-25 22:25:04 +02:00
|
|
|
return strcmp(e1->refname, keydata ? keydata : e2->refname);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct refname_hash_entry *refname_hash_add(struct hashmap *map,
|
|
|
|
const char *refname,
|
|
|
|
const struct object_id *oid)
|
|
|
|
{
|
|
|
|
struct refname_hash_entry *ent;
|
|
|
|
size_t len = strlen(refname);
|
|
|
|
|
|
|
|
FLEX_ALLOC_MEM(ent, refname, refname, len);
|
2019-10-07 01:30:27 +02:00
|
|
|
hashmap_entry_init(&ent->ent, strhash(refname));
|
2018-09-25 22:25:04 +02:00
|
|
|
oidcpy(&ent->oid, oid);
|
2019-10-07 01:30:29 +02:00
|
|
|
hashmap_add(map, &ent->ent);
|
2018-09-25 22:25:04 +02:00
|
|
|
return ent;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * for_each_ref() callback: record one existing ref in the refname
 * hashmap passed through cbdata.  Always returns 0 so that the
 * iteration continues.
 */
static int add_one_refname(const char *refname,
			   const struct object_id *oid,
			   int flag, void *cbdata)
{
	struct hashmap *refname_map = cbdata;

	refname_hash_add(refname_map, refname, oid);
	return 0;
}
|
|
|
|
|
|
|
|
/* Set up "map" as a refname -> refname_hash_entry hashmap. */
static void refname_hash_init(struct hashmap *map)
{
	hashmap_init(map, refname_hash_entry_cmp, NULL, 0);
}
|
|
|
|
|
|
|
|
/* Return 1 if "refname" has been added to "map", 0 otherwise. */
static int refname_hash_exists(struct hashmap *map, const char *refname)
{
	const void *found;

	found = hashmap_get_from_hash(map, strhash(refname), refname);
	return found != NULL;
}
|
|
|
|
|
2019-06-04 04:13:28 +02:00
|
|
|
/*
 * Mark the entry as disqualified instead of removing it from the
 * hashmap it lives in.
 */
static void clear_item(struct refname_hash_entry *item)
{
	item->ignore = 1;
}
|
|
|
|
|
2018-06-28 00:30:21 +02:00
|
|
|
static void find_non_local_tags(const struct ref *refs,
|
|
|
|
struct ref **head,
|
|
|
|
struct ref ***tail)
|
2013-10-30 06:32:55 +01:00
|
|
|
{
|
2018-09-25 22:25:04 +02:00
|
|
|
struct hashmap existing_refs;
|
|
|
|
struct hashmap remote_refs;
|
2019-09-15 23:18:02 +02:00
|
|
|
struct oidset fetch_oids = OIDSET_INIT;
|
2018-09-25 22:25:04 +02:00
|
|
|
struct string_list remote_refs_list = STRING_LIST_INIT_NODUP;
|
|
|
|
struct string_list_item *remote_ref_item;
|
2013-10-30 06:32:55 +01:00
|
|
|
const struct ref *ref;
|
2018-09-25 22:25:04 +02:00
|
|
|
struct refname_hash_entry *item = NULL;
|
2020-02-21 22:47:28 +01:00
|
|
|
const int quick_flags = OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT;
|
2018-09-25 22:25:04 +02:00
|
|
|
|
|
|
|
refname_hash_init(&existing_refs);
|
|
|
|
refname_hash_init(&remote_refs);
|
2019-09-15 23:18:02 +02:00
|
|
|
create_fetch_oidset(head, &fetch_oids);
|
2013-10-30 06:32:55 +01:00
|
|
|
|
2018-09-25 22:25:04 +02:00
|
|
|
for_each_ref(add_one_refname, &existing_refs);
|
2018-06-28 00:30:21 +02:00
|
|
|
for (ref = refs; ref; ref = ref->next) {
|
2013-12-17 20:47:35 +01:00
|
|
|
if (!starts_with(ref->name, "refs/tags/"))
|
2013-10-30 06:32:55 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The peeled ref always follows the matching base
|
|
|
|
* ref, so if we see a peeled ref that we don't want
|
|
|
|
* to fetch then we can mark the ref entry in the list
|
|
|
|
* as one to ignore by setting util to NULL.
|
|
|
|
*/
|
2013-12-17 20:47:35 +01:00
|
|
|
if (ends_with(ref->name, "^{}")) {
|
fetch: use "quick" has_sha1_file for tag following
When we auto-follow tags in a fetch, we look at all of the
tags advertised by the remote and fetch ones where we don't
already have the tag, but we do have the object it peels to.
This involves a lot of calls to has_sha1_file(), some of
which we can reasonably expect to fail. Since 45e8a74
(has_sha1_file: re-check pack directory before giving up,
2013-08-30), this may cause many calls to
reprepare_packed_git(), which is potentially expensive.
This has gone unnoticed for several years because it
requires a fairly unique setup to matter:
1. You need to have a lot of packs on the client side to
make reprepare_packed_git() expensive (the most
expensive part is finding duplicates in an unsorted
list, which is currently quadratic).
2. You need a large number of tag refs on the server side
that are candidates for auto-following (i.e., that the
client doesn't have). Each one triggers a re-read of
the pack directory.
3. Under normal circumstances, the client would
auto-follow those tags and after one large fetch, (2)
would no longer be true. But if those tags point to
history which is disconnected from what the client
otherwise fetches, then it will never auto-follow, and
those candidates will impact it on every fetch.
So when all three are true, each fetch pays an extra
O(nr_tags * nr_packs^2) cost, mostly in string comparisons
on the pack names. This was exacerbated by 47bf4b0
(prepare_packed_git_one: refactor duplicate-pack check,
2014-06-30) which uses a slightly more expensive string
check, under the assumption that the duplicate check doesn't
happen very often (and it shouldn't; the real problem here
is how often we are calling reprepare_packed_git()).
This patch teaches fetch to use HAS_SHA1_QUICK to sacrifice
accuracy for speed, in cases where we might be racy with a
simultaneous repack. This is similar to the fix in 0eeb077
(index-pack: avoid excessive re-reading of pack directory,
2015-06-09). As with that case, it's OK for has_sha1_file()
to occasionally say "no I don't have it" when we do, because
the worst case is not a corruption, but simply that we may
fail to auto-follow a tag that points to it.
Here are results from the included perf script, which sets
up a situation similar to the one described above:
Test HEAD^ HEAD
----------------------------------------------------------
5550.4: fetch 11.21(10.42+0.78) 0.08(0.04+0.02) -99.3%
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-13 18:53:44 +02:00
|
|
|
if (item &&
|
2020-02-21 22:47:28 +01:00
|
|
|
!has_object_file_with_flags(&ref->old_oid, quick_flags) &&
|
2019-09-15 23:18:02 +02:00
|
|
|
!oidset_contains(&fetch_oids, &ref->old_oid) &&
|
2020-02-21 22:47:28 +01:00
|
|
|
!has_object_file_with_flags(&item->oid, quick_flags) &&
|
2019-09-15 23:18:02 +02:00
|
|
|
!oidset_contains(&fetch_oids, &item->oid))
|
2019-06-04 04:13:28 +02:00
|
|
|
clear_item(item);
|
2013-10-30 06:32:55 +01:00
|
|
|
item = NULL;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If item is non-NULL here, then we previously saw a
|
|
|
|
* ref not followed by a peeled reference, so we need
|
|
|
|
* to check if it is a lightweight tag that we want to
|
|
|
|
* fetch.
|
|
|
|
*/
|
fetch: use "quick" has_sha1_file for tag following
When we auto-follow tags in a fetch, we look at all of the
tags advertised by the remote and fetch ones where we don't
already have the tag, but we do have the object it peels to.
This involves a lot of calls to has_sha1_file(), some of
which we can reasonably expect to fail. Since 45e8a74
(has_sha1_file: re-check pack directory before giving up,
2013-08-30), this may cause many calls to
reprepare_packed_git(), which is potentially expensive.
This has gone unnoticed for several years because it
requires a fairly unique setup to matter:
1. You need to have a lot of packs on the client side to
make reprepare_packed_git() expensive (the most
expensive part is finding duplicates in an unsorted
list, which is currently quadratic).
2. You need a large number of tag refs on the server side
that are candidates for auto-following (i.e., that the
client doesn't have). Each one triggers a re-read of
the pack directory.
3. Under normal circumstances, the client would
auto-follow those tags and after one large fetch, (2)
would no longer be true. But if those tags point to
history which is disconnected from what the client
otherwise fetches, then it will never auto-follow, and
those candidates will impact it on every fetch.
So when all three are true, each fetch pays an extra
O(nr_tags * nr_packs^2) cost, mostly in string comparisons
on the pack names. This was exacerbated by 47bf4b0
(prepare_packed_git_one: refactor duplicate-pack check,
2014-06-30) which uses a slightly more expensive string
check, under the assumption that the duplicate check doesn't
happen very often (and it shouldn't; the real problem here
is how often we are calling reprepare_packed_git()).
This patch teaches fetch to use HAS_SHA1_QUICK to sacrifice
accuracy for speed, in cases where we might be racy with a
simultaneous repack. This is similar to the fix in 0eeb077
(index-pack: avoid excessive re-reading of pack directory,
2015-06-09). As with that case, it's OK for has_sha1_file()
to occasionally say "no I don't have it" when we do, because
the worst case is not a corruption, but simply that we may
fail to auto-follow a tag that points to it.
Here are results from the included perf script, which sets
up a situation similar to the one described above:
Test HEAD^ HEAD
----------------------------------------------------------
5550.4: fetch 11.21(10.42+0.78) 0.08(0.04+0.02) -99.3%
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-13 18:53:44 +02:00
|
|
|
if (item &&
|
2020-02-21 22:47:28 +01:00
|
|
|
!has_object_file_with_flags(&item->oid, quick_flags) &&
|
2019-09-15 23:18:02 +02:00
|
|
|
!oidset_contains(&fetch_oids, &item->oid))
|
2019-06-04 04:13:28 +02:00
|
|
|
clear_item(item);
|
2013-10-30 06:32:55 +01:00
|
|
|
|
|
|
|
item = NULL;
|
|
|
|
|
|
|
|
/* skip duplicates and refs that we already have */
|
2018-09-25 22:25:04 +02:00
|
|
|
if (refname_hash_exists(&remote_refs, ref->name) ||
|
|
|
|
refname_hash_exists(&existing_refs, ref->name))
|
2013-10-30 06:32:55 +01:00
|
|
|
continue;
|
|
|
|
|
2018-09-25 22:25:04 +02:00
|
|
|
item = refname_hash_add(&remote_refs, ref->name, &ref->old_oid |