git/refs.h

1022 lines
36 KiB
C
Raw Normal View History

#ifndef REFS_H
#define REFS_H
#include "commit.h"
struct object_id;
struct ref_store;
struct repository;
struct strbuf;
struct string_list;
struct string_list_item;
struct worktree;
/*
* Resolve a reference, recursively following symbolic refererences.
*
* Return the name of the non-symbolic reference that ultimately pointed
* at the resolved object name. The return value, if not NULL, is a
* pointer into either a static buffer or the input ref.
*
* If oid is non-NULL, store the referred-to object's name in it.
*
* If the reference cannot be resolved to an object, the behavior
* depends on the RESOLVE_REF_READING flag:
*
* - If RESOLVE_REF_READING is set, return NULL.
*
* - If RESOLVE_REF_READING is not set, clear oid and return the name of
* the last reference name in the chain, which will either be a non-symbolic
* reference or an undefined reference. If this is a prelude to
* "writing" to the ref, the return value is the name of the ref
* that will actually be created or changed.
*
* If the RESOLVE_REF_NO_RECURSE flag is passed, only resolves one
* level of symbolic reference. The value stored in oid for a symbolic
* reference will always be null_oid in this case, and the return
* value is the reference that the symref refers to directly.
*
* If flags is non-NULL, set the value that it points to the
* combination of REF_ISPACKED (if the reference was found among the
* packed references), REF_ISSYMREF (if the initial reference was a
* symbolic reference), REF_BAD_NAME (if the reference name is ill
* formed --- see RESOLVE_REF_ALLOW_BAD_NAME below), and REF_ISBROKEN
* (if the ref is malformed or has a bad name). See refs.h for more detail
* on each flag.
*
* If ref is not a properly-formatted, normalized reference, return
* NULL. If more than MAXDEPTH recursive symbolic lookups are needed,
* give up and return NULL.
*
* RESOLVE_REF_ALLOW_BAD_NAME allows resolving refs even when their
* name is invalid according to git-check-ref-format(1). If the name
* is bad then the value stored in oid will be null_oid and the two
* flags REF_ISBROKEN and REF_BAD_NAME will be set.
*
* Even with RESOLVE_REF_ALLOW_BAD_NAME, names that escape the refs/
* directory and do not consist of all caps and underscores cannot be
* resolved. The function returns NULL for such ref names.
* Caps and underscores refers to the special refs, such as HEAD,
* FETCH_HEAD and friends, that all live outside of the refs/ directory.
*/
#define RESOLVE_REF_READING 0x01
#define RESOLVE_REF_NO_RECURSE 0x02
#define RESOLVE_REF_ALLOW_BAD_NAME 0x04
struct pack_refs_opts {
unsigned int flags;
struct ref_exclusions *exclusions;
struct string_list *includes;
};
const char *refs_resolve_ref_unsafe(struct ref_store *refs,
const char *refname,
int resolve_flags,
struct object_id *oid,
refs API: remove "failure_errno" from refs_resolve_ref_unsafe() Remove the now-unused "failure_errno" parameter from the refs_resolve_ref_unsafe() signature. In my recent 96f6623ada0 (Merge branch 'ab/refs-errno-cleanup', 2021-11-29) series we made all of its callers explicitly request the errno via an output parameter. As that series shows all but one caller ended up passing in a boilerplate "ignore_errno", since they only cared about whether the return value was NULL or not, i.e. if the ref could be resolved. There was one small issue with that series fixed with a follow-up in 31e39123695 (Merge branch 'ab/refs-errno-cleanup', 2022-01-14) a small bug in that series was fixed. After those two there was one caller left in sequencer.c that used the "failure_errno', but as of the preceding commit it uses a boilerplate "ignore_errno" instead. This leaves the public refs API without any use of "failure_errno" at all. We could still do with a bit of cleanup and generalization between refs.c and refs/files-backend.c before the "reftable" integration lands, but that's all internal to the reference code itself. So let's remove this output parameter. Not only isn't it used now, but it's unlikely that we'll want it again in the future. We'd like to slowly move the refs API to a more file-backend independent way of communicating error codes, having it use a "failure_errno" was only the first step in that direction. If this or any other function needs to communicate what specifically is wrong with the requested "refname" it'll be better to have the function set some output enum of well-defined error states than piggy-backend on "errno". Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-26 15:37:01 +01:00
int *flags);
const char *resolve_ref_unsafe(const char *refname, int resolve_flags,
struct object_id *oid, int *flags);
char *refs_resolve_refdup(struct ref_store *refs,
const char *refname, int resolve_flags,
struct object_id *oid, int *flags);
char *resolve_refdup(const char *refname, int resolve_flags,
struct object_id *oid, int *flags);
int read_ref_full(const char *refname, int resolve_flags,
struct object_id *oid, int *flags);
int read_ref(const char *refname, struct object_id *oid);
refs: add ability for backends to special-case reading of symbolic refs Reading of symbolic and non-symbolic references is currently treated the same in reference backends: we always call `refs_read_raw_ref()` and then decide based on the returned flags what type it is. This has one downside though: symbolic references may be treated different from normal references in a backend from normal references. The packed-refs backend for example doesn't even know about symbolic references, and as a result it is pointless to even ask it for one. There are cases where we really only care about whether a reference is symbolic or not, but don't care about whether it exists at all or may be a non-symbolic reference. But it is not possible to optimize for this case right now, and as a consequence we will always first check for a loose reference to exist, and if it doesn't, we'll query the packed-refs backend for a known-to-not-be-symbolic reference. This is inefficient and requires us to search all packed references even though we know to not care for the result at all. Introduce a new function `refs_read_symbolic_ref()` which allows us to fix this case. This function will only ever return symbolic references and can thus optimize for the scenario layed out above. By default, if the backend doesn't provide an implementation for it, we just use the old code path and fall back to `read_raw_ref()`. But in case the backend provides its own, more efficient implementation, we will use that one instead. Note that this function is explicitly designed to not distinguish between missing references and non-symbolic references. If it did, we'd be forced to always search the packed-refs backend to see whether the symbolic reference the user asked for really doesn't exist, or if it exists as a non-symbolic reference. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-03-01 10:33:46 +01:00
int refs_read_symbolic_ref(struct ref_store *ref_store, const char *refname,
struct strbuf *referent);
/*
* Return 0 if a reference named refname could be created without
* conflicting with the name of an existing reference. Otherwise,
* return a negative value and write an explanation to err. If extras
* is non-NULL, it is a list of additional refnames with which refname
* is not allowed to conflict. If skip is non-NULL, ignore potential
* conflicts with refs in skip (e.g., because they are scheduled for
* deletion in the same operation). Behavior is undefined if the same
* name is listed in both extras and skip.
*
* Two reference names conflict if one of them exactly matches the
* leading components of the other; e.g., "foo/bar" conflicts with
* both "foo" and with "foo/bar/baz" but not with "foo/bar" or
* "foo/barbados".
*
* extras and skip must be sorted.
*/
int refs_verify_refname_available(struct ref_store *refs,
const char *refname,
const struct string_list *extras,
const struct string_list *skip,
struct strbuf *err);
int refs_ref_exists(struct ref_store *refs, const char *refname);
int ref_exists(const char *refname);
int should_autocreate_reflog(const char *refname);
int is_branch(const char *refname);
int refs_init_db(struct strbuf *err);
/*
refs: switch peel_ref() to peel_iterated_oid() The peel_ref() interface is confusing and error-prone: - it's typically used by ref iteration callbacks that have both a refname and oid. But since they pass only the refname, we may load the ref value from the filesystem again. This is inefficient, but also means we are open to a race if somebody simultaneously updates the ref. E.g., this: int some_ref_cb(const char *refname, const struct object_id *oid, ...) { if (!peel_ref(refname, &peeled)) printf("%s peels to %s", oid_to_hex(oid), oid_to_hex(&peeled); } could print nonsense. It is correct to say "refname peels to..." (you may see the "before" value or the "after" value, either of which is consistent), but mentioning both oids may be mixing before/after values. Worse, whether this is possible depends on whether the optimization to read from the current iterator value kicks in. So it is actually not possible with: for_each_ref(some_ref_cb); but it _is_ possible with: head_ref(some_ref_cb); which does not use the iterator mechanism (though in practice, HEAD should never peel to anything, so this may not be triggerable). - it must take a fully-qualified refname for the read_ref_full() code path to work. Yet we routinely pass it partial refnames from callbacks to for_each_tag_ref(), etc. This happens to work when iterating because there we do not call read_ref_full() at all, and only use the passed refname to check if it is the same as the iterator. But the requirements for the function parameters are quite unclear. Instead of taking a refname, let's instead take an oid. That fixes both problems. It's a little funny for a "ref" function not to involve refs at all. The key thing is that it's optimizing under the hood based on having access to the ref iterator. So let's change the name to make it clear why you'd want this function versus just peel_object(). There are two other directions I considered but rejected: - we could pass the peel information into the each_ref_fn callback. However, we don't know if the caller actually wants it or not. For packed-refs, providing it is essentially free. But for loose refs, we actually have to peel the object, which would be wasteful in most cases. We could likewise pass in a flag to the callback indicating whether the peeled information is known, but that complicates those callbacks, as they then have to decide whether to manually peel themselves. Plus it requires changing the interface of every callback, whether they care about peeling or not, and there are many of them. - we could make a function to return the peeled value of the current iterated ref (computing it if necessary), and BUG() otherwise. I.e.: int peel_current_iterated_ref(struct object_id *out); Each of the current callers is an each_ref_fn callback, so they'd mostly be happy. But: - we use those callbacks with functions like head_ref(), which do not use the iteration code. So we'd need to handle the fallback case there, anyway. - it's possible that a caller would want to call into generic code that sometimes is used during iteration and sometimes not. This encapsulates the logic to do the fast thing when possible, and fallback when necessary. The implementation is mostly obvious, but I want to call out a few things in the patch: - the test-tool coverage for peel_ref() is now meaningless, as it all collapses to a single peel_object() call (arguably they were pretty uninteresting before; the tricky part of that function is the fast-path we see during iteration, but these calls didn't trigger that). I've just dropped it entirely, though note that some other tests relied on the tags we created; I've moved that creation to the tests where it matters. - we no longer need to take a ref_store parameter, since we'd never look up a ref now. We do still rely on a global "current iterator" variable which _could_ be kept per-ref-store. But in practice this is only useful if there are multiple recursive iterations, at which point the more appropriate solution is probably a stack of iterators. No caller used the actual ref-store parameter anyway (they all call the wrapper that passes the_repository). - the original only kicked in the optimization when the "refname" pointer matched (i.e., not string comparison). We do likewise with the "oid" parameter here, but fall back to doing an actual oideq() call. This in theory lets us kick in the optimization more often, though in practice no current caller cares. It should never be wrong, though (peeling is a property of an object, so two refs pointing to the same object would peel identically). - the original took care not to touch the peeled out-parameter unless we found something to put in it. But no caller cares about this, and anyway, it is enforced by peel_object() itself (and even in the optimized iterator case, that's where we eventually end up). We can shorten the code and avoid an extra copy by just passing the out-parameter through the stack. Signed-off-by: Jeff King <peff@peff.net> Reviewed-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-20 20:44:43 +01:00
* Return the peeled value of the oid currently being iterated via
* for_each_ref(), etc. This is equivalent to calling:
*
* peel_object(oid, &peeled);
*
* with the "oid" value given to the each_ref_fn callback, except
* that some ref storage may be able to answer the query without
* actually loading the object in memory.
*/
refs: switch peel_ref() to peel_iterated_oid() The peel_ref() interface is confusing and error-prone: - it's typically used by ref iteration callbacks that have both a refname and oid. But since they pass only the refname, we may load the ref value from the filesystem again. This is inefficient, but also means we are open to a race if somebody simultaneously updates the ref. E.g., this: int some_ref_cb(const char *refname, const struct object_id *oid, ...) { if (!peel_ref(refname, &peeled)) printf("%s peels to %s", oid_to_hex(oid), oid_to_hex(&peeled); } could print nonsense. It is correct to say "refname peels to..." (you may see the "before" value or the "after" value, either of which is consistent), but mentioning both oids may be mixing before/after values. Worse, whether this is possible depends on whether the optimization to read from the current iterator value kicks in. So it is actually not possible with: for_each_ref(some_ref_cb); but it _is_ possible with: head_ref(some_ref_cb); which does not use the iterator mechanism (though in practice, HEAD should never peel to anything, so this may not be triggerable). - it must take a fully-qualified refname for the read_ref_full() code path to work. Yet we routinely pass it partial refnames from callbacks to for_each_tag_ref(), etc. This happens to work when iterating because there we do not call read_ref_full() at all, and only use the passed refname to check if it is the same as the iterator. But the requirements for the function parameters are quite unclear. Instead of taking a refname, let's instead take an oid. That fixes both problems. It's a little funny for a "ref" function not to involve refs at all. The key thing is that it's optimizing under the hood based on having access to the ref iterator. So let's change the name to make it clear why you'd want this function versus just peel_object(). There are two other directions I considered but rejected: - we could pass the peel information into the each_ref_fn callback. However, we don't know if the caller actually wants it or not. For packed-refs, providing it is essentially free. But for loose refs, we actually have to peel the object, which would be wasteful in most cases. We could likewise pass in a flag to the callback indicating whether the peeled information is known, but that complicates those callbacks, as they then have to decide whether to manually peel themselves. Plus it requires changing the interface of every callback, whether they care about peeling or not, and there are many of them. - we could make a function to return the peeled value of the current iterated ref (computing it if necessary), and BUG() otherwise. I.e.: int peel_current_iterated_ref(struct object_id *out); Each of the current callers is an each_ref_fn callback, so they'd mostly be happy. But: - we use those callbacks with functions like head_ref(), which do not use the iteration code. So we'd need to handle the fallback case there, anyway. - it's possible that a caller would want to call into generic code that sometimes is used during iteration and sometimes not. This encapsulates the logic to do the fast thing when possible, and fallback when necessary. The implementation is mostly obvious, but I want to call out a few things in the patch: - the test-tool coverage for peel_ref() is now meaningless, as it all collapses to a single peel_object() call (arguably they were pretty uninteresting before; the tricky part of that function is the fast-path we see during iteration, but these calls didn't trigger that). I've just dropped it entirely, though note that some other tests relied on the tags we created; I've moved that creation to the tests where it matters. - we no longer need to take a ref_store parameter, since we'd never look up a ref now. We do still rely on a global "current iterator" variable which _could_ be kept per-ref-store. But in practice this is only useful if there are multiple recursive iterations, at which point the more appropriate solution is probably a stack of iterators. No caller used the actual ref-store parameter anyway (they all call the wrapper that passes the_repository). - the original only kicked in the optimization when the "refname" pointer matched (i.e., not string comparison). We do likewise with the "oid" parameter here, but fall back to doing an actual oideq() call. This in theory lets us kick in the optimization more often, though in practice no current caller cares. It should never be wrong, though (peeling is a property of an object, so two refs pointing to the same object would peel identically). - the original took care not to touch the peeled out-parameter unless we found something to put in it. But no caller cares about this, and anyway, it is enforced by peel_object() itself (and even in the optimized iterator case, that's where we eventually end up). We can shorten the code and avoid an extra copy by just passing the out-parameter through the stack. Signed-off-by: Jeff King <peff@peff.net> Reviewed-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-20 20:44:43 +01:00
int peel_iterated_oid(const struct object_id *base, struct object_id *peeled);
/**
* Resolve refname in the nested "gitlink" repository in the specified
* submodule (which must be non-NULL). If the resolution is
* successful, return 0 and set oid to the name of the object;
* otherwise, return a non-zero value.
*/
int resolve_gitlink_ref(const char *submodule, const char *refname,
struct object_id *oid);
/*
* Return true iff abbrev_name is a possible abbreviation for
* full_name according to the rules defined by ref_rev_parse_rules in
* refs.c.
*/
int refname_match(const char *abbrev_name, const char *full_name);
/*
* Given a 'prefix' expand it by the rules in 'ref_rev_parse_rules' and add
* the results to 'prefixes'
*/
argv-array: rename to strvec The name "argv-array" isn't very good, because it describes what the data type can be used for (program argument arrays), not what it actually is (a dynamically-growing string array that maintains a NULL-terminator invariant). This leads to people being hesitant to use it for other cases where it would actually be a good fit. The existing name is also clunky to use. It's overly long, and the name often leads to saying things like "argv.argv" (i.e., the field names overlap with variable names, since they're describing the use, not the type). Let's give it a more neutral name. I settled on "strvec" because "vector" is the name for a dynamic array type in many programming languages. "strarray" would work, too, but it's longer and a bit more awkward to say (and don't we all say these things in our mind as we type them?). A more extreme direction would be a generic data structure which stores a NULL-terminated of _any_ type. That would be easy to do with void pointers, but we'd lose some type safety for the existing cases. Plus it raises questions about memory allocation and ownership. So I limited myself here to changing names only, and not semantics. If we do find a use for that more generic data type, we could perhaps implement it at a lower level and then provide type-safe wrappers around it for strings. But that can come later. This patch does the minimum to convert the struct and function names in the header and implementation, leaving a few things for follow-on patches: - files retain their original names for now - struct field names are retained for now - there's a preprocessor compat layer that lets most users remain the same for now. The exception is headers which made a manual forward declaration of the struct. I've converted them (and their dependent function declarations) here. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-28 22:23:25 +02:00
struct strvec;
void expand_ref_prefix(struct strvec *prefixes, const char *prefix);
int expand_ref(struct repository *r, const char *str, int len, struct object_id *oid, char **ref);
int repo_dwim_ref(struct repository *r, const char *str, int len,
struct object_id *oid, char **ref, int nonfatal_dangling_mark);
int repo_dwim_log(struct repository *r, const char *str, int len, struct object_id *oid, char **ref);
int dwim_log(const char *str, int len, struct object_id *oid, char **ref);
/*
* Retrieves the default branch name for newly-initialized repositories.
*
* The return value of `repo_default_branch_name()` is an allocated string. The
* return value of `git_default_branch_name()` is a singleton.
*/
const char *git_default_branch_name(int quiet);
char *repo_default_branch_name(struct repository *r, int quiet);
/*
* A ref_transaction represents a collection of reference updates that
* should succeed or fail together.
*
* Calling sequence
* ----------------
*
* - Allocate and initialize a `struct ref_transaction` by calling
* `ref_transaction_begin()`.
*
* - Specify the intended ref updates by calling one or more of the
* following functions:
* - `ref_transaction_update()`
* - `ref_transaction_create()`
* - `ref_transaction_delete()`
* - `ref_transaction_verify()`
*
* - Then either:
*
* - Optionally call `ref_transaction_prepare()` to prepare the
* transaction. This locks all references, checks preconditions,
* etc. but doesn't finalize anything. If this step fails, the
* transaction has been closed and can only be freed. If this step
* succeeds, then `ref_transaction_commit()` is almost certain to
* succeed. However, you can still call `ref_transaction_abort()`
* if you decide not to commit the transaction after all.
*
* - Call `ref_transaction_commit()` to execute the transaction,
* make the changes permanent, and release all locks. If you
* haven't already called `ref_transaction_prepare()`, then
* `ref_transaction_commit()` calls it for you.
*
* Or
*
* - Call `initial_ref_transaction_commit()` if the ref database is
* known to be empty and have no other writers (e.g. during
* clone). This is likely to be much faster than
* `ref_transaction_commit()`. `ref_transaction_prepare()` should
* *not* be called before `initial_ref_transaction_commit()`.
*
* - Then finally, call `ref_transaction_free()` to free the
* `ref_transaction` data structure.
*
* At any time before calling `ref_transaction_commit()`, you can call
* `ref_transaction_abort()` to abort the transaction, rollback any
* locks, and free any associated resources (including the
* `ref_transaction` data structure).
*
* Putting it all together, a complete reference update looks like
*
* struct ref_transaction *transaction;
* struct strbuf err = STRBUF_INIT;
* int ret = 0;
*
* transaction = ref_store_transaction_begin(refs, &err);
* if (!transaction ||
* ref_transaction_update(...) ||
* ref_transaction_create(...) ||
* ...etc... ||
* ref_transaction_commit(transaction, &err)) {
* error("%s", err.buf);
* ret = -1;
* }
* ref_transaction_free(transaction);
* strbuf_release(&err);
* return ret;
*
* Error handling
* --------------
*
* On error, transaction functions append a message about what
* went wrong to the 'err' argument. The message mentions what
* ref was being updated (if any) when the error occurred so it
* can be passed to 'die' or 'error' as-is.
*
* The message is appended to err without first clearing err.
* err will not be '\n' terminated.
*
* Caveats
* -------
*
* Note that no locks are taken, and no refs are read, until
* `ref_transaction_prepare()` or `ref_transaction_commit()` is
* called. So, for example, `ref_transaction_verify()` won't report a
* verification failure until the commit is attempted.
*/
struct ref_transaction;
/*
refs: introduce an iterator interface Currently, the API for iterating over references is via a family of for_each_ref()-type functions that invoke a callback function for each selected reference. All of these eventually call do_for_each_ref(), which knows how to do one thing: iterate in parallel through two ref_caches, one for loose and one for packed refs, giving loose references precedence over packed refs. This is rather complicated code, and is quite specialized to the files backend. It also requires callers to encapsulate their work into a callback function, which often means that they have to define and use a "cb_data" struct to manage their context. The current design is already bursting at the seams, and will become even more awkward in the upcoming world of multiple reference storage backends: * Per-worktree vs. shared references are currently handled via a kludge in git_path() rather than iterating over each part of the reference namespace separately and merging the results. This kludge will cease to work when we have multiple reference storage backends. * The current scheme is inflexible. What if we sometimes want to bypass the ref_cache, or use it only for packed or only for loose refs? What if we want to store symbolic refs in one type of storage backend and non-symbolic ones in another? In the future, each reference backend will need to define its own way of iterating over references. The crux of the problem with the current design is that it is impossible to compose for_each_ref()-style iterations, because the flow of control is owned by the for_each_ref() function. There is nothing that a caller can do but iterate through all references in a single burst, so there is no way for it to interleave references from multiple backends and present the result to the rest of the world as a single compound backend. This commit introduces a new iteration primitive for references: a ref_iterator. A ref_iterator is a polymorphic object that a reference storage backend can be asked to instantiate. There are three functions that can be applied to a ref_iterator: * ref_iterator_advance(): move to the next reference in the iteration * ref_iterator_abort(): end the iteration before it is exhausted * ref_iterator_peel(): peel the reference currently being looked at Iterating using a ref_iterator leaves the flow of control in the hands of the caller, which means that ref_iterators from multiple sources (e.g., loose and packed refs) can be composed and presented to the world as a single compound ref_iterator. It also means that the backend code for implementing reference iteration will sometimes be more complicated. For example, the cache_ref_iterator (which iterates over a ref_cache) can't use the C stack to recurse; instead, it must manage its own stack internally as explicit data structures. There is also a lot of boilerplate connected with object-oriented programming in C. Eventually, end-user callers will be able to be written in a more natural way—managing their own flow of control rather than having to work via callbacks. Since there will only be a few reference backends but there are many consumers of this API, this is a good tradeoff. More importantly, we gain composability, and especially the possibility of writing interchangeable parts that can work with any ref_iterator. For example, merge_ref_iterator implements a generic way of merging the contents of any two ref_iterators. It is used to merge loose + packed refs as part of the implementation of the files_ref_iterator. But it will also be possible to use it to merge other pairs of reference sources (e.g., per-worktree vs. shared refs). Another example is prefix_ref_iterator, which can be used to trim a prefix off the front of reference names before presenting them to the caller (e.g., "refs/heads/master" -> "master"). In this patch, we introduce the iterator abstraction and many utilities, and implement a reference iterator for the files ref storage backend. (I've written several other obvious utilities, for example a generic way to filter references being iterated over. These will probably be useful in the future. But they are not needed for this patch series, so I am not including them at this time.) In a moment we will rewrite do_for_each_ref() to work via reference iterators (allowing some special-purpose code to be discarded), and do something similar for reflogs. In future patch series, we will expose the ref_iterator abstraction in the public refs API so that callers can use it directly. Implementation note: I tried abstracting this a layer further to allow generic iterators (over arbitrary types of objects) and generic utilities like a generic merge_iterator. But the implementation in C was very cumbersome, involving (in my opinion) too much boilerplate and too much unsafe casting, some of which would have had to be done on the caller side. However, I did put a few iterator-related constants in a top-level header file, iterator.h, as they will be useful in a moment to implement iteration over directory trees and possibly other types of iterators in the future. Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com> Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-18 06:15:15 +02:00
* Bit values set in the flags argument passed to each_ref_fn() and
* stored in ref_iterator::flags. Other bits are for internal use
* only:
*/
/* Reference is a symbolic reference. */
#define REF_ISSYMREF 0x01
/* Reference is a packed reference. */
#define REF_ISPACKED 0x02
/*
* Reference cannot be resolved to an object name: dangling symbolic
refs.c: allow listing and deleting badly named refs We currently do not handle badly named refs well: $ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\. $ git branch fatal: Reference has invalid format: 'refs/heads/master.....@*@\.' $ git branch -D master.....@\*@\\. error: branch 'master.....@*@\.' not found. Users cannot recover from a badly named ref without manually finding and deleting the loose ref file or appropriate line in packed-refs. Making that easier will make it easier to tweak the ref naming rules in the future, for example to forbid shell metacharacters like '`' and '"', without putting people in a state that is hard to get out of. So allow "branch --list" to show these refs and allow "branch -d/-D" and "update-ref -d" to delete them. Other commands (for example to rename refs) will continue to not handle these refs but can be changed in later patches. Details: In resolving functions, refuse to resolve refs that don't pass the git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to resolve refs that escape the refs/ directory and do not match the pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD"). In locking functions, refuse to act on badly named refs unless they are being deleted and either are in the refs/ directory or match [A-Z_]*. Just like other invalid refs, flag resolved, badly named refs with the REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them in all iteration functions except for for_each_rawref. Flag badly named refs (but not symrefs pointing to badly named refs) with a REF_BAD_NAME flag to make it easier for future callers to notice and handle them specially. For example, in a later patch for-each-ref will use this flag to detect refs whose names can confuse callers parsing for-each-ref output. In the transaction API, refuse to create or update badly named refs, but allow deleting them (unless they try to escape refs/ and don't match [A-Z_]*). Signed-off-by: Ronnie Sahlberg <sahlberg@google.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
* reference (directly or indirectly), corrupt reference file,
* reference exists but name is bad, or symbolic reference refers to
* ill-formatted reference name.
*/
#define REF_ISBROKEN 0x04
refs.c: allow listing and deleting badly named refs We currently do not handle badly named refs well: $ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\. $ git branch fatal: Reference has invalid format: 'refs/heads/master.....@*@\.' $ git branch -D master.....@\*@\\. error: branch 'master.....@*@\.' not found. Users cannot recover from a badly named ref without manually finding and deleting the loose ref file or appropriate line in packed-refs. Making that easier will make it easier to tweak the ref naming rules in the future, for example to forbid shell metacharacters like '`' and '"', without putting people in a state that is hard to get out of. So allow "branch --list" to show these refs and allow "branch -d/-D" and "update-ref -d" to delete them. Other commands (for example to rename refs) will continue to not handle these refs but can be changed in later patches. Details: In resolving functions, refuse to resolve refs that don't pass the git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to resolve refs that escape the refs/ directory and do not match the pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD"). In locking functions, refuse to act on badly named refs unless they are being deleted and either are in the refs/ directory or match [A-Z_]*. Just like other invalid refs, flag resolved, badly named refs with the REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them in all iteration functions except for for_each_rawref. Flag badly named refs (but not symrefs pointing to badly named refs) with a REF_BAD_NAME flag to make it easier for future callers to notice and handle them specially. For example, in a later patch for-each-ref will use this flag to detect refs whose names can confuse callers parsing for-each-ref output. In the transaction API, refuse to create or update badly named refs, but allow deleting them (unless they try to escape refs/ and don't match [A-Z_]*). Signed-off-by: Ronnie Sahlberg <sahlberg@google.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
/*
* Reference name is not well formed.
*
* See git-check-ref-format(1) for the definition of well formed ref names.
*/
#define REF_BAD_NAME 0x08
/*
* The signature for the callback function for the for_each_*()
* functions below. The memory pointed to by the refname and oid
* arguments is only guaranteed to be valid for the duration of a
* single callback invocation.
*/
typedef int each_ref_fn(const char *refname,
const struct object_id *oid, int flags, void *cb_data);
/*
* The same as each_ref_fn, but also with a repository argument that
* contains the repository associated with the callback.
*/
typedef int each_repo_ref_fn(struct repository *r,
const char *refname,
const struct object_id *oid,
int flags,
void *cb_data);
/*
* The following functions invoke the specified callback function for
* each reference indicated. If the function ever returns a nonzero
* value, stop the iteration and return that value. Please note that
* it is not safe to modify references while an iteration is in
* progress, unless the same callback function invocation that
* modifies the reference also returns a nonzero value to immediately
* stop the iteration. Returned references are sorted.
*/
int refs_head_ref(struct ref_store *refs,
each_ref_fn fn, void *cb_data);
int refs_for_each_ref(struct ref_store *refs,
each_ref_fn fn, void *cb_data);
int refs_for_each_ref_in(struct ref_store *refs, const char *prefix,
each_ref_fn fn, void *cb_data);
int refs_for_each_tag_ref(struct ref_store *refs,
each_ref_fn fn, void *cb_data);
int refs_for_each_branch_ref(struct ref_store *refs,
each_ref_fn fn, void *cb_data);
int refs_for_each_remote_ref(struct ref_store *refs,
each_ref_fn fn, void *cb_data);
/* just iterates the head ref. */
int head_ref(each_ref_fn fn, void *cb_data);
/* iterates all refs. */
int for_each_ref(each_ref_fn fn, void *cb_data);
/**
* iterates all refs which have a defined prefix and strips that prefix from
* the passed variable refname.
*/
int for_each_ref_in(const char *prefix, each_ref_fn fn, void *cb_data);
refs/packed-backend.c: implement jump lists to avoid excluded pattern(s) When iterating through the `packed-refs` file in order to answer a query like: $ git for-each-ref --exclude=refs/__hidden__ it would be useful to avoid walking over all of the entries in `refs/__hidden__/*` when possible, since we know that the ref-filter code is going to throw them away anyways. In certain circumstances, doing so is possible. The algorithm for doing so is as follows: - For each excluded pattern, find the first record that matches it, and the first record that *doesn't* match it (i.e. the location you'd next want to consider when excluding that pattern). - Sort the set of excluded regions from the previous step in ascending order of the first location within the `packed-refs` file that matches. - Clean up the results from the previous step: discard empty regions, and combine adjacent regions. The set of regions which remains is referred to as the "jump list", and never contains any references which should be included in the result set. Then when iterating through the `packed-refs` file, if `iter->pos` is ever contained in one of the regions from the previous steps, advance `iter->pos` past the end of that region, and continue enumeration. Note that we only perform this optimization when none of the excluded pattern(s) have special meta-characters in them. For a pattern like "refs/foo[ac]", the excluded regions ("refs/fooa", "refs/fooc", and everything underneath them) are not connected. A future implementation that handles this case may split the character class (pretending as if two patterns were excluded: "refs/fooa", and "refs/fooc"). There are a few other gotchas worth considering. First, note that the jump list is sorted, so once we jump past a region, we can avoid considering it (or any regions preceding it) again. The member `jump_pos` is used to track the first next-possible region to jump through. Second, note that the jump list is best-effort, since we do not handle loose references, and because of the meta-character issue above. The jump list may not skip past all references which won't appear in the results, but will never skip over a reference which does appear in the result set. In repositories with a large number of hidden references, the speed-up can be significant. Tests here are done with a copy of linux.git with a reference "refs/pull/N" pointing at every commit, as in: $ git rev-list HEAD | awk '{ print "create refs/pull/" NR " " $0 }' | git update-ref --stdin $ git pack-refs --all , it is significantly faster to have `for-each-ref` jump over the excluded references, as opposed to filtering them out after the fact: $ hyperfine \ 'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"' \ 'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' \ 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' Benchmark 1: git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/" Time (mean ± σ): 798.1 ms ± 3.3 ms [User: 687.6 ms, System: 146.4 ms] Range (min … max): 794.5 ms … 805.5 ms 10 runs Benchmark 2: git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull" Time (mean ± σ): 98.9 ms ± 1.4 ms [User: 93.1 ms, System: 5.7 ms] Range (min … max): 97.0 ms … 104.0 ms 29 runs Benchmark 3: git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull" Time (mean ± σ): 4.5 ms ± 0.2 ms [User: 0.7 ms, System: 3.8 ms] Range (min … max): 4.1 ms … 5.8 ms 524 runs Summary 'git.compile for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' ran 21.87 ± 1.05 times faster than 'git.prev for-each-ref --format="%(objectname) %(refname)" --exclude="refs/pull"' 176.52 ± 8.19 times faster than 'git for-each-ref --format="%(objectname) %(refname)" | grep -vE "^[0-9a-f]{40} refs/pull/"' (Comparing stock git and this patch isn't quite fair, since an earlier commit in this series adds a naive implementation of the `--exclude` option. `git.prev` is built from the previous commit and includes this naive implementation). Using the jump list is fairly straightforward (see the changes to `refs/packed-backend.c::next_record()`), but constructing the list is not. To ensure that the construction is correct, add a new suite of tests in t1419 covering various corner cases (overlapping regions, partially overlapping regions, adjacent regions, etc.). Co-authored-by: Jeff King <peff@peff.net> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-07-10 23:12:28 +02:00
/*
* references matching any pattern in "exclude_patterns" are omitted from the
* result set on a best-effort basis.
*/
int refs_for_each_fullref_in(struct ref_store *refs, const char *prefix,
const char **exclude_patterns,
each_ref_fn fn, void *cb_data);
int for_each_fullref_in(const char *prefix, each_ref_fn fn, void *cb_data);
/**
* iterate all refs in "patterns" by partitioning patterns into disjoint sets
* and iterating the longest-common prefix of each set.
*
* references matching any pattern in "exclude_patterns" are omitted from the
* result set on a best-effort basis.
*
* callers should be prepared to ignore references that they did not ask for.
*/
int refs_for_each_fullref_in_prefixes(struct ref_store *refs,
const char *namespace,
const char **patterns,
const char **exclude_patterns,
each_ref_fn fn, void *cb_data);
/**
* iterate refs from the respective area.
*/
int for_each_tag_ref(each_ref_fn fn, void *cb_data);
int for_each_branch_ref(each_ref_fn fn, void *cb_data);
int for_each_remote_ref(each_ref_fn fn, void *cb_data);
int for_each_replace_ref(struct repository *r, each_repo_ref_fn fn, void *cb_data);
/* iterates all refs that match the specified glob pattern. */
int for_each_glob_ref(each_ref_fn fn, const char *pattern, void *cb_data);
int for_each_glob_ref_in(each_ref_fn fn, const char *pattern,
const char *prefix, void *cb_data);
int head_ref_namespaced(each_ref_fn fn, void *cb_data);
/*
* references matching any pattern in "exclude_patterns" are omitted from the
* result set on a best-effort basis.
*/
int for_each_namespaced_ref(const char **exclude_patterns,
each_ref_fn fn, void *cb_data);
ref namespaces: infrastructure Add support for dividing the refs of a single repository into multiple namespaces, each of which can have its own branches, tags, and HEAD. Git can expose each namespace as an independent repository to pull from and push to, while sharing the object store, and exposing all the refs to operations such as git-gc. Storing multiple repositories as namespaces of a single repository avoids storing duplicate copies of the same objects, such as when storing multiple branches of the same source. The alternates mechanism provides similar support for avoiding duplicates, but alternates do not prevent duplication between new objects added to the repositories without ongoing maintenance, while namespaces do. To specify a namespace, set the GIT_NAMESPACE environment variable to the namespace. For each ref namespace, git stores the corresponding refs in a directory under refs/namespaces/. For example, GIT_NAMESPACE=foo will store refs under refs/namespaces/foo/. You can also specify namespaces via the --namespace option to git. Note that namespaces which include a / will expand to a hierarchy of namespaces; for example, GIT_NAMESPACE=foo/bar will store refs under refs/namespaces/foo/refs/namespaces/bar/. This makes paths in GIT_NAMESPACE behave hierarchically, so that cloning with GIT_NAMESPACE=foo/bar produces the same result as cloning with GIT_NAMESPACE=foo and cloning from that repo with GIT_NAMESPACE=bar. It also avoids ambiguity with strange namespace paths such as foo/refs/heads/, which could otherwise generate directory/file conflicts within the refs directory. Add the infrastructure for ref namespaces: handle the GIT_NAMESPACE environment variable and --namespace option, and support iterating over refs in a namespace. Signed-off-by: Josh Triplett <josh@joshtriplett.org> Signed-off-by: Jamey Sharp <jamey@minilop.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-07-05 19:54:44 +02:00
/* can be used to learn about broken ref and symref */
int refs_for_each_rawref(struct ref_store *refs, each_ref_fn fn, void *cb_data);
int for_each_rawref(each_ref_fn fn, void *cb_data);
log: add option to choose which refs to decorate When `log --decorate` is used, git will decorate commits with all available refs. While in most cases this may give the desired effect, under some conditions it can lead to excessively verbose output. Introduce two command line options, `--decorate-refs=<pattern>` and `--decorate-refs-exclude=<pattern>` to allow the user to select which refs are used in decoration. When "--decorate-refs=<pattern>" is given, only the refs that match the pattern are used in decoration. The refs that match the pattern when "--decorate-refs-exclude=<pattern>" is given, are never used in decoration. These options follow the same convention for mixing negative and positive patterns across the system, assuming that the inclusive default is to match all refs available. (1) if there is no positive pattern given, pretend as if an inclusive default positive pattern was given; (2) for each candidate, reject it if it matches no positive pattern, or if it matches any one of the negative patterns. The rules for what is considered a match are slightly different from the rules used elsewhere. Commands like `log --glob` assume a trailing '/*' when glob chars are not present in the pattern. This makes it difficult to specify a single ref. On the other hand, commands like `describe --match --all` allow specifying exact refs, but do not have the convenience of allowing "shorthand refs" like 'refs/heads' or 'heads' to refer to 'refs/heads/*'. The commands introduced in this patch consider a match if: (a) the pattern contains globs chars, and regular pattern matching returns a match. (b) the pattern does not contain glob chars, and ref '<pattern>' exists, or if ref exists under '<pattern>/' This allows both behaviours (allowing single refs and shorthand refs) yet remaining compatible with existent commands. Helped-by: Kevin Daudt <me@ikke.info> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Rafael Ascensão <rafa.almas@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-21 22:33:41 +01:00
/*
* Normalizes partial refs to their fully qualified form.
* Will prepend <prefix> to the <pattern> if it doesn't start with 'refs/'.
* <prefix> will default to 'refs/' if NULL.
*
* item.string will be set to the result.
* item.util will be set to NULL if <pattern> contains glob characters, or
* non-NULL if it doesn't.
*/
void normalize_glob_ref(struct string_list_item *item, const char *prefix,
const char *pattern);
static inline const char *has_glob_specials(const char *pattern)
{
return strpbrk(pattern, "?*[");
}
void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname);
void warn_dangling_symrefs(FILE *fp, const char *msg_fmt,
const struct string_list *refnames);
/*
* Flags for controlling behaviour of pack_refs()
* PACK_REFS_PRUNE: Prune loose refs after packing
* PACK_REFS_ALL: Pack _all_ refs, not just tags and already packed refs
*/
#define PACK_REFS_PRUNE 0x0001
#define PACK_REFS_ALL 0x0002
/*
* Write a packed-refs file for the current repository.
* flags: Combination of the above PACK_REFS_* flags.
*/
int refs_pack_refs(struct ref_store *refs, struct pack_refs_opts *opts);
/*
* Setup reflog before using. Fill in err and return -1 on failure.
*/
int refs_create_reflog(struct ref_store *refs, const char *refname,
struct strbuf *err);
int safe_create_reflog(const char *refname, struct strbuf *err);
/** Reads log for the value of ref during at_time. **/
int read_ref_at(struct ref_store *refs,
const char *refname, unsigned int flags,
timestamp_t at_time, int cnt,
struct object_id *oid, char **msg,
timestamp_t *cutoff_time, int *cutoff_tz, int *cutoff_cnt);
/** Check if a particular reflog exists */
int refs_reflog_exists(struct ref_store *refs, const char *refname);
int reflog_exists(const char *refname);
/*
* Delete the specified reference. If old_oid is non-NULL, then
* verify that the current value of the reference is old_oid before
* deleting it. If old_oid is NULL, delete the reference if it
* exists, regardless of its old value. It is an error for old_oid to
* be null_oid. msg and flags are passed through to
* ref_transaction_delete().
*/
int refs_delete_ref(struct ref_store *refs, const char *msg,
const char *refname,
const struct object_id *old_oid,
unsigned int flags);
int delete_ref(const char *msg, const char *refname,
const struct object_id *old_oid, unsigned int flags);
/*
* Delete the specified references. If there are any problems, emit
* errors but attempt to keep going (i.e., the deletes are not done in
* an all-or-nothing transaction). msg and flags are passed through to
* ref_transaction_delete().
*/
int refs_delete_refs(struct ref_store *refs, const char *msg,
struct string_list *refnames, unsigned int flags);
int delete_refs(const char *msg, struct string_list *refnames,
unsigned int flags);
/** Delete a reflog */
int refs_delete_reflog(struct ref_store *refs, const char *refname);
int delete_reflog(const char *refname);
/*
* Callback to process a reflog entry found by the iteration functions (see
* below).
*
* The committer parameter is a single string, in the form
* "$GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>" (without double quotes).
*
* The timestamp parameter gives the time when entry was created as the number
* of seconds since the UNIX epoch.
*
* The tz parameter gives the timezone offset for the user who created
* the reflog entry, and its value gives a positive or negative offset
* from UTC. Its absolute value is formed by multiplying the hour
* part by 100 and adding the minute part. For example, 1 hour ahead
* of UTC, CET == "+0100", is represented as positive one hundred (not
* postiive sixty).
*
* The msg parameter is a single complete line; a reflog message given
* to refs_delete_ref, refs_update_ref, etc. is returned to the
* callback normalized---each run of whitespaces are squashed into a
* single whitespace, trailing whitespace, if exists, is trimmed, and
* then a single LF is added at the end.
*
* The cb_data is a caller-supplied pointer given to the iterator
* functions.
*/
typedef int each_reflog_ent_fn(
struct object_id *old_oid, struct object_id *new_oid,
const char *committer, timestamp_t timestamp,
int tz, const char *msg, void *cb_data);
/* Iterate over reflog entries in the log for `refname`. */
/* oldest entry first */
int refs_for_each_reflog_ent(struct ref_store *refs, const char *refname,
each_reflog_ent_fn fn, void *cb_data);
/* youngest entry first */
int refs_for_each_reflog_ent_reverse(struct ref_store *refs,
const char *refname,
each_reflog_ent_fn fn,
void *cb_data);
/*
* Iterate over reflog entries in the log for `refname` in the main ref store.
*/
/* oldest entry first */
int for_each_reflog_ent(const char *refname, each_reflog_ent_fn fn, void *cb_data);
/* youngest entry first */
int for_each_reflog_ent_reverse(const char *refname, each_reflog_ent_fn fn, void *cb_data);
/*
* Calls the specified function for each reflog file until it returns nonzero,
* and returns the value. Reflog file order is unspecified.
*/
int refs_for_each_reflog(struct ref_store *refs, each_ref_fn fn, void *cb_data);
int for_each_reflog(each_ref_fn fn, void *cb_data);
#define REFNAME_ALLOW_ONELEVEL 1
#define REFNAME_REFSPEC_PATTERN 2
/*
* Return</