Git Source Code Mirror - This is a publish-only repository and all pull requests are ignored. Please follow Documentation/SubmittingPatches procedure for any of your improvements. https://git-scm.com/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
git/apply.c

5077 lines
132 KiB

/*
* apply.c
*
* Copyright (C) Linus Torvalds, 2005
*
* This applies patches on top of some (arbitrary) version of the SCM.
*
*/
#include "cache.h"
#include "config.h"
#include "object-store.h"
#include "blob.h"
#include "delta.h"
#include "diff.h"
#include "dir.h"
#include "xdiff-interface.h"
#include "ll-merge.h"
#include "lockfile.h"
#include "parse-options.h"
#include "quote.h"
#include "rerere.h"
#include "apply.h"
#include "entry.h"
/*
 * Small bundle of values that travel together: a pointer to a strbuf
 * named "root", a line number, and a "p_value" count.
 * NOTE(review): the users of this struct are outside this view;
 * presumably it is handed to the "diff --git" header parsers -- confirm.
 */
struct gitdiff_data {
struct strbuf *root;
int linenr;
int p_value;
};
static void git_apply_config(void)
{
config: drop git_config_get_string_const() As evidenced by the leak fixes in the previous commit, the "const" in git_config_get_string_const() clearly misleads people into thinking that it does not allocate a copy of the string. We can fix this by renaming it, but it's easier still to just drop it. Of the four remaining callers: - The one in git_config_parse_expiry() still needs to allocate, since that's what its callers expect. We can just use the non-const version and cast our pointer. Slightly ugly, but the damage is contained in one spot. - The two in apply are writing to global "const char *" variables, and need to continue allocating. We often mark these as const because we assign default string literals to them. But in this case we don't do that, so we can just declare them as real "char *" pointers and use the non-const version. - The call in checkout doesn't actually need a copy; it can just use the non-allocating "tmp" version of the function. The function is also mentioned in the MyFirstContribution document. We can swap that call out for the non-allocating "tmp" variant, which fits well in the example given. We'll drop the "configset" and "repo" variants, as well (which are unused). Note that this frees up the "const" name, so we could rename the "tmp" variant back to that. But let's give some time for topics in flight to adapt to the new code before doing so (if we do it too soon, the function semantics will change but the compiler won't alert us). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
git_config_get_string("apply.whitespace", &apply_default_whitespace);
git_config_get_string("apply.ignorewhitespace", &apply_default_ignorewhitespace);
git_config(git_xmerge_config, NULL);
}
/*
 * Translate a whitespace-handling keyword into state->ws_error_action.
 *
 * A NULL option is treated like "warn".  "error-all" additionally
 * resets state->squelch_whitespace_errors to 0.  Returns 0 when the
 * keyword is known, otherwise the result of error().
 *
 * Please update $__git_whitespacelist in git-completion.bash
 * when you add new options.
 */
static int parse_whitespace_option(struct apply_state *state, const char *option)
{
	if (!option || !strcmp(option, "warn")) {
		state->ws_error_action = warn_on_ws_error;
	} else if (!strcmp(option, "nowarn")) {
		state->ws_error_action = nowarn_ws_error;
	} else if (!strcmp(option, "error")) {
		state->ws_error_action = die_on_ws_error;
	} else if (!strcmp(option, "error-all")) {
		state->ws_error_action = die_on_ws_error;
		state->squelch_whitespace_errors = 0;
	} else if (!strcmp(option, "strip") || !strcmp(option, "fix")) {
		state->ws_error_action = correct_ws_error;
	} else {
		return error(_("unrecognized whitespace option '%s'"), option);
	}
	return 0;
}
/*
 * Translate an ignore-whitespace keyword into state->ws_ignore_action.
 *
 * "change" selects ignore_ws_change; NULL, "no", "false", "never" and
 * "none" select ignore_ws_none.  Anything else is reported through
 * error() and that result is returned; recognized values return 0.
 */
static int parse_ignorewhitespace_option(struct apply_state *state,
					 const char *option)
{
	if (option && !strcmp(option, "change")) {
		state->ws_ignore_action = ignore_ws_change;
		return 0;
	}

	if (option &&
	    strcmp(option, "no") && strcmp(option, "false") &&
	    strcmp(option, "never") && strcmp(option, "none"))
		return error(_("unrecognized whitespace ignore option '%s'"), option);

	state->ws_ignore_action = ignore_ws_none;
	return 0;
}
int init_apply_state(struct apply_state *state,
struct repository *repo,
apply: move lockfile into `apply_state` We have two users of `struct apply_state` and the related functionality in apply.c. Each user sets up its `apply_state` by handing over a pointer to its static `lock_file`. (Before 076aa2cbd (tempfile: auto-allocate tempfiles on heap, 2017-09-05), we could never free lockfiles, so making them static was a reasonable approach.) Other than that, they never directly access their `lock_file`s, which are instead handled by the functionality in apply.c. To make life easier for the caller and to make it less tempting for a future caller to mess with the lock, make apply.c fully responsible for setting up the `lock_file`. As mentioned above, it is now safe to free a `lock_file`, so we can make the `struct apply_state` contain an actual `struct lock_file` instead of a pointer to one. The user in builtin/apply.c is rather simple. For builtin/am.c, we might worry that the lock state is actually meant to be inherited across calls. But the lock is only taken as `apply_all_patches()` executes, and code inspection shows that it will always be released. Alternatively, we can observe that the lock itself is never queried directly. When we decide whether we should lock, we check a related variable `newfd`. That variable is not inherited, so from the point of view of apply.c, the state machine really is reset with each call to `init_apply_state()`. (It would be a bug if `newfd` and the lock status were not in sync. The duplication of information in `newfd` and the lock will be addressed in the next patch.) Signed-off-by: Martin Ågren <martin.agren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
5 years ago
const char *prefix)
{
memset(state, 0, sizeof(*state));
state->prefix = prefix;
state->repo = repo;
state->apply = 1;
state->line_termination = '\n';
state->p_value = 1;
state->p_context = UINT_MAX;
state->squelch_whitespace_errors = 5;
state->ws_error_action = warn_on_ws_error;
state->ws_ignore_action = ignore_ws_none;
state->linenr = 1;
string_list_init_nodup(&state->fn_table);
string_list_init_nodup(&state->limit_by_name);
strset_init(&state->removed_symlinks);
strset_init(&state->kept_symlinks);
strbuf_init(&state->root, 0);
git_apply_config();
if (apply_default_whitespace && parse_whitespace_option(state, apply_default_whitespace))
return -1;
if (apply_default_ignorewhitespace && parse_ignorewhitespace_option(state, apply_default_ignorewhitespace))
return -1;
return 0;
}
/*
 * Release the resources held by "state": the root strbuf, both symlink
 * sets and the limit_by_name list.
 */
void clear_apply_state(struct apply_state *state)
{
	strbuf_release(&state->root);
	strset_clear(&state->kept_symlinks);
	strset_clear(&state->removed_symlinks);
	string_list_clear(&state->limit_by_name, 0);

	/* &state->fn_table is cleared at the end of apply_patch() */
}
/* Message routine that throws its input away. */
static void mute_routine(const char *msg, va_list params)
{
	/* Both arguments are deliberately ignored. */
	(void)msg;
	(void)params;
}
/*
 * Check the option combination recorded in "state" and derive the
 * settings that depend on it.
 *
 * Returns 0 when the combination is acceptable.  Otherwise returns the
 * result of error(): either --reject was combined with --3way, or
 * --3way, --index or --cached was requested outside a repository.
 *
 * A non-zero "force_apply" keeps state->apply as it is even when one of
 * diffstat, numstat, summary, check or fake_ancestor is set.
 *
 * The order of the checks matters: earlier steps set check_index, which
 * later steps test.
 */
int check_apply_state(struct apply_state *state, int force_apply)
{
int is_not_gitdir = !startup_info->have_repository;
if (state->apply_with_reject && state->threeway)
return error(_("options '%s' and '%s' cannot be used together"), "--reject", "--3way");
/* --3way needs a repository and turns on index checking. */
if (state->threeway) {
if (is_not_gitdir)
return error(_("'%s' outside a repository"), "--3way");
state->check_index = 1;
}
/* --reject forces applying and raises normal verbosity to verbose. */
if (state->apply_with_reject) {
state->apply = 1;
if (state->apply_verbosity == verbosity_normal)
state->apply_verbosity = verbosity_verbose;
}
/* Any of these options turns applying off unless the caller forces it. */
if (!force_apply && (state->diffstat || state->numstat || state->summary || state->check || state->fake_ancestor))
state->apply = 0;
if (state->check_index && is_not_gitdir)
return error(_("'%s' outside a repository"), "--index");
/* --cached needs a repository and turns on index checking as well. */
if (state->cached) {
if (is_not_gitdir)
return error(_("'%s' outside a repository"), "--cached");
state->check_index = 1;
}
/* ita_only is dropped when the index is checked or there is no repository. */
if (state->ita_only && (state->check_index || is_not_gitdir))
state->ita_only = 0;
if (state->check_index)
state->unsafe_paths = 0;
/*
 * At or below verbosity_silent, remember the current error and
 * warning routines in "state" and install mute_routine for both.
 */
if (state->apply_verbosity <= verbosity_silent) {
state->saved_error_routine = get_error_routine();
state->saved_warn_routine = get_warn_routine();
set_error_routine(mute_routine);
set_warn_routine(mute_routine);
}
return 0;
}
/*
 * Pick the whitespace error action when neither state->whitespace_option
 * nor the configured default is set: warn when the patch will be
 * applied, stay quiet otherwise.
 */
static void set_default_whitespace_mode(struct apply_state *state)
{
	if (state->whitespace_option || apply_default_whitespace)
		return;

	if (state->apply)
		state->ws_error_action = warn_on_ws_error;
	else
		state->ws_error_action = nowarn_ws_error;
}
/*
* This represents one "hunk" from a patch, starting with
* "@@ -oldpos,oldlines +newpos,newlines @@" marker. The
* patch text is pointed at by patch, and its byte length
* is stored in size. leading and trailing are the number
* of context lines.
*/
struct fragment {
unsigned long leading, trailing;
unsigned long oldpos, oldlines;
unsigned long newpos, newlines;
/*
* 'patch' is usually borrowed from buf in apply_patch(),
* but some codepaths store an allocated buffer.
*/
const char *patch;
unsigned free_patch:1,
rejected:1;
int size;
int linenr;
struct fragment *next;
};
/*
 * When dealing with a binary patch, we reuse the "leading" field of
 * struct fragment to store the type of the binary hunk: either a
 * deflated "delta" or a deflated "literal".
 */
#define binary_patch_method leading
#define BINARY_DELTA_DEFLATED 1
#define BINARY_LITERAL_DEFLATED 2
/*
 * Free every fragment on the list.  The patch text is freed too, but
 * only for fragments whose free_patch bit says they own it.
 */
static void free_fragment_list(struct fragment *list)
{
	struct fragment *cur, *following;

	for (cur = list; cur; cur = following) {
		/* Grab the link before the node goes away. */
		following = cur->next;
		if (cur->free_patch)
			free((char *)cur->patch);
		free(cur);
	}
}
/*
 * Free everything "patch" points at (result buffer, the three names and
 * the fragment list) but not the struct patch itself.
 */
void release_patch(struct patch *patch)
{
	free(patch->result);
	free(patch->new_name);
	free(patch->old_name);
	free(patch->def_name);
	free_fragment_list(patch->fragments);
}
/* Release what "patch" owns, then free the struct itself. */
static void free_patch(struct patch *patch)
{
	release_patch(patch);	/* members first ... */
	free(patch);		/* ... then the container */
}
/* Free every patch on a list linked through ->next. */
static void free_patch_list(struct patch *list)
{
	struct patch *cur, *following;

	for (cur = list; cur; cur = following) {
		/* Read the link before free_patch() destroys the node. */
		following = cur->next;
		free_patch(cur);
	}
}
/*
 * A line in a file, len-bytes long (includes the terminating LF,
 * except for an incomplete line at the end if the file ends with
 * one), and its contents hashes to 'hash'.
 */
struct line {
/* byte length of the line */
size_t len;
/* receives hash_line()'s result; the bit-field keeps only 24 bits of it */
unsigned hash : 24;
/* NOTE(review): presumably a combination of the LINE_* values below -- confirm */
unsigned flag : 8;
#define LINE_COMMON 1
#define LINE_PATCHED 2
};
/*
 * This represents a "file", which is an array of "lines".
 */
struct image {
/* file contents; freed by clear_image() */
char *buf;
/* byte length of buf */
size_t len;
/* number of entries filled in line_allocated */
size_t nr;
/* allocation count handed to ALLOC_GROW for line_allocated */
size_t alloc;
/* start of the allocated line array; freed by clear_image() */
struct line *line_allocated;
/*
 * Set to line_allocated by prepare_image().
 * NOTE(review): presumably may later point elsewhere inside the
 * array -- confirm against the users outside this view.
 */
struct line *line;
};
/*
 * Hash the first "len" bytes at "cp", skipping whitespace bytes so that
 * lines differing only in whitespace get the same value.
 */
static uint32_t hash_line(const char *cp, size_t len)
{
	uint32_t h = 0;
	size_t pos = 0;

	while (pos < len) {
		char c = cp[pos++];

		if (isspace(c))
			continue;
		h = h * 3 + (c & 0xff);
	}
	return h;
}
/*
 * Compare line s1 (n1 bytes) with line s2 (n2 bytes), treating any run
 * of whitespace as equal to any other run of whitespace and ignoring
 * trailing CR/LF.  Returns 1 if they match, 0 otherwise.
 */
static int fuzzy_matchlines(const char *s1, size_t n1,
			    const char *s2, size_t n2)
{
	const char *end1 = s1 + n1;
	const char *end2 = s2 + n2;

	/* Drop any run of CR/LF at the tail of either line. */
	for (; s1 < end1; end1--)
		if (end1[-1] != '\r' && end1[-1] != '\n')
			break;
	for (; s2 < end2; end2--)
		if (end2[-1] != '\r' && end2[-1] != '\n')
			break;

	while (s1 < end1 && s2 < end2) {
		if (!isspace(*s1)) {
			/* Ordinary bytes have to be identical. */
			if (*s1 != *s2)
				return 0;
			s1++;
			s2++;
			continue;
		}

		/*
		 * Whitespace must be present on both sides, because
		 * "a b" is not supposed to match "ab".
		 */
		if (!isspace(*s2))
			return 0;
		do {
			s1++;
		} while (s1 < end1 && isspace(*s1));
		do {
			s2++;
		} while (s2 < end2 && isspace(*s2));
	}

	/* A leftover on only one side means the lines differ. */
	return (s1 == end1 && s2 == end2) ? 1 : 0;
}
static void add_line_info(struct image *img, const char *bol, size_t len, unsigned flag)
{
ALLOC_GROW(img->line_allocated, img->nr + 1, img->alloc);
img->line_allocated[img->nr].len = len;
img->line_allocated[img->nr].hash = hash_line(bol, len);
img->line_allocated[img->nr].flag = flag;
img->nr++;
}
/*
 * "buf" holds the contents to be patched (read from various sources).
 * Attach it to "image", which owns it from now on, and, when
 * "prepare_linetable" is non-zero, build the per-line index as well.
 */
static void prepare_image(struct image *image, char *buf, size_t len,
			  int prepare_linetable)
{
	const char *bol, *limit;

	memset(image, 0, sizeof(*image));
	image->buf = buf;
	image->len = len;

	if (prepare_linetable) {
		limit = buf + len;
		for (bol = buf; bol < limit; ) {
			const char *eol = bol;

			/* Stop on the LF, or at the end for an incomplete line. */
			while (eol < limit && *eol != '\n')
				eol++;
			if (eol < limit)
				eol++;	/* the LF belongs to this line */
			add_line_info(image, bol, eol - bol, 0);
			bol = eol;
		}
		image->line = image->line_allocated;
	}
}
/* Free the buffers owned by "image" and leave it zeroed. */
static void clear_image(struct image *image)
{
	free(image->line_allocated);
	free(image->buf);
	memset(image, 0, sizeof(*image));
}
/*
 * Print the (C-quoted) name of "patch" to "output" through "fmt",
 * followed by a newline.  A patch whose old and new names differ is
 * shown as "old => new".
 *
 * fmt must contain _one_ %s and no other substitution.
 */
static void say_patch_name(FILE *output, const char *fmt, struct patch *patch)
{
	struct strbuf sb = STRBUF_INIT;
	const char *from = patch->old_name;
	const char *to = patch->new_name;

	if (from && to && strcmp(from, to)) {
		quote_c_style(from, &sb, NULL, 0);
		strbuf_addstr(&sb, " => ");
		quote_c_style(to, &sb, NULL, 0);
	} else {
		/* Prefer the new name; fall back to the old one. */
		quote_c_style(to ? to : from, &sb, NULL, 0);
	}

	fprintf(output, fmt, sb.buf);
	fputc('\n', output);
	strbuf_release(&sb);
}
/* Number of NUL bytes kept available past the end of the patch text. */
#define SLOP (16)

/*
 * Read everything from "fd" into "sb".  Returns 0 on success, or the
 * result of error_errno() when reading fails.
 */
static int read_patch_file(struct strbuf *sb, int fd)
{
	if (strbuf_read(sb, fd, 0) >= 0) {
		/*
		 * Make sure that we have some slop in the buffer
		 * so that we can do speculative "memcmp" etc, and
		 * see to it that it is NUL-filled.
		 */
		strbuf_grow(sb, SLOP);
		memset(sb->buf + sb->len, 0, SLOP);
		return 0;
	}
	return error_errno("git apply: failed to read");
}
/*
 * Length of the first line in "buffer", looking at no more than "size"
 * bytes.  The terminating LF is counted when one is found; otherwise
 * the result is "size".
 */
static unsigned long linelen(const char *buffer, unsigned long size)
{
	unsigned long len;

	for (len = 0; len < size; len++)
		if (buffer[len] == '\n')
			return len + 1;
	return len;
}
/* True when "str" starts with "/dev/null" followed by a whitespace byte. */
static int is_dev_null(const char *str)
{
	const char *rest;

	if (!skip_prefix(str, "/dev/null", &rest))
		return 0;
	return isspace(*rest) != 0;
}
/* Bits for the "terminate" mask: which blanks end a name. */
#define TERM_SPACE 1
#define TERM_TAB 2

/*
 * Does byte "c" end a name under the "terminate" mask?  A space or a
 * tab does so only when its bit is set; every other byte always does.
 */
static int name_terminate(int c, int terminate)
{
	switch (c) {
	case ' ':
		return (terminate & TERM_SPACE) != 0;
	case '\t':
		return (terminate & TERM_TAB) != 0;
	default:
		return 1;
	}
}
/*
 * Collapse every run of '/' in "name" into a single '/', in place, so
 * that --index works with such filenames.  Returns "name", or NULL when
 * given NULL.
 */
static char *squash_slash(char *name)
{
	char *src, *dst;

	if (!name)
		return NULL;

	src = dst = name;
	while (*src) {
		char c = *src++;

		*dst++ = c;
		if (c == '/')
			while (*src == '/')
				src++;	/* swallow the repeats */
	}
	*dst = '\0';
	return name;
}
/*
 * Extract a C-quoted name from "line", drop its first "p_value" path
 * components, prepend "root" when it is non-empty, and return the
 * result as a detached strbuf buffer with repeated slashes squashed.
 * Returns NULL when unquoting fails or the name has too few components.
 *
 * This handles the proposed "new-style" GNU patch/diff format; see
 * https://lore.kernel.org/git/7vll0wvb2a.fsf@assigned-by-dhcp.cox.net/
 */
static char *find_name_gnu(struct strbuf *root,
			   const char *line,
			   int p_value)
{
	struct strbuf name = STRBUF_INIT;
	char *cursor;
	int remaining;

	if (unquote_c_style(&name, line, NULL))
		goto bad;

	/* Step past one '/' for every component to strip. */
	cursor = name.buf;
	for (remaining = p_value; remaining; remaining--) {
		char *slash = strchr(cursor, '/');

		if (!slash)
			goto bad;
		cursor = slash + 1;
	}

	strbuf_remove(&name, 0, cursor - name.buf);
	if (root->len)
		strbuf_insert(&name, 0, root->buf, root->len);
	return squash_slash(strbuf_detach(&name, NULL));

bad:
	strbuf_release(&name);
	return NULL;
}
/*
 * If the first "len" bytes of "line" end with a timezone shaped like
 * " +0500" (space, sign, four digits), return its length; otherwise 0.
 */
static size_t sane_tz_len(const char *line, size_t len)
{
	const size_t tzlen = strlen(" +0500");
	const char *tz;
	size_t i;

	if (len < tzlen)
		return 0;

	tz = line + (len - tzlen);
	if (tz[0] != ' ')
		return 0;
	if (tz[1] != '+' && tz[1] != '-')
		return 0;
	for (i = 2; i < tzlen; i++)
		if (!isdigit(tz[i]))
			return 0;
	return tzlen;
}
/*
 * If the first "len" bytes of "line" end with a timezone shaped like
 * " +08:00" (space, sign, two digits, colon, two digits), return its
 * length; otherwise 0.
 */
static size_t tz_with_colon_len(const char *line, size_t len)
{
	const size_t tzlen = strlen(" +08:00");
	const char *tz;

	if (len < tzlen)
		return 0;

	tz = line + (len - tzlen);
	if (tz[4] != ':')
		return 0;
	if (tz[0] != ' ')
		return 0;
	if (tz[1] != '+' && tz[1] != '-')
		return 0;
	if (!isdigit(tz[2]) || !isdigit(tz[3]) ||
	    !isdigit(tz[5]) || !isdigit(tz[6]))
		return 0;
	return tzlen;
}
/*
 * If the first "len" bytes of "line" end with a date shaped like
 * "72-02-05", return its length; two more digits directly in front of
 * it are counted as part of a 4-digit year.  Returns 0 when there is
 * no such date.
 */
static size_t date_len(const char *line, size_t len)
{
	const size_t short_len = strlen("72-02-05");
	const size_t century_len = strlen("19");
	const char *d;
	size_t start;

	if (len < short_len)
		return 0;

	start = len - short_len;
	d = line + start;
	if (d[5] != '-')
		return 0;
	if (!isdigit(d[0]) || !isdigit(d[1]) || d[2] != '-' ||
	    !isdigit(d[3]) || !isdigit(d[4]) ||
	    !isdigit(d[6]) || !isdigit(d[7]))
		return 0;	/* Not a date. */

	if (start >= century_len && isdigit(d[-1]) && isdigit(d[-2]))
		start -= century_len;	/* 4-digit year */
	return len - start;
}
/*
 * If the first "len" bytes of "line" end with a time shaped like
 * " 07:01:32" (leading space included), return its length; otherwise 0.
 */
static size_t short_time_len(const char *line, size_t len)
{
	const size_t timelen = strlen(" 07:01:32");
	const char *t;

	if (len < timelen)
		return 0;

	t = line + (len - timelen);
	if (t[6] != ':')
		return 0;
	/* Permit 1-digit hours? */
	if (t[0] != ' ' ||
	    !isdigit(t[1]) || !isdigit(t[2]) || t[3] != ':' ||
	    !isdigit(t[4]) || !isdigit(t[5]) ||
	    !isdigit(t[7]) || !isdigit(t[8]))
		return 0;	/* Not a time. */
	return timelen;
}
/*
 * If the first "len" bytes of "line" end with a time carrying
 * fractional seconds, e.g. " 19:41:17.620000023", return the length of
 * the whole time (the part short_time_len() accepts plus ".digits");
 * otherwise 0.
 */
static size_t fractional_time_len(const char *line, size_t len)
{
	size_t dot;
	size_t hms;

	if (!len || !isdigit(line[len - 1]))
		return 0;

	/* Walk back over the fractional digits to the expected '.'. */
	dot = len - 1;
	while (dot > 0 && isdigit(line[dot]))
		dot--;
	if (line[dot] != '.')
		return 0;

	/* Hours, minutes, and whole seconds. */
	hms = short_time_len(line, dot);
	if (!hms)
		return 0;
	return (len - dot) + hms;
}
/*
 * Number of ' ' bytes at the end of the first "len" bytes of "line"
 * (0 when it does not end with a space, "len" when it is all spaces).
 */
static size_t trailing_spaces_len(const char *line, size_t len)
{
	size_t count = 0;

	while (count < len && line[len - 1 - count] == ' ')
		count++;
	return count;
}
/*
 * Measure the timestamp, together with the whitespace separating it
 * from the name, at the end of the first "len" bytes of "line".
 *
 * Recognized shapes:
 *   Posix: 2010-07-05 19:41:17
 *   GNU:   2010-07-05 19:41:17.620000023 -0500
 *
 * Returns the number of trailing bytes that belong to the separator
 * and timestamp, or 0 when no timestamp is found, including when
 * "len" is 0.
 */
static size_t diff_timestamp_len(const char *line, size_t len)
{
	const char *end = line + len;
	size_t n;

	/*
	 * An empty line cannot hold a timestamp; bail out before
	 * end[-1] would read the byte in front of "line".
	 */
	if (!len)
		return 0;
	if (!isdigit(end[-1]))
		return 0;

	/* Optional timezone, in either "+0500" or "+05:00" form. */
	n = sane_tz_len(line, end - line);
	if (!n)
		n = tz_with_colon_len(line, end - line);
	end -= n;

	/* Optional time of day, with or without fractional seconds. */
	n = short_time_len(line, end - line);
	if (!n)
		n = fractional_time_len(line, end - line);
	end -= n;

	n = date_len(line, end - line);
	if (!n)	/* No date.  Too bad. */
		return 0;
	end -= n;

	if (end == line)	/* No space before date. */
		return 0;
	if (end[-1] == '\t') {	/* Success! */
		end--;
		return line + len - end;
	}
	if (end[-1] != ' ')	/* No space before date. */
		return 0;

	/* Whitespace damage. */
	end -= trailing_spaces_len(line, end - line);
	return line + len - end;
}
/*
 * Pick a file name out of "line" after dropping "p_value" leading path
 * components.
 *
 * The scan stops at "end" when it is non-NULL.  With a NULL "end" it
 * stops at a newline, or at another whitespace byte for which
 * name_terminate(c, terminate) is true.
 *
 * Returns a newly allocated string with repeated slashes squashed:
 *  - a copy of "def" (NULL when "def" is NULL) if not enough components
 *    were found or the remaining name is empty;
 *  - a copy of "def" if it is shorter than the name found and is a
 *    prefix of it;
 *  - otherwise the name found, with "root" prepended when non-empty.
 */
static char *find_name_common(struct strbuf *root,
const char *line,
const char *def,
int p_value,
const char *end,
int terminate)
{
int len;
const char *start = NULL;
/* Nothing to strip: the name starts right at "line". */
if (p_value == 0)
start = line;
while (line != end) {
char c = *line;
if (!end && isspace(c)) {
if (c == '\n')
break;
if (name_terminate(c, terminate))
break;
}
line++;
/* The name starts after the slash that brings p_value down to zero. */
if (c == '/' && !--p_value)
start = line;
}
if (!start)
return squash_slash(xstrdup_or_null(def));
len = line - start;
if (!len)
return squash_slash(xstrdup_or_null(def));
/*
 * Generally we prefer the shorter name, especially
 * if the other one is just a variation of that with
 * something else tacked on to the end (ie "file.orig"
 * or "file~").
 */
if (def) {
int deflen = strlen(def);
if (deflen < len && !strncmp(start, def, deflen))
return squash_slash(xstrdup(def));
}
if (root->len) {
char *ret = xstrfmt("%s%.*s", root->buf, len, start);
return squash_slash(ret);
}
return squash_slash(xmemdupz(start, len));
}
/*
 * Extract a name from "line".  A line starting with a double quote is
 * first tried as a C-quoted name; if that does not apply or fails, the
 * common parser is used with the given "terminate" mask.
 */
static char *find_name(struct strbuf *root,
		       const char *line,
		       char *def,
		       int p_value,
		       int terminate)
{
	char *name = NULL;

	if (*line == '"')
		name = find_name_gnu(root, line, p_value);
	if (!name)
		name = find_name_common(root, line, def, p_value, NULL, terminate);
	return name;
}
/*
 * Extract a name from a "---"/"+++" style line that may carry a
 * timestamp after the name.  When a timestamp is recognized, the name
 * is everything in front of it; otherwise the name ends at a tab.
 * A line starting with a double quote is tried as a C-quoted name first.
 */
static char *find_name_traditional(struct strbuf *root,
				   const char *line,
				   char *def,
				   int p_value)
{
	const char *eol;
	size_t stamp_len;

	if (*line == '"') {
		char *quoted = find_name_gnu(root, line, p_value);

		if (quoted)
			return quoted;
	}

	eol = strchrnul(line, '\n');
	stamp_len = diff_timestamp_len(line, eol - line);
	if (stamp_len)
		return find_name_common(root, line, def, p_value, eol - stamp_len, 0);
	return find_name_common(root, line, def, p_value, NULL, TERM_TAB);
}
/*
 * Given the string after "--- " or "+++ ", guess the appropriate
 * p_value for the given patch.  Returns -1 when no guess can be made
 * (the name is /dev/null, cannot be parsed, or does not line up with
 * state->prefix).
 */
static int guess_p_value(struct apply_state *state, const char *nameline)
{
	char *name, *slash;
	int val;

	if (is_dev_null(nameline))
		return -1;
	name = find_name_traditional(&state->root, nameline, NULL, 0);
	if (!name)
		return -1;

	val = -1;
	slash = strchr(name, '/');
	if (!slash) {
		val = 0;
	} else if (state->prefix) {
		/*
		 * Does it begin with "a/$our-prefix" and such?  Then this is
		 * very likely to apply to our directory.
		 */
		if (starts_with(name, state->prefix))
			val = count_slashes(state->prefix);
		else if (starts_with(slash + 1, state->prefix))
			val = count_slashes(state->prefix) + 1;
	}

	free(name);
	return val;
}
/*
* Does the ---/+++ line have the POSIX timestamp after the last HT?
* GNU diff puts epoch there to signal a creation/deletion event. Is
* this such a timestamp?
*/
static int has_epoch_timestamp(const char *nameline)
{
/*
* We are only interested in epoch timestamp; any non-zero
* fraction cannot be one, hence "(\.0+)?" in the regexp below.
* For the same reason, the date must be either 1969-12-31 or
* 1970-01-01, and the seconds part must be "00".
*/
const char stamp_regexp[] =
"^[0-2][0-9]:([0-5][0-9]):00(\\.0+)?"
" "
"([-+][0-2][0-9]:?[0-5][0-9])\n";
const char *timestamp = NULL, *cp, *colon;
static regex_t *stamp;
regmatch_t m[10];