2016-04-22 20:55:46 +02:00
|
|
|
/*
|
|
|
|
* apply.c
|
|
|
|
*
|
|
|
|
* Copyright (C) Linus Torvalds, 2005
|
|
|
|
*
|
|
|
|
* This applies patches on top of some (arbitrary) version of the SCM.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2016-08-08 23:03:07 +02:00
|
|
|
#include "cache.h"
|
2023-03-21 07:25:58 +01:00
|
|
|
#include "abspath.h"
|
2023-02-24 01:09:24 +01:00
|
|
|
#include "alloc.h"
|
2017-06-14 20:07:36 +02:00
|
|
|
#include "config.h"
|
2018-05-16 01:42:15 +02:00
|
|
|
#include "object-store.h"
|
2016-04-22 20:55:46 +02:00
|
|
|
#include "blob.h"
|
|
|
|
#include "delta.h"
|
|
|
|
#include "diff.h"
|
|
|
|
#include "dir.h"
|
2023-03-21 07:26:03 +01:00
|
|
|
#include "environment.h"
|
2023-03-21 07:25:54 +01:00
|
|
|
#include "gettext.h"
|
2023-02-24 01:09:27 +01:00
|
|
|
#include "hex.h"
|
2016-04-22 20:55:46 +02:00
|
|
|
#include "xdiff-interface.h"
|
|
|
|
#include "ll-merge.h"
|
2016-08-08 23:03:07 +02:00
|
|
|
#include "lockfile.h"
|
2023-04-11 09:41:49 +02:00
|
|
|
#include "object-name.h"
|
2016-04-22 20:55:46 +02:00
|
|
|
#include "parse-options.h"
|
|
|
|
#include "quote.h"
|
|
|
|
#include "rerere.h"
|
2016-08-08 23:03:07 +02:00
|
|
|
#include "apply.h"
|
2021-03-23 15:19:32 +01:00
|
|
|
#include "entry.h"
|
2023-03-21 07:26:05 +01:00
|
|
|
#include "setup.h"
|
2023-03-21 07:26:01 +01:00
|
|
|
#include "wrapper.h"
|
2016-08-08 23:03:07 +02:00
|
|
|
|
2019-07-11 18:08:43 +02:00
|
|
|
/*
 * Small bundle of values grouped so they can be passed around as a
 * single pointer.
 */
struct gitdiff_data {
	struct strbuf *root;	/* borrowed pointer to a strbuf */
	int linenr;		/* line number */
	int p_value;		/* -p<n> value */
};
|
|
|
|
|
2016-08-08 23:03:07 +02:00
|
|
|
static void git_apply_config(void)
|
|
|
|
{
|
config: drop git_config_get_string_const()
As evidenced by the leak fixes in the previous commit, the "const" in
git_config_get_string_const() clearly misleads people into thinking that
it does not allocate a copy of the string. We can fix this by renaming
it, but it's easier still to just drop it. Of the four remaining
callers:
- The one in git_config_parse_expiry() still needs to allocate, since
that's what its callers expect. We can just use the non-const
version and cast our pointer. Slightly ugly, but the damage is
contained in one spot.
- The two in apply are writing to global "const char *" variables, and
need to continue allocating. We often mark these as const because we
assign default string literals to them. But in this case we don't do
that, so we can just declare them as real "char *" pointers and use
the non-const version.
- The call in checkout doesn't actually need a copy; it can just use
the non-allocating "tmp" version of the function.
The function is also mentioned in the MyFirstContribution document. We
can swap that call out for the non-allocating "tmp" variant, which fits
well in the example given.
We'll drop the "configset" and "repo" variants, as well (which are
unused).
Note that this frees up the "const" name, so we could rename the "tmp"
variant back to that. But let's give some time for topics in flight to
adapt to the new code before doing so (if we do it too soon, the
function semantics will change but the compiler won't alert us).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-17 23:33:11 +02:00
|
|
|
git_config_get_string("apply.whitespace", &apply_default_whitespace);
|
|
|
|
git_config_get_string("apply.ignorewhitespace", &apply_default_ignorewhitespace);
|
2019-10-24 01:32:38 +02:00
|
|
|
git_config(git_xmerge_config, NULL);
|
2016-08-08 23:03:07 +02:00
|
|
|
}
|
|
|
|
|
2016-09-04 22:18:23 +02:00
|
|
|
static int parse_whitespace_option(struct apply_state *state, const char *option)
|
2016-08-08 23:03:07 +02:00
|
|
|
{
|
|
|
|
if (!option) {
|
|
|
|
state->ws_error_action = warn_on_ws_error;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(option, "warn")) {
|
|
|
|
state->ws_error_action = warn_on_ws_error;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(option, "nowarn")) {
|
|
|
|
state->ws_error_action = nowarn_ws_error;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(option, "error")) {
|
|
|
|
state->ws_error_action = die_on_ws_error;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(option, "error-all")) {
|
|
|
|
state->ws_error_action = die_on_ws_error;
|
|
|
|
state->squelch_whitespace_errors = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(option, "strip") || !strcmp(option, "fix")) {
|
|
|
|
state->ws_error_action = correct_ws_error;
|
|
|
|
return 0;
|
|
|
|
}
|
2019-02-16 12:24:41 +01:00
|
|
|
/*
|
|
|
|
* Please update $__git_whitespacelist in git-completion.bash
|
|
|
|
* when you add new options.
|
|
|
|
*/
|
2016-08-08 23:03:07 +02:00
|
|
|
return error(_("unrecognized whitespace option '%s'"), option);
|
|
|
|
}
|
|
|
|
|
2016-09-04 22:18:23 +02:00
|
|
|
static int parse_ignorewhitespace_option(struct apply_state *state,
|
|
|
|
const char *option)
|
2016-08-08 23:03:07 +02:00
|
|
|
{
|
|
|
|
if (!option || !strcmp(option, "no") ||
|
|
|
|
!strcmp(option, "false") || !strcmp(option, "never") ||
|
|
|
|
!strcmp(option, "none")) {
|
|
|
|
state->ws_ignore_action = ignore_ws_none;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!strcmp(option, "change")) {
|
|
|
|
state->ws_ignore_action = ignore_ws_change;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return error(_("unrecognized whitespace ignore option '%s'"), option);
|
|
|
|
}
|
|
|
|
|
2016-08-08 23:03:08 +02:00
|
|
|
int init_apply_state(struct apply_state *state,
|
2018-08-13 18:14:39 +02:00
|
|
|
struct repository *repo,
|
apply: move lockfile into `apply_state`
We have two users of `struct apply_state` and the related functionality
in apply.c. Each user sets up its `apply_state` by handing over a
pointer to its static `lock_file`. (Before 076aa2cbd (tempfile:
auto-allocate tempfiles on heap, 2017-09-05), we could never free
lockfiles, so making them static was a reasonable approach.)
Other than that, they never directly access their `lock_file`s, which
are instead handled by the functionality in apply.c.
To make life easier for the caller and to make it less tempting for a
future caller to mess with the lock, make apply.c fully responsible for
setting up the `lock_file`. As mentioned above, it is now safe to free a
`lock_file`, so we can make the `struct apply_state` contain an actual
`struct lock_file` instead of a pointer to one.
The user in builtin/apply.c is rather simple. For builtin/am.c, we might
worry that the lock state is actually meant to be inherited across
calls. But the lock is only taken as `apply_all_patches()` executes, and
code inspection shows that it will always be released.
Alternatively, we can observe that the lock itself is never queried
directly. When we decide whether we should lock, we check a related
variable `newfd`. That variable is not inherited, so from the point of
view of apply.c, the state machine really is reset with each call to
`init_apply_state()`. (It would be a bug if `newfd` and the lock status
were not in sync. The duplication of information in `newfd` and the lock
will be addressed in the next patch.)
Signed-off-by: Martin Ågren <martin.agren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-05 22:32:09 +02:00
|
|
|
const char *prefix)
|
2016-08-08 23:03:07 +02:00
|
|
|
{
|
|
|
|
memset(state, 0, sizeof(*state));
|
|
|
|
state->prefix = prefix;
|
2018-08-13 18:14:39 +02:00
|
|
|
state->repo = repo;
|
2016-08-08 23:03:07 +02:00
|
|
|
state->apply = 1;
|
|
|
|
state->line_termination = '\n';
|
|
|
|
state->p_value = 1;
|
|
|
|
state->p_context = UINT_MAX;
|
|
|
|
state->squelch_whitespace_errors = 5;
|
|
|
|
state->ws_error_action = warn_on_ws_error;
|
|
|
|
state->ws_ignore_action = ignore_ws_none;
|
|
|
|
state->linenr = 1;
|
2021-07-01 12:51:29 +02:00
|
|
|
string_list_init_nodup(&state->fn_table);
|
|
|
|
string_list_init_nodup(&state->limit_by_name);
|
2022-01-07 13:16:53 +01:00
|
|
|
strset_init(&state->removed_symlinks);
|
|
|
|
strset_init(&state->kept_symlinks);
|
2016-08-08 23:03:07 +02:00
|
|
|
strbuf_init(&state->root, 0);
|
|
|
|
|
|
|
|
git_apply_config();
|
|
|
|
if (apply_default_whitespace && parse_whitespace_option(state, apply_default_whitespace))
|
2016-08-08 23:03:08 +02:00
|
|
|
return -1;
|
2016-08-08 23:03:07 +02:00
|
|
|
if (apply_default_ignorewhitespace && parse_ignorewhitespace_option(state, apply_default_ignorewhitespace))
|
2016-08-08 23:03:08 +02:00
|
|
|
return -1;
|
|
|
|
return 0;
|
2016-08-08 23:03:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void clear_apply_state(struct apply_state *state)
|
|
|
|
{
|
|
|
|
string_list_clear(&state->limit_by_name, 0);
|
2022-01-07 13:16:53 +01:00
|
|
|
strset_clear(&state->removed_symlinks);
|
|
|
|
strset_clear(&state->kept_symlinks);
|
2016-08-08 23:03:07 +02:00
|
|
|
strbuf_release(&state->root);
|
|
|
|
|
|
|
|
/* &state->fn_table is cleared at the end of apply_patch() */
|
|
|
|
}
|
2016-08-08 23:03:10 +02:00
|
|
|
|
2022-10-18 03:08:51 +02:00
|
|
|
/*
 * Message routine that discards its arguments; check_apply_state()
 * installs it as the error and warning routine when output is to be
 * silenced.
 */
static void mute_routine(const char *msg UNUSED, va_list params UNUSED)
{
	/* do nothing */
}
|
|
|
|
|
2016-08-08 23:03:10 +02:00
|
|
|
int check_apply_state(struct apply_state *state, int force_apply)
|
|
|
|
{
|
|
|
|
int is_not_gitdir = !startup_info->have_repository;
|
|
|
|
|
|
|
|
if (state->apply_with_reject && state->threeway)
|
2022-01-05 21:02:16 +01:00
|
|
|
return error(_("options '%s' and '%s' cannot be used together"), "--reject", "--3way");
|
2016-08-08 23:03:10 +02:00
|
|
|
if (state->threeway) {
|
|
|
|
if (is_not_gitdir)
|
2022-01-05 21:02:22 +01:00
|
|
|
return error(_("'%s' outside a repository"), "--3way");
|
2016-08-08 23:03:10 +02:00
|
|
|
state->check_index = 1;
|
|
|
|
}
|
2016-09-04 22:18:25 +02:00
|
|
|
if (state->apply_with_reject) {
|
|
|
|
state->apply = 1;
|
|
|
|
if (state->apply_verbosity == verbosity_normal)
|
|
|
|
state->apply_verbosity = verbosity_verbose;
|
|
|
|
}
|
2016-08-08 23:03:10 +02:00
|
|
|
if (!force_apply && (state->diffstat || state->numstat || state->summary || state->check || state->fake_ancestor))
|
|
|
|
state->apply = 0;
|
|
|
|
if (state->check_index && is_not_gitdir)
|
2022-01-05 21:02:22 +01:00
|
|
|
return error(_("'%s' outside a repository"), "--index");
|
2016-08-08 23:03:10 +02:00
|
|
|
if (state->cached) {
|
|
|
|
if (is_not_gitdir)
|
2022-01-05 21:02:22 +01:00
|
|
|
return error(_("'%s' outside a repository"), "--cached");
|
2016-08-08 23:03:10 +02:00
|
|
|
state->check_index = 1;
|
|
|
|
}
|
2018-05-26 14:08:46 +02:00
|
|
|
if (state->ita_only && (state->check_index || is_not_gitdir))
|
|
|
|
state->ita_only = 0;
|
2016-08-08 23:03:10 +02:00
|
|
|
if (state->check_index)
|
|
|
|
state->unsafe_paths = 0;
|
|
|
|
|
2016-09-04 22:18:29 +02:00
|
|
|
if (state->apply_verbosity <= verbosity_silent) {
|
|
|
|
state->saved_error_routine = get_error_routine();
|
|
|
|
state->saved_warn_routine = get_warn_routine();
|
|
|
|
set_error_routine(mute_routine);
|
|
|
|
set_warn_routine(mute_routine);
|
|
|
|
}
|
|
|
|
|
2016-08-08 23:03:10 +02:00
|
|
|
return 0;
|
|
|
|
}
|
2016-04-22 20:55:46 +02:00
|
|
|
|
|
|
|
static void set_default_whitespace_mode(struct apply_state *state)
|
|
|
|
{
|
|
|
|
if (!state->whitespace_option && !apply_default_whitespace)
|
|
|
|
state->ws_error_action = (state->apply ? warn_on_ws_error : nowarn_ws_error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * One "hunk" of a patch, i.e. the part that begins with a
 * "@@ -oldpos,oldlines +newpos,newlines @@" marker.  "patch" points
 * at the hunk text and "size" is its length in bytes; "leading" and
 * "trailing" count the context lines.
 */
struct fragment {
	unsigned long leading;
	unsigned long trailing;
	unsigned long oldpos;
	unsigned long oldlines;
	unsigned long newpos;
	unsigned long newlines;
	/*
	 * 'patch' is usually borrowed from buf in apply_patch(),
	 * but some codepaths store an allocated buffer.
	 */
	const char *patch;
	unsigned free_patch:1,	/* 'patch' is freed by free_fragment_list() */
		 rejected:1;
	int size;
	int linenr;
	struct fragment *next;
};

/*
 * For a binary patch the "leading" field is reused to hold the kind
 * of binary hunk: a deflated "delta" or a deflated "literal".
 */
#define binary_patch_method leading
#define BINARY_DELTA_DEFLATED	1
#define BINARY_LITERAL_DEFLATED 2
|
|
|
|
|
|
|
|
static void free_fragment_list(struct fragment *list)
|
|
|
|
{
|
|
|
|
while (list) {
|
|
|
|
struct fragment *next = list->next;
|
|
|
|
if (list->free_patch)
|
|
|
|
free((char *)list->patch);
|
|
|
|
free(list);
|
|
|
|
list = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-04 19:32:15 +01:00
|
|
|
void release_patch(struct patch *patch)
|
2016-04-22 20:55:46 +02:00
|
|
|
{
|
|
|
|
free_fragment_list(patch->fragments);
|
|
|
|
free(patch->def_name);
|
|
|
|
free(patch->old_name);
|
|
|
|
free(patch->new_name);
|
|
|
|
free(patch->result);
|
2022-03-04 19:32:15 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Release the contents of "patch" and then the structure itself. */
static void free_patch(struct patch *patch)
{
	release_patch(patch);
	free(patch);
}
|
|
|
|
|
|
|
|
static void free_patch_list(struct patch *list)
|
|
|
|
{
|
|
|
|
while (list) {
|
|
|
|
struct patch *next = list->next;
|
|
|
|
free_patch(list);
|
|
|
|
list = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * One line of a file.  "len" is its length in bytes, counting the
 * terminating LF (an incomplete last line has none), and "hash" is a
 * hash of its contents.
 */
struct line {
	size_t len;
	unsigned hash : 24;
	unsigned flag : 8;	/* combination of the LINE_* bits below */
#define LINE_COMMON	1
#define LINE_PATCHED	2
};
|
|
|
|
|
|
|
|
/*
 * A "file": a byte buffer plus an array describing its lines.
 */
struct image {
	char *buf;			/* file contents */
	size_t len;			/* number of bytes in buf */
	size_t nr;			/* number of line entries in use */
	size_t alloc;			/* allocated size of line_allocated */
	struct line *line_allocated;	/* the allocation, freed by clear_image() */
	struct line *line;		/* set to line_allocated by prepare_image() */
};
|
|
|
|
|
|
|
|
/*
 * Hash the "len" bytes at "cp", skipping whitespace bytes so that
 * lines differing only in whitespace get the same value.
 */
static uint32_t hash_line(const char *cp, size_t len)
{
	uint32_t hash = 0;

	for (; len; len--, cp++) {
		if (isspace(*cp))
			continue;
		hash = hash * 3 + (*cp & 0xff);
	}
	return hash;
}
|
|
|
|
|
|
|
|
/*
 * Compare line "s1" (n1 bytes) with line "s2" (n2 bytes) while
 * tolerating differences in the amount of whitespace.  Trailing CR
 * and LF bytes are disregarded on both sides.
 *
 * Returns 1 when the lines match, 0 otherwise.
 */
static int fuzzy_matchlines(const char *s1, size_t n1,
			    const char *s2, size_t n2)
{
	const char *a = s1, *a_end = s1 + n1;
	const char *b = s2, *b_end = s2 + n2;

	/* ignore line endings */
	while (a < a_end && (a_end[-1] == '\r' || a_end[-1] == '\n'))
		a_end--;
	while (b < b_end && (b_end[-1] == '\r' || b_end[-1] == '\n'))
		b_end--;

	while (a < a_end && b < b_end) {
		if (!isspace(*a)) {
			/* ordinary bytes have to agree exactly */
			if (*a != *b)
				return 0;
			a++;
			b++;
			continue;
		}

		/*
		 * A run of whitespace must be met by whitespace on the
		 * other side as well, or "a b" would match "ab".
		 */
		if (!isspace(*b))
			return 0;
		do {
			a++;
		} while (a < a_end && isspace(*a));
		do {
			b++;
		} while (b < b_end && isspace(*b));
	}

	/* If we reached the end on one side only, lines don't match. */
	return a == a_end && b == b_end;
}
|
|
|
|
|
|
|
|
static void add_line_info(struct image *img, const char *bol, size_t len, unsigned flag)
|
|
|
|
{
|
|
|
|
ALLOC_GROW(img->line_allocated, img->nr + 1, img->alloc);
|
|
|
|
img->line_allocated[img->nr].len = len;
|
|
|
|
img->line_allocated[img->nr].hash = hash_line(bol, len);
|
|
|
|
img->line_allocated[img->nr].flag = flag;
|
|
|
|
img->nr++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* "buf" has the file contents to be patched (read from various sources).
|
|
|
|
* attach it to "image" and add line-based index to it.
|
|
|
|
* "image" now owns the "buf".
|
|
|
|
*/
|
|
|
|
static void prepare_image(struct image *image, char *buf, size_t len,
|
|
|
|
int prepare_linetable)
|
|
|
|
{
|
|
|
|
const char *cp, *ep;
|
|
|
|
|
|
|
|
memset(image, 0, sizeof(*image));
|
|
|
|
image->buf = buf;
|
|
|
|
image->len = len;
|
|
|
|
|
|
|
|
if (!prepare_linetable)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ep = image->buf + image->len;
|
|
|
|
cp = image->buf;
|
|
|
|
while (cp < ep) {
|
|
|
|
const char *next;
|
|
|
|
for (next = cp; next < ep && *next != '\n'; next++)
|
|
|
|
;
|
|
|
|
if (next < ep)
|
|
|
|
next++;
|
|
|
|
add_line_info(image, cp, next - cp, 0);
|
|
|
|
cp = next;
|
|
|
|
}
|
|
|
|
image->line = image->line_allocated;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void clear_image(struct image *image)
|
|
|
|
{
|
|
|
|
free(image->buf);
|
|
|
|
free(image->line_allocated);
|
|
|
|
memset(image, 0, sizeof(*image));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* fmt must contain _one_ %s and no other substitution */
|
|
|
|
static void say_patch_name(FILE *output, const char *fmt, struct patch *patch)
|
|
|
|
{
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
|
|
|
|
if (patch->old_name && patch->new_name &&
|
|
|
|
strcmp(patch->old_name, patch->new_name)) {
|
|
|
|
quote_c_style(patch->old_name, &sb, NULL, 0);
|
|
|
|
strbuf_addstr(&sb, " => ");
|
|
|
|
quote_c_style(patch->new_name, &sb, NULL, 0);
|
|
|
|
} else {
|
|
|
|
const char *n = patch->new_name;
|
|
|
|
if (!n)
|
|
|
|
n = patch->old_name;
|
|
|
|
quote_c_style(n, &sb, NULL, 0);
|
|
|
|
}
|
|
|
|
fprintf(output, fmt, sb.buf);
|
|
|
|
fputc('\n', output);
|
|
|
|
strbuf_release(&sb);
|
|
|
|
}
|
|
|
|
|
|
|
|
#define SLOP (16)
|
|
|
|
|
apply: reject patches larger than ~1 GiB
The apply code is not prepared to handle extremely large files. It uses
"int" in some places, and "unsigned long" in others.
This combination leads to unfortunate problems when switching between
the two types. Using "int" prevents us from handling large files, since
large offsets will wrap around and spill into small negative values,
which can result in wrong behavior (like accessing the patch buffer with
a negative offset).
Converting from "unsigned long" to "int" also has truncation problems
even on LLP64 platforms where "long" is the same size as "int", since
the former is unsigned but the latter is not.
To avoid potential overflow and truncation issues in `git apply`, apply
similar treatment as in dcd1742e56 (xdiff: reject files larger than
~1GB, 2015-09-24), where the xdiff code was taught to reject large
files for similar reasons.
The maximum size was chosen somewhat arbitrarily, but picking a value
just shy of a gigabyte allows us to double it without overflowing 2^31-1
(after which point our value would wrap around to a negative number).
To give ourselves a bit of extra margin, the maximum patch size is a MiB
smaller than a full GiB, which gives us some slop in case we allocate
"(records + 1) * sizeof(int)" or similar.
Luckily, the security implications of these conversion issues are
relatively uninteresting, because a victim needs to be convinced to
apply a malicious patch.
Reported-by: 정재우 <thebound7@gmail.com>
Suggested-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-10-25 20:24:31 +02:00
|
|
|
/*
|
|
|
|
* apply.c isn't equipped to handle arbitrarily large patches, because
|
|
|
|
* it intermingles `unsigned long` with `int` for the type used to store
|
|
|
|
* buffer lengths.
|
|
|
|
*
|
|
|
|
* Only process patches that are just shy of 1 GiB large in order to
|
|
|
|
* avoid any truncation or overflow issues.
|
|
|
|
*/
|
|
|
|
#define MAX_APPLY_SIZE (1024UL * 1024 * 1023)
|
|
|
|
|
2016-04-22 20:55:46 +02:00
|
|
|
static int read_patch_file(struct strbuf *sb, int fd)
|
|
|
|
{
|
apply: reject patches larger than ~1 GiB
The apply code is not prepared to handle extremely large files. It uses
"int" in some places, and "unsigned long" in others.
This combination leads to unfortunate problems when switching between
the two types. Using "int" prevents us from handling large files, since
large offsets will wrap around and spill into small negative values,
which can result in wrong behavior (like accessing the patch buffer with
a negative offset).
Converting from "unsigned long" to "int" also has truncation problems
even on LLP64 platforms where "long" is the same size as "int", since
the former is unsigned but the latter is not.
To avoid potential overflow and truncation issues in `git apply`, apply
similar treatment as in dcd1742e56 (xdiff: reject files larger than
~1GB, 2015-09-24), where the xdiff code was taught to reject large
files for similar reasons.
The maximum size was chosen somewhat arbitrarily, but picking a value
just shy of a gigabyte allows us to double it without overflowing 2^31-1
(after which point our value would wrap around to a negative number).
To give ourselves a bit of extra margin, the maximum patch size is a MiB
smaller than a full GiB, which gives us some slop in case we allocate
"(records + 1) * sizeof(int)" or similar.
Luckily, the security implications of these conversion issues are
relatively uninteresting, because a victim needs to be convinced to
apply a malicious patch.
Reported-by: 정재우 <thebound7@gmail.com>
Suggested-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-10-25 20:24:31 +02:00
|
|
|
if (strbuf_read(sb, fd, 0) < 0 || sb->len >= MAX_APPLY_SIZE)
|
2016-04-22 20:55:46 +02:00
|
|
|
return error_errno("git apply: failed to read");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make sure that we have some slop in the buffer
|
|
|
|
* so that we can do speculative "memcmp" etc, and
|
|
|
|
* see to it that it is NUL-filled.
|
|
|
|
*/
|
|
|
|
strbuf_grow(sb, SLOP);
|
|
|
|
memset(sb->buf + sb->len, 0, SLOP);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Length of the first line in "buffer", including its LF.  When none
 * of the first "size" bytes is an LF, "size" is returned.
 */
static unsigned long linelen(const char *buffer, unsigned long size)
{
	unsigned long i;

	for (i = 0; i < size; i++) {
		if (buffer[i] == '\n')
			return i + 1;
	}
	return size;
}
|
|
|
|
|
|
|
|
/*
 * Returns 1 when "str" begins with "/dev/null" immediately followed
 * by a whitespace character, 0 otherwise.
 */
static int is_dev_null(const char *str)
{
	const char *rest;

	if (!skip_prefix(str, "/dev/null", &rest))
		return 0;
	return isspace(*rest) ? 1 : 0;
}
|
|
|
|
|
|
|
|
#define TERM_SPACE	1
#define TERM_TAB	2

/*
 * Decide whether character "c" ends a name.  A space only does so
 * when TERM_SPACE is set in "terminate", a tab only when TERM_TAB is
 * set; every other character is reported as terminating.
 */
static int name_terminate(int c, int terminate)
{
	switch (c) {
	case ' ':
		return (terminate & TERM_SPACE) != 0;
	case '\t':
		return (terminate & TERM_TAB) != 0;
	default:
		return 1;
	}
}
|
|
|
|
|
|
|
|
/*
 * Collapse every run of '/' in "name" into a single '/', in place,
 * to make --index work with such filenames.  Returns "name"; a NULL
 * argument is passed through.
 */
static char *squash_slash(char *name)
{
	char *src, *dst;

	if (!name)
		return NULL;

	src = dst = name;
	while (*src) {
		char c = *src++;

		*dst++ = c;
		if (c != '/')
			continue;
		/* drop the slashes that immediately follow */
		while (*src == '/')
			src++;
	}
	*dst = '\0';
	return name;
}
|
|
|
|
|
2019-07-08 18:33:06 +02:00
|
|
|
static char *find_name_gnu(struct strbuf *root,
|
2016-04-22 20:55:46 +02:00
|
|
|
const char *line,
|
|
|
|
int p_value)
|
|
|
|
{
|
|
|
|
struct strbuf name = STRBUF_INIT;
|
|
|
|
char *cp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Proposed "new-style" GNU patch/diff format; see
|
2019-11-27 13:54:04 +01:00
|
|
|
* https://lore.kernel.org/git/7vll0wvb2a.fsf@assigned-by-dhcp.cox.net/
|
2016-04-22 20:55:46 +02:00
|
|
|
*/
|
|
|
|
if (unquote_c_style(&name, line, NULL)) {
|
|
|
|
strbuf_release(&name);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (cp = name.buf; p_value; p_value--) {
|
|
|
|
cp = strchr(cp, '/');
|
|
|
|
if (!cp) {
|
|
|
|
strbuf_release(&name);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
cp++;
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_remove(&name, 0, cp - name.buf);
|
2019-07-08 18:33:06 +02:00
|
|
|
if (root->len)
|
|
|
|
strbuf_insert(&name, 0, root->buf, root->len);
|
2016-04-22 20:55:46 +02:00
|
|
|
return squash_slash(strbuf_detach(&name, NULL));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * If the "len" bytes at "line" end in a timezone of the form
 * " +0500" (space, sign, four digits), return the length of that
 * suffix; otherwise return 0.
 */
static size_t sane_tz_len(const char *line, size_t len)
{
	const size_t tz_len = strlen(" +0500");
	const char *tz, *p, *end;

	if (len < tz_len)
		return 0;

	end = line + len;
	tz = end - tz_len;
	if (tz[0] != ' ')
		return 0;
	if (tz[1] != '+' && tz[1] != '-')
		return 0;

	for (p = tz + 2; p != end; p++) {
		if (!isdigit(*p))
			return 0;
	}
	return tz_len;
}
|
|
|
|
|
|
|
|
/*
 * If the "len" bytes at "line" end in a timezone of the form
 * " +08:00" (space, sign, two digits, colon, two digits), return the
 * length of that suffix; otherwise return 0.
 */
static size_t tz_with_colon_len(const char *line, size_t len)
{
	const size_t tz_len = strlen(" +08:00");
	const char *tz;

	if (len < tz_len)
		return 0;
	tz = line + len - tz_len;

	if (tz[4] != ':')
		return 0;
	if (tz[0] != ' ')
		return 0;
	if (tz[1] != '+' && tz[1] != '-')
		return 0;
	if (!isdigit(tz[2]) || !isdigit(tz[3]))
		return 0;
	if (!isdigit(tz[5]) || !isdigit(tz[6]))
		return 0;

	return tz_len;
}
|
|
|
|
|
|
|
|
/*
 * If the "len" bytes at "line" end in a date of the form "72-02-05",
 * optionally preceded by two more digits to make a four-digit year,
 * return the length of that suffix; otherwise return 0.
 */
static size_t date_len(const char *line, size_t len)
{
	const size_t short_len = strlen("72-02-05");
	const size_t century_len = strlen("19");
	const char *date;

	if (len < short_len)
		return 0;
	date = line + len - short_len;

	if (date[5] != '-' || date[2] != '-')
		return 0;
	if (!isdigit(date[0]) || !isdigit(date[1]) ||
	    !isdigit(date[3]) || !isdigit(date[4]) ||
	    !isdigit(date[6]) || !isdigit(date[7]))
		return 0;	/* Not a date. */

	/* 4-digit year */
	if (len - short_len >= century_len &&
	    isdigit(date[-1]) && isdigit(date[-2]))
		return short_len + century_len;

	return short_len;
}
|
|
|
|
|
|
|
|
/*
 * If the "len" bytes at "line" end in a time of the form " 07:01:32"
 * (including the leading space), return the length of that suffix;
 * otherwise return 0.
 */
static size_t short_time_len(const char *line, size_t len)
{
	const size_t time_len = strlen(" 07:01:32");
	const char *t;

	if (len < time_len)
		return 0;
	t = line + len - time_len;

	/* Permit 1-digit hours? */
	if (t[0] != ' ' || t[3] != ':' || t[6] != ':')
		return 0;
	if (!isdigit(t[1]) || !isdigit(t[2]) ||
	    !isdigit(t[4]) || !isdigit(t[5]) ||
	    !isdigit(t[7]) || !isdigit(t[8]))
		return 0;	/* Not a time. */

	return time_len;
}
|
|
|
|
|
|
|
|
/*
 * If the "len" bytes at "line" end in a time with fractional seconds
 * such as " 19:41:17.620000023", return the length of that suffix
 * (leading space included); otherwise return 0.
 */
static size_t fractional_time_len(const char *line, size_t len)
{
	size_t dot, hms;

	/* Expected format: 19:41:17.620000023 */
	if (!len || !isdigit(line[len - 1]))
		return 0;

	/* Fractional seconds. */
	dot = len - 1;
	while (dot > 0 && isdigit(line[dot]))
		dot--;
	if (line[dot] != '.')
		return 0;

	/* Hours, minutes, and whole seconds. */
	hms = short_time_len(line, dot);
	if (!hms)
		return 0;

	return (len - dot) + hms;
}
|
|
|
|
|
|
|
|
/*
 * Number of space characters at the end of the "len" bytes at
 * "line"; 0 when the last byte is not a space or "len" is 0.
 */
static size_t trailing_spaces_len(const char *line, size_t len)
{
	size_t kept = len;

	/* Expected format: ' ' x (1 or more) */
	while (kept && line[kept - 1] == ' ')
		kept--;

	return len - kept;
}
|
|
|
|
|
|
|
|
/*
 * If the "len" bytes at "line" end in a timestamp as written by
 * diff, return the length of that timestamp including the separator
 * (a tab, or a run of spaces) in front of it; otherwise return 0.
 *
 * The timezone and the time of day are optional, the date is not.
 * An empty line yields 0 without looking at any byte.
 */
static size_t diff_timestamp_len(const char *line, size_t len)
{
	const char *end = line + len;
	size_t n;

	/*
	 * Posix: 2010-07-05 19:41:17
	 * GNU: 2010-07-05 19:41:17.620000023 -0500
	 */

	/* check len first: end[-1] lies before "line" when len is 0 */
	if (!len || !isdigit(end[-1]))
		return 0;

	/* optional timezone, with or without a colon */
	n = sane_tz_len(line, end - line);
	if (!n)
		n = tz_with_colon_len(line, end - line);
	end -= n;

	/* optional time of day, with or without fractional seconds */
	n = short_time_len(line, end - line);
	if (!n)
		n = fractional_time_len(line, end - line);
	end -= n;

	n = date_len(line, end - line);
	if (!n)	/* No date.  Too bad. */
		return 0;
	end -= n;

	if (end == line)	/* No space before date. */
		return 0;
	if (end[-1] == '\t') {	/* Success! */
		end--;
		return line + len - end;
	}
	if (end[-1] != ' ')	/* No space before date. */
		return 0;

	/* Whitespace damage. */
	end -= trailing_spaces_len(line, end - line);
	return line + len - end;
}
|
|
|
|
|
2019-07-08 18:33:06 +02:00
|
|
|
static char *find_name_common(struct strbuf *root,
|
2016-04-22 20:55:46 +02:00
|
|
|
const char *line,
|
|
|
|
const char *def,
|
|
|
|
int p_value,
|
|
|
|
const char *end,
|
|
|
|
int terminate)
|
|
|
|
{
|
|
|
|
int len;
|
|
|
|
const char *start = NULL;
|
|
|
|
|
|
|
|
if (p_value == 0)
|
|
|
|
start = line;
|
|
|
|
while (line != end) {
|
|
|
|
char c = *line;
|
|
|
|
|
|
|
|
if (!end && isspace(c)) {
|
|
|
|
if (c == '\n')
|
|
|
|
break;
|
|
|
|
if (name_terminate(c, terminate))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
line++;
|
|
|
|
if (c == '/' && !--p_value)
|
|
|
|
start = line;
|
|
|
|
}
|
|
|
|
if (!start)
|
|
|
|
return squash_slash(xstrdup_or_null(def));
|
|
|
|
len = line - start;
|
|
|
|
if (!len)
|
|
|
|
return squash_slash(xstrdup_or_null(def));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Generally we prefer the shorter name, especially
|
|
|
|
* if the other one is just a variation of that with
|
|
|
|
* something else tacked on to the end (ie "file.orig"
|
|
|
|
* or "file~").
|
|
|
|
*/
|
|
|
|
if (def) {
|
|
|
|
int deflen = strlen(def);
|
|
|
|
if (deflen < len && !strncmp(start, def, deflen))
|
|
|
|
return squash_slash(xstrdup(def));
|
|
|
|
}
|
|
|
|
|
2019-07-08 18:33:06 +02:00
|
|
|
if (root->len) {
|
|
|
|
char *ret = xstrfmt("%s%.*s", root->buf, len, start);
|
2016-04-22 20:55:46 +02:00
|
|
|
return squash_slash(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
return squash_slash(xmemdupz(start, len));
|
|
|
|
}
|
|
|
|
|
2019-07-08 18:33:06 +02:00
|
|
|
/*
 * Extract a file name from "line".  A line starting with a double
 * quote is first tried as a C-style quoted name; when that does not
 * apply or fails, find_name_common() is used without an explicit end
 * pointer.
 */
static char *find_name(struct strbuf *root,
		       const char *line,
		       char *def,
		       int p_value,
		       int terminate)
{
	char *name = NULL;

	if (*line == '"')
		name = find_name_gnu(root, line, p_value);
	if (!name)
		name = find_name_common(root, line, def, p_value, NULL, terminate);
	return name;
}
|
|
|
|
|
2019-07-08 18:33:06 +02:00
|
|
|
static char *find_name_traditional(struct strbuf *root,
|
2016-04-22 20:55:46 +02:00
|
|
|
const char *line,
|
|
|
|
char *def,
|
|
|
|
int p_value)
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
size_t date_len;
|
|
|
|
|
|
|
|
if (*line == '"') {
|
2019-07-08 18:33:06 +02:00
|
|
|
char *name = find_name_gnu(root, line, p_value);
|
2016-04-22 20:55:46 +02:00
|
|
|
if (name)
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
|
|
|
len = strchrnul(line, '\n') - line;
|
|
|
|
date_len = diff_timestamp_len(line, len);
|
|
|
|
if (!date_len)
|
2019-07-08 18:33:06 +02:00
|
|
|
return find_name_common(root, line, def, p_value, NULL, TERM_TAB);
|
2016-04-22 20:55:46 +02:00
|
|
|
len -= date_len;
|
|
|
|
|
2019-07-08 18:33:06 +02:00
|
|
|
return find_name_common(root, line, def, p_value, line + len, 0);
|
2016-04-22 20:55:46 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Given the string after "--- " or "+++ ", guess the appropriate
|
|
|
|
* p_value for the given patch.
|
|
|
|
*/
|
|
|
|
static int guess_p_value(struct apply_state *state, const char *nameline)
|
|
|
|
{
|
|
|
|
char *name, *cp;
|
|
|
|
int val = -1;
|
|
|
|
|
|
|
|
if (is_dev_null(nameline))
|
|
|
|
return -1;
|
2019-07-08 18:33:06 +02:00
|
|
|
name = find_name_traditional(&state->root, nameline, NULL, 0);
|
2016-04-22 20:55:46 +02:00
|
|
|
if (!name)
|
|
|
|
return -1;
|
|
|
|
cp = strchr(name, '/');
|
|
|
|
if (!cp)
|
|
|
|
val = 0;
|
|
|
|
else if (state->prefix) {
|
|
|
|
/*
|
|
|
|
* Does it begin with "a/$our-prefix" and such? Then this is
|
|
|
|
* very likely to apply to our directory.
|
|
|
|
*/
|
2017-08-09 17:54:46 +02:00
|
|
|
if (starts_with(name, state->prefix))
|
2016-04-22 20:55:46 +02:00
|
|
|
val = count_slashes(state->prefix);
|
|
|
|
else {
|
|
|
|
cp++;
|
2017-08-09 17:54:46 +02:00
|
|
|
if (starts_with(cp, state->prefix))
|
2016-04-22 20:55:46 +02:00
|
|
|
val = count_slashes(state->prefix) + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(name);
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Does the ---/+++ line have the POSIX timestamp after the last HT?
|
|
|
|
* GNU diff puts epoch there to signal a creation/deletion event. Is
|
|
|
|
* this such a timestamp?
|
|
|
|
*/
|
|
|
|
static int has_epoch_timestamp(const char *nameline)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We are only interested in epoch timestamp; any non-zero
|
|
|
|
* fraction cannot be one, hence "(\.0+)?" in the regexp below.
|
|
|
|
* For the same reason, the date must be either 1969-12-31 or
|
|
|
|
* 1970-01-01, and the seconds part must be "00".
|
|
|
|
*/
|
|
|
|
const char stamp_regexp[] =
|
2017-08-25 21:06:28 +02:00
|
|
|
"^[0-2][0-9]:([0-5][0-9]):00(\\.0+)?"
|
2016-04-22 20:55:46 +02:00
|
|
|
" "
|
|
|
|
"([-+][0-2][0-9]:?[0-5][0-9])\n";
|
|
|
|
const char *timestamp = NULL, *cp, *colon;
|
|
|
|
static regex_t *stamp;
|
|
|
|
regmatch_t m[10];
|
2017-08-25 21:04:54 +02:00
|
|
|
int zoneoffset, epoch_hour, hour, minute;
|
2016-04-22 20:55:46 +02:00
|
|
|
int status;
|
|
|
|
|
|
|
|
for (cp = nameline; *cp != '\n'; cp++) {
|
|
|
|
if (*cp == '\t')
|
|
|
|
timestamp = cp + 1;
|
|
|
|
}
|
|
|
|
if (!timestamp)
|
|
|
|
return 0;
|
2017-08-25 21:04:54 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* YYYY-MM-DD hh:mm:ss must be from either 1969-12-31
|
|
|
|
* (west of GMT) or 1970-01-01 (east of GMT)
|
|
|
|
*/
|
2017-08-25 21:06:28 +02:00
|
|
|
if (skip_prefix(timestamp, "1969-12-31 ", ×tamp))
|
2017-08-25 21:04:54 +02:00
|
|
|
epoch_hour = 24;
|
2017-08-25 21:06:28 +02:00
|
|
|
else if (skip_prefix(timestamp, "1970-01-01 ", ×tamp))
|
2017-08-25 21:04:54 +02:00
|
|
|
epoch_hour = 0;
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
|
2016-04-22 20:55:46 +02:00
|
|
|
if (!stamp) {
|
|
|
|
stamp = xmalloc(sizeof(*stamp));
|
|
|
|
if (regcomp(stamp, stamp_regexp, REG_EXTENDED)) {
|
|
|
|
warning(_("Cannot prepare timestamp regexp %s"),
|
|
|
|
stamp_regexp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
status = regexec(stamp, timestamp, ARRAY_SIZE(m), m, 0);
|
|
|
|
if (status) {
|
|
|
|
if (status != REG_NOMATCH)
|
|
|
|
warning(_("regexec returned %d for input: %s"),
|
|
|
|
status, timestamp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-08-25 21:06:28 +02:00
|
|
|
hour = strtol(timestamp, NULL, 10);
|
|
|
|
minute = strtol(timestamp + m[1].rm_so, NULL, 10);
|
2017-08-25 21:04:54 +02:00
|
|
|
|
2016-04-22 20:55:46 +02:00
|
|
|
zoneoffset = strtol(timestamp + m[3].rm_so + 1, (char **) &colon, 10);
|
|
|
|
if (*colon == ':')
|
|