git/builtin/am.c

2481 lines
62 KiB
C
Raw Normal View History

/*
* Builtin "git am"
*
* Based on git-am.sh by Junio C Hamano.
*/
#define USE_THE_INDEX_COMPATIBILITY_MACROS
#include "cache.h"
#include "config.h"
#include "builtin.h"
#include "exec-cmd.h"
#include "parse-options.h"
#include "dir.h"
#include "run-command.h"
#include "quote.h"
#include "tempfile.h"
#include "lockfile.h"
#include "cache-tree.h"
#include "refs.h"
#include "commit.h"
#include "diff.h"
#include "diffcore.h"
#include "unpack-trees.h"
#include "branch.h"
#include "sequencer.h"
#include "revision.h"
#include "merge-recursive.h"
#include "log-tree.h"
#include "notes-utils.h"
#include "rerere.h"
#include "prompt.h"
#include "mailinfo.h"
builtin/am: use apply API in run_apply() This replaces run_apply() implementation with a new one that uses the apply API that has been previously prepared in apply.c and apply.h. This should improve performance a lot in certain cases. As the previous implementation was creating a new `git apply` process to apply each patch, it could be slow on systems like Windows where it is costly to create new processes. Also the new `git apply` process had to read the index from disk, and when the process was done the calling process discarded its own index and read back from disk the new index that had been created by the `git apply` process. This could be very inefficient with big repositories that have big index files, especially when the system decided that it was a good idea to run the `git apply` processes on a different processor core. Also eliminating index reads enables further performance improvements by using: `git update-index --split-index` For example here is a benchmark of a multi hundred commit rebase on the Linux kernel on a Debian laptop with SSD: command: git rebase --onto 1993b17 52bef0c 29dde7c Vanilla "next" without split index: 1m54.953s Vanilla "next" with split index: 1m22.476s This series on top of "next" without split index: 1m12.034s This series on top of "next" with split index: 0m15.678s (using branch "next" from mid April 2016.) Benchmarked-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-04 22:18:33 +02:00
#include "apply.h"
#include "string-list.h"
#include "packfile.h"
#include "repository.h"
/**
 * Returns the number of bytes in `msg` that precede its first newline. When
 * `msg` contains no newline at all, the length of the whole string is
 * returned instead.
 */
static int linelen(const char *msg)
{
	const char *eol = strchr(msg, '\n');

	if (!eol)
		return strlen(msg);
	return eol - msg;
}
/**
 * Returns 1 when every character of `str` is whitespace (an empty string
 * qualifies), 0 as soon as a non-whitespace character is found.
 */
static int str_isspace(const char *str)
{
	const char *p = str;

	/* Skip the leading run of whitespace; stop at the terminator. */
	while (*p && isspace(*p))
		p++;

	/* Only whitespace was seen if the scan reached the end. */
	return *p == '\0';
}
/*
 * Formats of incoming patches, as reported by detect_patch_format().
 * PATCH_FORMAT_UNKNOWN doubles as the "detection failed" value.
 */
enum patch_format {
	PATCH_FORMAT_UNKNOWN      = 0,
	PATCH_FORMAT_MBOX         = 1,
	PATCH_FORMAT_STGIT        = 2,
	PATCH_FORMAT_STGIT_SERIES = 3,
	PATCH_FORMAT_HG           = 4,
	PATCH_FORMAT_MBOXRD       = 5
};
/* Values stored in am_state.keep. */
enum keep_type {
	KEEP_FALSE     = 0,
	KEEP_TRUE      = 1,	/* pass -k flag to git-mailinfo */
	KEEP_NON_PATCH = 2	/* pass -b flag to git-mailinfo */
};
/* Values stored in am_state.scissors. */
enum scissors_type {
	SCISSORS_UNSET = -1,
	SCISSORS_FALSE = 0,	/* pass --no-scissors to git-mailinfo */
	SCISSORS_TRUE  = 1	/* pass --scissors to git-mailinfo */
};
/* Values stored in am_state.signoff. */
enum signoff_type {
	SIGNOFF_FALSE    = 0,
	SIGNOFF_TRUE     = 1,
	SIGNOFF_EXPLICIT = 2	/* --signoff was set on the command-line */
};
am: support --show-current-patch=raw as a synonym for --show-current-patch When "git am --show-current-patch" was added in commit 984913a210 ("am: add --show-current-patch", 2018-02-12), "git am" started recommending it as a replacement for .git/rebase-merge/patch. Unfortunately the suggestion is somewhat misguided; for example, the output "git am --show-current-patch" cannot be passed to "git apply" if it is encoded as quoted-printable or base64. To simplify worktree operations and to avoid that users poke into .git, it would be better if "git am" also provided a mode that copies .git/rebase-merge/patch to stdout. One possibility could be to have completely separate options, introducing for example --show-current-message (for .git/rebase-apply/NNNN) and --show-current-diff (for .git/rebase-apply/patch), while possibly deprecating --show-current-patch. That would even remove the need for the first two patches in the series. However, the long common prefix would have prevented using an abbreviated option such as "--show". Therefore, I chose instead to add a string argument to --show-current-patch. The new argument is optional, so that "git am --show-current-patch"'s behavior remains backwards-compatible. The next choice to make is how to handle multiple --show-current-patch options. Right now, something like "git am --abort --show-current-patch" is rejected, and the previous suggestion would likewise have naturally rejected a command line like git am --show-current-message --show-current-diff Therefore, I decided to also reject for example git am --show-current-patch=diff --show-current-patch=raw In other words the whole of --show-current-patch=xxx (including the optional argument) is treated as the command mode. I found this to be more consistent and intuitive, even though it differs from the usual "last one wins" semantics of the git command line. Add the code to parse submodes based on the above design, where for now "raw" is the only valid submode. 
"raw" prints the full e-mail message just like "git am --show-current-patch". Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-20 15:15:18 +01:00
/*
 * Sub-modes of --show-current-patch.
 *
 * NOTE(review): judging by the names, SHOW_PATCH_RAW selects the full e-mail
 * message and SHOW_PATCH_DIFF the extracted diff; the code that consumes
 * these values is not part of this view -- confirm there.
 */
enum show_patch_type {
	SHOW_PATCH_RAW = 0,
	SHOW_PATCH_DIFF = 1,
};
/*
 * All of the bookkeeping for one "git am" session. An instance is set up by
 * am_state_init(), can be refreshed from the on-disk state directory by
 * am_load(), and is torn down by am_state_release().
 */
struct am_state {
/* state directory path; am_state_init() sets it to git_pathdup("rebase-apply") */
char *dir;
/*
 * current and last patch numbers, 1-indexed; am_load() reads them from the
 * "next" and "last" state files respectively
 */
int cur;
int last;
/*
 * commit metadata and message: the author fields are filled in from the
 * "author-script" state file and msg/msg_len from "final-commit"; all four
 * strings are freed by am_state_release()
 */
char *author_name;
char *author_email;
char *author_date;
char *msg;
size_t msg_len;
/*
 * when --rebasing, records the original commit the patch came from;
 * am_load() reads it from "original-commit" and clears it when that file
 * is missing
 */
struct object_id orig_commit;
/* number of digits in patch filename; am_state_init() defaults this to 4 */
int prec;
/* various operating modes and command line options */
int interactive;
int threeway; /* seeded from the am.threeway config by am_state_init() */
int quiet; /* when set, say() prints nothing */
int signoff; /* enum signoff_type */
int utf8; /* defaults to 1 in am_state_init() */
int keep; /* enum keep_type */
int message_id; /* seeded from the am.messageid config by am_state_init() */
int scissors; /* enum scissors_type */
int quoted_cr; /* enum quoted_cr_action */
/* am_load() repopulates this from the "apply-opt" state file */
struct strvec git_apply_opts;
const char *resolvemsg;
int committer_date_is_author_date;
int ignore_date;
/* am_load() stores RERERE_AUTOUPDATE, RERERE_NOAUTOUPDATE, or 0 when unset */
int allow_rerere_autoupdate;
/* am_state_init() sets "" when commit.gpgsign is true, NULL when it is false */
const char *sign_commit;
/* am_load() sets this when a "rebasing" file exists in the state directory */
int rebasing;
};
/**
 * Resets `state` to all-zero and then fills in the defaults: the state
 * directory path, the patch filename precision, the utf8/scissors/quoted_cr
 * modes, an empty list of "git apply" options, and whatever the
 * am.threeway, am.messageid and commit.gpgsign configuration provides.
 */
static void am_state_init(struct am_state *state)
{
	int sign_by_default;

	memset(state, 0, sizeof(*state));

	state->dir = git_pathdup("rebase-apply");

	/* Plain defaults that do not depend on configuration. */
	state->prec = 4;
	state->utf8 = 1;
	state->scissors = SCISSORS_UNSET;
	state->quoted_cr = quoted_cr_unset;

	git_config_get_bool("am.threeway", &state->threeway);
	git_config_get_bool("am.messageid", &state->message_id);

	strvec_init(&state->git_apply_opts);

	/* sign_commit is only touched when the lookup returns 0. */
	if (!git_config_get_bool("commit.gpgsign", &sign_by_default)) {
		if (sign_by_default)
			state->sign_commit = "";
		else
			state->sign_commit = NULL;
	}
}
/**
 * Frees the strings owned by `state` (directory path, author fields and
 * commit message) and clears its list of "git apply" options. The struct
 * itself is not freed.
 */
static void am_state_release(struct am_state *state)
{
	strvec_clear(&state->git_apply_opts);

	free(state->msg);
	free(state->author_date);
	free(state->author_email);
	free(state->author_name);
	free(state->dir);
}
/**
 * Option callback for --quoted-cr: hands `arg` to
 * mailinfo_parse_quoted_cr_action(), which stores the result through
 * opt->value. Returns 0 when that succeeds, otherwise the result of error().
 * The negated form of the option is rejected via BUG_ON_OPT_NEG().
 */
static int am_option_parse_quoted_cr(const struct option *opt,
				     const char *arg, int unset)
{
	BUG_ON_OPT_NEG(unset);

	if (!mailinfo_parse_quoted_cr_action(arg, opt->value))
		return 0;

	return error(_("bad action '%s' for '%s'"), arg, "--quoted-cr");
}
/**
 * Builds "<state->dir>/<path>" with mkpath() and returns the result.
 *
 * NOTE(review): the returned string is owned by mkpath(); how long it stays
 * valid is not visible from here -- confirm before holding on to it.
 */
static inline const char *am_path(const struct am_state *state, const char *path)
{
	const char *joined = mkpath("%s/%s", state->dir, path);

	return joined;
}
/**
 * Convenience wrapper around write_file(): stores `string` in the state
 * file called `name`.
 */
static void write_state_text(const struct am_state *state,
			     const char *name, const char *string)
{
	const char *dest = am_path(state, name);

	write_file(dest, "%s", string);
}
/**
 * Stores `value`, formatted as a decimal integer, in the state file called
 * `name`.
 */
static void write_state_count(const struct am_state *state,
			      const char *name, int value)
{
	const char *dest = am_path(state, name);

	write_file(dest, "%d", value);
}
/**
 * Stores a boolean in the state file called `name`: "t" when `value` is
 * non-zero, "f" otherwise.
 */
static void write_state_bool(const struct am_state *state,
			     const char *name, int value)
{
	if (value)
		write_state_text(state, name, "t");
	else
		write_state_text(state, name, "f");
}
/**
 * printf-style message helper: unless state->quiet is set, formats the
 * message to `fp` and appends a newline. Does nothing in quiet mode.
 */
__attribute__((format (printf, 3, 4)))
static void say(const struct am_state *state, FILE *fp, const char *fmt, ...)
{
	va_list args;

	if (state->quiet)
		return;

	va_start(args, fmt);
	vfprintf(fp, fmt, args);
	putc('\n', fp);
	va_end(args);
}
/**
 * Returns 1 if there is an am session in progress, 0 otherwise.
 *
 * A session counts as in progress when the state directory exists and is a
 * directory, and both its "last" and "next" entries are regular files.
 */
static int am_in_progress(const struct am_state *state)
{
	static const char *const required[] = { "last", "next" };
	struct stat st;
	size_t i;

	if (lstat(state->dir, &st) < 0 || !S_ISDIR(st.st_mode))
		return 0;

	for (i = 0; i < sizeof(required) / sizeof(required[0]); i++) {
		if (lstat(am_path(state, required[i]), &st))
			return 0;
		if (!S_ISREG(st.st_mode))
			return 0;
	}

	return 1;
}
/**
 * Loads the contents of the state file `file` into `sb`, replacing whatever
 * `sb` held before. With `trim` set, the buffer is passed through
 * strbuf_trim() after reading.
 *
 * Returns the resulting length of `sb` on success, or -1 when the file does
 * not exist (errno is ENOENT). Any other read failure is fatal.
 */
static int read_state_file(struct strbuf *sb, const struct am_state *state,
			   const char *file, int trim)
{
	strbuf_reset(sb);

	if (strbuf_read_file(sb, am_path(state, file), 0) < 0) {
		if (errno == ENOENT)
			return -1;
		die_errno(_("could not read '%s'"), am_path(state, file));
	}

	if (trim)
		strbuf_trim(sb);
	return sb->len;
}
/**
 * Parses the "author-script" file of the state directory and stores the
 * result in state->author_name, state->author_email and state->author_date.
 * All three fields must still be unset when this is called.
 * Returns 0 on success, -1 if the file could not be parsed.
 *
 * The author script is of the format:
 *
 *	GIT_AUTHOR_NAME='$author_name'
 *	GIT_AUTHOR_EMAIL='$author_email'
 *	GIT_AUTHOR_DATE='$author_date'
 *
 * where $author_name, $author_email and $author_date are quoted. Parsing is
 * deliberately strict: the file was meant to be eval'd by the old git-am.sh
 * script, so when it does not look the way we expect it is better to bail
 * out than to do something the user does not expect.
 */
static int read_am_author_script(struct am_state *state)
{
	assert(!state->author_name);
	assert(!state->author_email);
	assert(!state->author_date);

	return read_author_script(am_path(state, "author-script"),
				  &state->author_name,
				  &state->author_email,
				  &state->author_date,
				  1);
}
/**
* Saves state->author_name, state->author_email and state->author_date in the
* state directory's "author-script" file.
*/
static void write_author_script(const struct am_state *state)
{
struct strbuf sb = STRBUF_INIT;
strbuf_addstr(&sb, "GIT_AUTHOR_NAME=");
sq_quote_buf(&sb, state->author_name);
strbuf_addch(&sb, '\n');
strbuf_addstr(&sb, "GIT_AUTHOR_EMAIL=");
sq_quote_buf(&sb, state->author_email);
strbuf_addch(&sb, '\n');
strbuf_addstr(&sb, "GIT_AUTHOR_DATE=");
sq_quote_buf(&sb, state->author_date);
strbuf_addch(&sb, '\n');
write_state_text(state, "author-script", sb.buf);
strbuf_release(&sb);
}
/**
* Reads the commit message from the state directory's "final-commit" file,
* setting state->msg to its contents and state->msg_len to the length of its
* contents in bytes.
*
* Returns 0 on success, -1 if the file does not exist.
*/
static int read_commit_msg(struct am_state *state)
{
struct strbuf sb = STRBUF_INIT;
assert(!state->msg);
if (read_state_file(&sb, state, "final-commit", 0) < 0) {
strbuf_release(&sb);
return -1;
}
state->msg = strbuf_detach(&sb, &state->msg_len);
return 0;
}
/**
 * Writes the state->msg_len bytes of state->msg to the "final-commit" file
 * of the state directory.
 */
static void write_commit_msg(const struct am_state *state)
{
	write_file_buf(am_path(state, "final-commit"),
		       state->msg, state->msg_len);
}
/**
* Loads state from disk.
*/
static void am_load(struct am_state *state)
{
struct strbuf sb = STRBUF_INIT;
if (read_state_file(&sb, state, "next", 1) < 0)
BUG("state file 'next' does not exist");
state->cur = strtol(sb.buf, NULL, 10);
if (read_state_file(&sb, state, "last", 1) < 0)
BUG("state file 'last' does not exist");
state->last = strtol(sb.buf, NULL, 10);
if (read_am_author_script(state) < 0)
die(_("could not parse author script"));
read_commit_msg(state);
if (read_state_file(&sb, state, "original-commit", 1) < 0)
oidclr(&state->orig_commit);
else if (get_oid_hex(sb.buf, &state->orig_commit) < 0)
die(_("could not parse %s"), am_path(state, "original-commit"));
read_state_file(&sb, state, "threeway", 1);
state->threeway = !strcmp(sb.buf, "t");
read_state_file(&sb, state, "quiet", 1);
state->quiet = !strcmp(sb.buf, "t");
read_state_file(&sb, state, "sign", 1);
state->signoff = !strcmp(sb.buf, "t");
read_state_file(&sb, state, "utf8", 1);
state->utf8 = !strcmp(sb.buf, "t");
if (file_exists(am_path(state, "rerere-autoupdate"))) {
read_state_file(&sb, state, "rerere-autoupdate", 1);
state->allow_rerere_autoupdate = strcmp(sb.buf, "t") ?
RERERE_NOAUTOUPDATE : RERERE_AUTOUPDATE;
} else {
state->allow_rerere_autoupdate = 0;
}
read_state_file(&sb, state, "keep", 1);
if (!strcmp(sb.buf, "t"))
state->keep = KEEP_TRUE;
else if (!strcmp(sb.buf, "b"))
state->keep = KEEP_NON_PATCH;
else
state->keep = KEEP_FALSE;
read_state_file(&sb, state, "messageid", 1);
state->message_id = !strcmp(sb.buf, "t");
read_state_file(&sb, state, "scissors", 1);
if (!strcmp(sb.buf, "t"))
state->scissors = SCISSORS_TRUE;
else if (!strcmp(sb.buf, "f"))
state->scissors = SCISSORS_FALSE;
else
state->scissors = SCISSORS_UNSET;
read_state_file(&sb, state, "quoted-cr", 1);
if (!*sb.buf)
state->quoted_cr = quoted_cr_unset;
else if (mailinfo_parse_quoted_cr_action(sb.buf, &state->quoted_cr) != 0)
die(_("could not parse %s"), am_path(state, "quoted-cr"));
read_state_file(&sb, state, "apply-opt", 1);
strvec_clear(&state->git_apply_opts);
if (sq_dequote_to_strvec(sb.buf, &state->git_apply_opts) < 0)
die(_("could not parse %s"), am_path(state, "apply-opt"));
state->rebasing = !!file_exists(am_path(state, "rebasing"));
strbuf_release(&sb);
}
/**
* Removes the am_state directory, forcefully terminating the current am
* session.
*/
static void am_destroy(const struct am_state *state)
{
struct strbuf sb = STRBUF_INIT;
strbuf_addstr(&sb, state->dir);
remove_dir_recursively(&sb, 0);
strbuf_release(&sb);
}
/**
 * Runs the applypatch-msg hook on the "final-commit" state file and returns
 * the hook's exit code. When the hook succeeds, state->msg is dropped and
 * re-read from that file; it is fatal if the file no longer exists.
 */
static int run_applypatch_msg_hook(struct am_state *state)
{
	int status;

	assert(state->msg);

	status = run_hook_le(NULL, "applypatch-msg",
			     am_path(state, "final-commit"), NULL);
	if (status)
		return status;

	/* Reload the message from the file the hook was given. */
	FREE_AND_NULL(state->msg);
	if (read_commit_msg(state) < 0)
		die(_("'%s' was deleted by the applypatch-msg hook"),
		    am_path(state, "final-commit"));

	return status;
}
/**
* Runs post-rewrite hook. Returns it exit code.
*/
static int run_post_rewrite_hook(const struct am_state *state)
{
struct child_process cp = CHILD_PROCESS_INIT;
const char *hook = find_hook("post-rewrite");
int ret;
if (!hook)
return 0;
strvec_push(&cp.args, hook);
strvec_push(&cp.args, "rebase");
cp.in = xopen(am_path(state, "rewritten"), O_RDONLY);
cp.stdout_to_stderr = 1;
cp.trace2_hook_name = "post-rewrite";
ret = run_command(&cp);
close(cp.in);
return ret;
}
/**
* Reads the state directory's "rewritten" file, and copies notes from the old
* commits listed in the file to their rewritten commits.
*
* Returns 0 on success, -1 on failure.
*/
static int copy_notes_for_rebase(const struct am_state *state)
{
struct notes_rewrite_cfg *c;
struct strbuf sb = STRBUF_INIT;
const char *invalid_line = _("Malformed input line: '%s'.");
const char *msg = "Notes added by 'git rebase'";
FILE *fp;
int ret = 0;
assert(state->rebasing);
c = init_copy_notes_for_rewrite("rebase");
if (!c)
return 0;
fp = xfopen(am_path(state, "rewritten"), "r");
while (!strbuf_getline_lf(&sb, fp)) {
struct object_id from_obj, to_obj;
const char *p;
if (sb.len != the_hash_algo->hexsz * 2 + 1) {
ret = error(invalid_line, sb.buf);
goto finish;
}
if (parse_oid_hex(sb.buf, &from_obj, &p)) {
ret = error(invalid_line, sb.buf);
goto finish;
}
if (*p != ' ') {
ret = error(invalid_line, sb.buf);
goto finish;
}
if (get_oid_hex(p + 1, &to_obj)) {
ret = error(invalid_line, sb.buf);
goto finish;
}
if (copy_note_for_rewrite(c, &from_obj, &to_obj))
ret = error(_("Failed to copy notes from '%s' to '%s'"),
oid_to_hex(&from_obj), oid_to_hex(&to_obj));
}
finish:
finish_copy_notes_for_rewrite(the_repository, c, msg);
fclose(fp);
strbuf_release(&sb);
return ret;
}
/**
* Determines if the file looks like a piece of RFC2822 mail by grabbing all
* non-indented lines and checking if they look like they begin with valid
* header field names.
*
* Returns 1 if the file looks like a piece of mail, 0 otherwise.
*/
static int is_mail(FILE *fp)
{
const char *header_regex = "^[!-9;-~]+:";
struct strbuf sb = STRBUF_INIT;
regex_t regex;
int ret = 1;
if (fseek(fp, 0L, SEEK_SET))
die_errno(_("fseek failed"));
if (regcomp(&regex, header_regex, REG_NOSUB | REG_EXTENDED))
die("invalid pattern: %s", header_regex);
while (!strbuf_getline(&sb, fp)) {
if (!sb.len)
break; /* End of header */
/* Ignore indented folded lines */
if (*sb.buf == '\t' || *sb.buf == ' ')
continue;
/* It's a header if it matches header_regex */
if (regexec(&regex, sb.buf, 0, NULL, 0)) {
ret = 0;
goto done;
}
}
done:
regfree(&regex);
strbuf_release(&sb);
return ret;
}
/**
* Attempts to detect the patch_format of the patches contained in `paths`,
* returning the PATCH_FORMAT_* enum value. Returns PATCH_FORMAT_UNKNOWN if
* detection fails.
*/
static int detect_patch_format(const char **paths)
{
enum patch_format ret = PATCH_FORMAT_UNKNOWN;
struct strbuf l1 = STRBUF_INIT;
struct strbuf l2 = STRBUF_INIT;
struct strbuf l3 = STRBUF_INIT;
FILE *fp;
/*
* We default to mbox format if input is from stdin and for directories
*/
if (!*paths || !strcmp(*paths, "-") || is_directory(*paths))
return PATCH_FORMAT_MBOX;
/*
* Otherwise, check the first few lines of the first patch, starting
* from the first non-blank line, to try to detect its format.
*/
fp = xfopen(*paths, "r");
while (!strbuf_getline(&l1, fp)) {
if (l1.len)
break;
}
if (starts_with(l1.buf, "From ") || starts_with(l1.buf, "From: ")) {
ret = PATCH_FORMAT_MBOX;
goto done;
}
if (starts_with(l1.buf, "# This series applies on GIT commit")) {
ret = PATCH_FORMAT_STGIT_SERIES;
goto done;
}