Git Source Code Mirror - This is a publish-only repository and all pull requests are ignored. Please follow Documentation/SubmittingPatches procedure for any of your improvements. https://git-scm.com/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
git/strbuf.c

1203 lines
26 KiB

#include "cache.h"
#include "refs.h"
#include "string-list.h"
#include "utf8.h"
#include "date.h"
int starts_with(const char *str, const char *prefix)
{
for (; ; str++, prefix++)
if (!*prefix)
return 1;
else if (*str != *prefix)
return 0;
}
int istarts_with(const char *str, const char *prefix)
{
for (; ; str++, prefix++)
if (!*prefix)
return 1;
else if (tolower(*str) != tolower(*prefix))
return 0;
}
int skip_to_optional_arg_default(const char *str, const char *prefix,
const char **arg, const char *def)
{
const char *p;
if (!skip_prefix(str, prefix, &p))
return 0;
if (!*p) {
if (arg)
*arg = def;
return 1;
}
if (*p != '=')
return 0;
if (arg)
*arg = p + 1;
return 1;
}
/*
* Used as the default ->buf value, so that people can always assume
* buf is non NULL and ->buf is NUL terminated even for a freshly
* initialized strbuf.
*/
char strbuf_slopbuf[1];
void strbuf_init(struct strbuf *sb, size_t hint)
{
struct strbuf blank = STRBUF_INIT;
memcpy(sb, &blank, sizeof(*sb));
if (hint)
strbuf_grow(sb, hint);
}
void strbuf_release(struct strbuf *sb)
{
if (sb->alloc) {
free(sb->buf);
strbuf_init(sb, 0);
}
}
char *strbuf_detach(struct strbuf *sb, size_t *sz)
{
char *res;
strbuf_grow(sb, 0);
res = sb->buf;
if (sz)
*sz = sb->len;
strbuf_init(sb, 0);
return res;
}
void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc)
{
strbuf_release(sb);
sb->buf = buf;
sb->len = len;
sb->alloc = alloc;
strbuf_grow(sb, 0);
sb->buf[sb->len] = '\0';
}
void strbuf_grow(struct strbuf *sb, size_t extra)
{
int new_buf = !sb->alloc;
if (unsigned_add_overflows(extra, 1) ||
unsigned_add_overflows(sb->len, extra + 1))
die("you want to use way too much memory");
if (new_buf)
sb->buf = NULL;
ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
if (new_buf)
sb->buf[0] = '\0';
}
void strbuf_trim(struct strbuf *sb)
{
strbuf_rtrim(sb);
strbuf_ltrim(sb);
}
void strbuf_rtrim(struct strbuf *sb)
{
while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
sb->len--;
sb->buf[sb->len] = '\0';
}
void strbuf_trim_trailing_dir_sep(struct strbuf *sb)
{
while (sb->len > 0 && is_dir_sep((unsigned char)sb->buf[sb->len - 1]))
sb->len--;
sb->buf[sb->len] = '\0';
}
void strbuf_trim_trailing_newline(struct strbuf *sb)
{
if (sb->len > 0 && sb->buf[sb->len - 1] == '\n') {
if (--sb->len > 0 && sb->buf[sb->len - 1] == '\r')
--sb->len;
sb->buf[sb->len] = '\0';
}
}
void strbuf_ltrim(struct strbuf *sb)
{
char *b = sb->buf;
while (sb->len > 0 && isspace(*b)) {
b++;
sb->len--;
}
memmove(sb->buf, b, sb->len);
sb->buf[sb->len] = '\0';
}
int strbuf_reencode(struct strbuf *sb, const char *from, const char *to)
{
char *out;
size_t len;
if (same_encoding(from, to))
return 0;
out = reencode_string_len(sb->buf, sb->len, to, from, &len);
if (!out)
return -1;
strbuf_attach(sb, out, len, len);
return 0;
}
void strbuf_tolower(struct strbuf *sb)
{
char *p = sb->buf, *end = sb->buf + sb->len;
for (; p < end; p++)
*p = tolower(*p);
}
struct strbuf **strbuf_split_buf(const char *str, size_t slen,
int terminator, int max)
{
struct strbuf **ret = NULL;
size_t nr = 0, alloc = 0;
struct strbuf *t;
while (slen) {
int len = slen;
if (max <= 0 || nr + 1 < max) {
const char *end = memchr(str, terminator, slen);
if (end)
len = end - str + 1;
}
t = xmalloc(sizeof(struct strbuf));
strbuf_init(t, len);
strbuf_add(t, str, len);
ALLOC_GROW(ret, nr + 2, alloc);
ret[nr++] = t;
str += len;
slen -= len;
}
ALLOC_GROW(ret, nr + 1, alloc); /* In case string was empty */
ret[nr] = NULL;
return ret;
}
void strbuf_add_separated_string_list(struct strbuf *str,
const char *sep,
struct string_list *slist)
{
struct string_list_item *item;
int sep_needed = 0;
for_each_string_list_item(item, slist) {
if (sep_needed)
strbuf_addstr(str, sep);
strbuf_addstr(str, item->string);
sep_needed = 1;
}
}
void strbuf_list_free(struct strbuf **sbs)
{
struct strbuf **s = sbs;
mailinfo: also free strbuf lists when clearing mailinfo mailinfo.p_hdr_info/s_hdr_info are null-terminated lists of strbuf's, with entries pointing either to NULL or an allocated strbuf. Therefore we need to free those strbuf's (and not just the data they contain) whenever we're done with a given entry. (See handle_header() where those new strbufs are malloc'd.) Once we no longer need the list (and not just its entries) we can switch over to strbuf_list_free() instead of manually iterating over the list, which takes care of those additional details for us. We can only do this in clear_mailinfo() - in handle_commit_message() we are only clearing the array contents but want to reuse the array itself, hence we can't use strbuf_list_free() there. However, strbuf_list_free() cannot handle a NULL input, and the lists we are freeing might be NULL. Therefore we add a NULL check in strbuf_list_free() to make it safe to use with a NULL input (which is a pattern used by some of the other *_free() functions around git). Leak output from t0023: Direct leak of 72 byte(s) in 3 object(s) allocated from: #0 0x49a85d in malloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:145:3 #1 0x9ac9f4 in do_xmalloc wrapper.c:41:8 #2 0x9ac9ca in xmalloc wrapper.c:62:9 #3 0x7f6cf7 in handle_header mailinfo.c:205:10 #4 0x7f5abf in check_header mailinfo.c:583:4 #5 0x7f5524 in mailinfo mailinfo.c:1197:3 #6 0x4dcc95 in parse_mail builtin/am.c:1167:6 #7 0x4d9070 in am_run builtin/am.c:1732:12 #8 0x4d5b7a in cmd_am builtin/am.c:2398:3 #9 0x4cd91d in run_builtin git.c:467:11 #10 0x4cb5f3 in handle_builtin git.c:719:3 #11 0x4ccf47 in run_argv git.c:808:4 #12 0x4caf49 in cmd_main git.c:939:19 #13 0x69e43e in main common-main.c:52:11 #14 0x7fc1fadfa349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 72 byte(s) leaked in 3 allocation(s). Signed-off-by: Andrzej Hunt <ajrhunt@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 years ago
if (!s)
return;
while (*s) {
strbuf_release(*s);
free(*s++);
}
free(sbs);
}
int strbuf_cmp(const struct strbuf *a, const struct strbuf *b)
{
size_t len = a->len < b->len ? a->len: b->len;
int cmp = memcmp(a->buf, b->buf, len);
if (cmp)
return cmp;
return a->len < b->len ? -1: a->len != b->len;
}
void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
const void *data, size_t dlen)
{
if (unsigned_add_overflows(pos, len))
die("you want to use way too much memory");
if (pos > sb->len)
die("`pos' is too far after the end of the buffer");
if (pos + len > sb->len)
die("`pos + len' is too far after the end of the buffer");
if (dlen >= len)
strbuf_grow(sb, dlen - len);
memmove(sb->buf + pos + dlen,
sb->buf + pos + len,
sb->len - pos - len);
memcpy(sb->buf + pos, data, dlen);
strbuf_setlen(sb, sb->len + dlen - len);
}
void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
{
strbuf_splice(sb, pos, 0, data, len);
}
void strbuf_vinsertf(struct strbuf *sb, size_t pos, const char *fmt, va_list ap)
{
int len, len2;
char save;
va_list cp;
if (pos > sb->len)
die("`pos' is too far after the end of the buffer");
va_copy(cp, ap);
len = vsnprintf(sb->buf + sb->len, 0, fmt, cp);
va_end(cp);
if (len < 0)
BUG("your vsnprintf is broken (returned %d)", len);
if (!len)
return; /* nothing to do */
if (unsigned_add_overflows(sb->len, len))
die("you want to use way too much memory");
strbuf_grow(sb, len);
memmove(sb->buf + pos + len, sb->buf + pos, sb->len - pos);
/* vsnprintf() will append a NUL, overwriting one of our characters */
save = sb->buf[pos + len];
len2 = vsnprintf(sb->buf + pos, len + 1, fmt, ap);
sb->buf[pos + len] = save;
if (len2 != len)
BUG("your vsnprintf is broken (returns inconsistent lengths)");
strbuf_setlen(sb, sb->len + len);
}
void strbuf_insertf(struct strbuf *sb, size_t pos, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
strbuf_vinsertf(sb, pos, fmt, ap);
va_end(ap);
}
void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
{
strbuf_splice(sb, pos, len, "", 0);
}
void strbuf_add(struct strbuf *sb, const void *data, size_t len)
{
strbuf_grow(sb, len);
memcpy(sb->buf + sb->len, data, len);
strbuf_setlen(sb, sb->len + len);
}
void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2)
{
strbuf_grow(sb, sb2->len);
memcpy(sb->buf + sb->len, sb2->buf, sb2->len);
strbuf_setlen(sb, sb->len + sb2->len);
}
const char *strbuf_join_argv(struct strbuf *buf,
int argc, const char **argv, char delim)
{
if (!argc)
return buf->buf;
strbuf_addstr(buf, *argv);
while (--argc) {
strbuf_addch(buf, delim);
strbuf_addstr(buf, *(++argv));
}
return buf->buf;
}
void strbuf_addchars(struct strbuf *sb, int c, size_t n)
{
strbuf_grow(sb, n);
memset(sb->buf + sb->len, c, n);
strbuf_setlen(sb, sb->len + n);
}
void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
strbuf_vaddf(sb, fmt, ap);
va_end(ap);
}
static void add_lines(struct strbuf *out,
const char *prefix1,
const char *prefix2,
const char *buf, size_t size)
{
while (size) {
const char *prefix;
const char *next = memchr(buf, '\n', size);
next = next ? (next + 1) : (buf + size);
prefix = ((prefix2 && (buf[0] == '\n' || buf[0] == '\t'))
? prefix2 : prefix1);
strbuf_addstr(out, prefix);
strbuf_add(out, buf, next - buf);
size -= next - buf;
buf = next;
}
strbuf_complete_line(out);
}
void strbuf_add_commented_lines(struct strbuf *out, const char *buf, size_t size)
{
static char prefix1[3];
static char prefix2[2];
if (prefix1[0] != comment_line_char) {
xsnprintf(prefix1, sizeof(prefix1), "%c ", comment_line_char);
xsnprintf(prefix2, sizeof(prefix2), "%c", comment_line_char);
}
add_lines(out, prefix1, prefix2, buf, size);
}
void strbuf_commented_addf(struct strbuf *sb, const char *fmt, ...)
{
va_list params;
struct strbuf buf = STRBUF_INIT;
int incomplete_line = sb->len && sb->buf[sb->len - 1] != '\n';
va_start(params, fmt);
strbuf_vaddf(&buf, fmt, params);
va_end(params);
strbuf_add_commented_lines(sb, buf.buf, buf.len);
if (incomplete_line)
sb->buf[--sb->len] = '\0';
strbuf_release(&buf);
}
void strbuf_vaddf(struct strbuf *sb, const char *fmt, va_list ap)
{
int len;
va_list cp;
if (!strbuf_avail(sb))
strbuf_grow(sb, 64);
va_copy(cp, ap);
len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, cp);
va_end(cp);
if (len < 0)
BUG("your vsnprintf is broken (returned %d)", len);
if (len > strbuf_avail(sb)) {
strbuf_grow(sb, len);
len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
if (len > strbuf_avail(sb))
BUG("your vsnprintf is broken (insatiable)");
}
strbuf_setlen(sb, sb->len + len);
}
void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
void *context)
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. :) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
{
for (;;) {
const char *percent;
size_t consumed;
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. :) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
percent = strchrnul(format, '%');
strbuf_add(sb, format, percent - format);
if (!*percent)
break;
format = percent + 1;
if (*format == '%') {
strbuf_addch(sb, '%');
format++;
continue;
}
consumed = fn(sb, format, context);
if (consumed)
format += consumed;
else
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. :) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 years ago
strbuf_addch(sb, '%');
}
}
size_t strbuf_expand_literal_cb(struct strbuf *sb,
const char *placeholder,
void *context)
{
int ch;
switch (placeholder[0]) {
case 'n': /* newline */
strbuf_addch(sb, '\n');
return 1;
case 'x':
/* %x00 == NUL, %x0a == LF, etc. */
ch = hex2chr(placeholder + 1);
if (ch < 0)
return 0;
strbuf_addch(sb, ch);
return 3;
}
return 0;
}
size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder,
void *context)
{
struct strbuf_expand_dict_entry *e = context;
size_t len;
for (; e->placeholder && (len = strlen(e->placeholder)); e++) {
if (!strncmp(placeholder, e->placeholder, len)) {
if (e->value)
strbuf_addstr(sb, e->value);
return len;
}
}
return 0;
}
void strbuf_addbuf_percentquote(struct strbuf *dst, const struct strbuf *src)
{
size_t i, len = src->len;
for (i = 0; i < len; i++) {
if (src->buf[i] == '%')
strbuf_addch(dst, '%');
strbuf_addch(dst, src->buf[i]);
}
}
#define URL_UNSAFE_CHARS " <>\"%{}|\\^`:?#[]@!$&'()*+,;="
void strbuf_add_percentencode(struct strbuf *dst, const char *src, int flags)
{
size_t i, len = strlen(src);
for (i = 0; i < len; i++) {
unsigned char ch = src[i];
if (ch <= 0x1F || ch >= 0x7F ||
(ch == '/' && (flags & STRBUF_ENCODE_SLASH)) ||
strchr(URL_UNSAFE_CHARS, ch))
strbuf_addf(dst, "%%%02X", (unsigned char)ch);
else
strbuf_addch(dst, ch);
}
}
size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
{
size_t res;
size_t oldalloc = sb->alloc;
strbuf_grow(sb, size);
res = fread(sb->buf + sb->len, 1, size, f);
if (res > 0)
strbuf_setlen(sb, sb->len + res);
else if (oldalloc == 0)
strbuf_release(sb);
return res;
}
ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint)
{
size_t oldlen = sb->len;
size_t oldalloc = sb->alloc;
strbuf_grow(sb, hint ? hint : 8192);
for (;;) {
ssize_t want = sb->alloc - sb->len - 1;
ssize_t got = read_in_full(fd, sb->buf + sb->len, want);
if (got < 0) {
if (oldalloc == 0)
strbuf_release(sb);
else
strbuf_setlen(sb, oldlen);
return -1;
}
sb->len += got;
if (got < want)
break;
strbuf_grow(sb, 8192);
}
sb->buf[sb->len] = '\0';
return sb->len - oldlen;
}
ssize_t strbuf_read_once(struct strbuf *sb, int fd, size_t hint)
{
size_t oldalloc = sb->alloc;
ssize_t cnt;
strbuf_grow(sb, hint ? hint : 8192);
cnt = xread(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
if (cnt > 0)
strbuf_setlen(sb, sb->len + cnt);
else if (oldalloc == 0)
strbuf_release(sb);
return cnt;
}
ssize_t strbuf_write(struct strbuf *sb, FILE *f)
{
return sb->len ? fwrite(sb->buf, 1, sb->len, f) : 0;
}
#define STRBUF_MAXLINK (2*PATH_MAX)
int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint)
{
size_t oldalloc = sb->alloc;