From ed79b2cf034b81473dd1fa9648593b245c07daea Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 23 May 2017 15:51:32 -0400 Subject: handle_revision_arg: reset "dotdot" consistently When we are parsing a range like "a..b", we write a temporary NUL over the first ".", so that we can access the names "a" and "b" as C strings. But our restoration of the original "." is done at inconsistent times, which can lead to confusing results. For most calls, we restore the "." after we resolve the names, but before we call verify_non_filename(). This means that when we later call add_pending_object(), the name for the left-hand "a" has been re-expanded to "a..b". You can see this with: git log --source a...b where "b" will be correctly marked with "b", but "a" will be marked with "a...b". Likewise with "a..b" (though you need to use --boundary to even see "a" at all in that case). To top off the confusion, when the REVARG_CANNOT_BE_FILENAME flag is set, we skip the non-filename check, and leave the NUL in place. That means we do report the correct name for "a" in the pending array. But some code paths try to show the whole "a...b" name in error messages, and these erroneously show only "a" instead of "a...b". E.g.: $ git cherry-pick HEAD:foo...HEAD:foo error: object d95f3ad14dee633a758d2e331151e950dd13e4ed is a blob, not a commit error: object d95f3ad14dee633a758d2e331151e950dd13e4ed is a blob, not a commit fatal: Invalid symmetric difference expression HEAD:foo (That last message should be "HEAD:foo...HEAD:foo"; I used cherry-pick because it passes the CANNOT_BE_FILENAME flag). As an interesting side note, cherry-pick actually looks at and re-resolves the arguments from the pending->name fields. So it would have been visibly broken by the first bug, but the effect was canceled out by the second one. This patch makes the whole function consistent by re-writing the NUL immediately after calling verify_non_filename(), and then restoring the "." as appropriate in some error-printing and early-return code paths. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/revision.c b/revision.c index 7ff61ff..2bcd60a 100644 --- a/revision.c +++ b/revision.c @@ -1477,12 +1477,14 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi if (!cant_be_filename) { *dotdot = '.'; verify_non_filename(revs->prefix, arg); + *dotdot = '\0'; } a_obj = parse_object(from_sha1); b_obj = parse_object(sha1); if (!a_obj || !b_obj) { missing: + *dotdot = '.'; if (revs->ignore_missing) return 0; die(symmetric @@ -1525,6 +1527,7 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi REV_CMD_RIGHT, flags); add_pending_object(revs, a_obj, this); add_pending_object(revs, b_obj, next); + *dotdot = '.'; return 0; } *dotdot = '.'; diff --git a/t/t4202-log.sh b/t/t4202-log.sh index f577990..6da1bbe 100755 --- a/t/t4202-log.sh +++ b/t/t4202-log.sh @@ -1380,4 +1380,13 @@ test_expect_success 'log --source paints tag names' ' test_cmp expect actual ' +test_expect_success 'log --source paints symmetric ranges' ' + cat >expect <<-\EOF && + 09e12a9 source-b three + 8e393e1 source-a two + EOF + git log --oneline --source source-a...source-b >actual && + test_cmp expect actual +' + test_done -- cgit v0.10.2-6-g49f6 From 1d6c93817bf22d6bf279bac302911cc93f63046c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:48:46 -0400 Subject: handle_revision_arg: simplify commit reference lookups The "dotdot" range parser avoids calling lookup_commit_reference() if we are directly fed two commits. But its casts are unnecessarily complex; that function will just return a commit we pass into it. Just calling the function all the time is much simpler, and doesn't do any significant extra work (the object is already parsed, and deref_tag() on a non-tag is a noop; we do incur one extra lookup_object() call, but that's fairly trivial). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/revision.c b/revision.c index 2bcd60a..dc32812 100644 --- a/revision.c +++ b/revision.c @@ -1500,12 +1500,8 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi struct commit *a, *b; struct commit_list *exclude; - a = (a_obj->type == OBJ_COMMIT - ? (struct commit *)a_obj - : lookup_commit_reference(a_obj->oid.hash)); - b = (b_obj->type == OBJ_COMMIT - ? (struct commit *)b_obj - : lookup_commit_reference(b_obj->oid.hash)); + a = lookup_commit_reference(a_obj->oid.hash); + b = lookup_commit_reference(b_obj->oid.hash); if (!a || !b) goto missing; exclude = get_merge_bases(a, b); -- cgit v0.10.2-6-g49f6 From f632dedd8d29da20d546612d908a5c1b2ebf37aa Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:50:07 -0400 Subject: handle_revision_arg: stop using "dotdot" as a generic pointer The handle_revision_arg() function has a "dotdot" variable that it uses to find a ".." or "..." in the argument. If we don't find one, we look for other marks, like "^!". But we just keep re-using the "dotdot" variable, which is confusing. Let's introduce a separate "mark" variable that can be used for these other marks. They still reuse the same variable, but at least the name is no longer actively misleading. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/revision.c b/revision.c index dc32812..c98804a 100644 --- a/revision.c +++ b/revision.c @@ -1433,6 +1433,7 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi { struct object_context oc; char *dotdot; + char *mark; struct object *object; unsigned char sha1[20]; int local_flags; @@ -1529,33 +1530,33 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi *dotdot = '.'; } - dotdot = strstr(arg, "^@"); - if (dotdot && !dotdot[2]) { - *dotdot = 0; + mark = strstr(arg, "^@"); + if (mark && !mark[2]) { + *mark = 0; if (add_parents_only(revs, arg, flags, 0)) return 0; - *dotdot = '^'; + *mark = '^'; } - dotdot = strstr(arg, "^!"); - if (dotdot && !dotdot[2]) { - *dotdot = 0; + mark = strstr(arg, "^!"); + if (mark && !mark[2]) { + *mark = 0; if (!add_parents_only(revs, arg, flags ^ (UNINTERESTING | BOTTOM), 0)) - *dotdot = '^'; + *mark = '^'; } - dotdot = strstr(arg, "^-"); - if (dotdot) { + mark = strstr(arg, "^-"); + if (mark) { int exclude_parent = 1; - if (dotdot[2]) { + if (mark[2]) { char *end; - exclude_parent = strtoul(dotdot + 2, &end, 10); + exclude_parent = strtoul(mark + 2, &end, 10); if (*end != '\0' || !exclude_parent) return -1; } - *dotdot = 0; + *mark = 0; if (!add_parents_only(revs, arg, flags ^ (UNINTERESTING | BOTTOM), exclude_parent)) - *dotdot = '^'; + *mark = '^'; } local_flags = 0; -- cgit v0.10.2-6-g49f6 From d89797feff053bba939b62ee442f56e3fc98062b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:51:40 -0400 Subject: handle_revision_arg: hoist ".." check out of range parsing Since 003c84f6d (specifying ranges: we did not mean to make ".." an empty set, 2011-05-02), we treat the argument ".." specially. We detect it by noticing that both sides of the range are empty, and that this is a non-symmetric two-dot range. While correct, this makes the code overly complicated. We can just detect ".." up front before we try to do further parsing. This avoids having to de-munge the NUL from dotdot, and lets us eliminate an extra const array (which we needed only to do direct pointer comparisons). It also removes the one code path from the range-parsing conditional that requires us to return -1. That will make it simpler to pull the dotdot parsing out into its own function. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/revision.c b/revision.c index c98804a..2664f08 100644 --- a/revision.c +++ b/revision.c @@ -1443,6 +1443,14 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi flags = flags & UNINTERESTING ? flags | BOTTOM : flags & ~BOTTOM; + if (!cant_be_filename && !strcmp(arg, "..")) { + /* + * Just ".."? That is not a range but the + * pathspec for the parent directory. + */ + return -1; + } + dotdot = strstr(arg, ".."); if (dotdot) { unsigned char from_sha1[20]; @@ -1450,27 +1458,15 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi const char *this = arg; int symmetric = *next == '.'; unsigned int flags_exclude = flags ^ (UNINTERESTING | BOTTOM); - static const char head_by_default[] = "HEAD"; unsigned int a_flags; *dotdot = 0; next += symmetric; if (!*next) - next = head_by_default; + next = "HEAD"; if (dotdot == arg) - this = head_by_default; - if (this == head_by_default && next == head_by_default && - !symmetric) { - /* - * Just ".."? That is not a range but the - * pathspec for the parent directory. - */ - if (!cant_be_filename) { - *dotdot = '.'; - return -1; - } - } + this = "HEAD"; if (!get_sha1_committish(this, from_sha1) && !get_sha1_committish(next, sha1)) { struct object *a_obj, *b_obj; -- cgit v0.10.2-6-g49f6 From 62faad5aa5441942c333c15f3afa03a6e2dd994d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:52:00 -0400 Subject: handle_revision_arg: add handle_dotdot() helper The handle_revision_arg function is rather long, and a big chunk of it is handling the range operators. Let's pull that out to a separate helper. While we're doing so, we can clean up a few of the rough edges that made the flow hard to follow: - instead of manually restoring *dotdot (that we overwrote with a NUL), do the real work in a sub-helper, which makes it clear that the munge/restore lines are a matched pair - eliminate a goto which wasn't actually used for control flow, but only to avoid duplicating a few lines (instead, those lines are pushed into another helper function) - use early returns instead of deep nesting - consistently name all variables for the left-hand side of the range as "a" (rather than "this" or "from") and the right-hand side as "b" (rather than "next", or using the unadorned "sha1" or "flags" from the main function). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/revision.c b/revision.c index 2664f08..4970769 100644 --- a/revision.c +++ b/revision.c @@ -1429,10 +1429,109 @@ static void prepare_show_merge(struct rev_info *revs) revs->limited = 1; } +static int dotdot_missing(const char *arg, char *dotdot, + struct rev_info *revs, int symmetric) +{ + if (revs->ignore_missing) + return 0; + /* de-munge so we report the full argument */ + *dotdot = '.'; + die(symmetric + ? "Invalid symmetric difference expression %s" + : "Invalid revision range %s", arg); +} + +static int handle_dotdot_1(const char *arg, char *dotdot, + struct rev_info *revs, int flags, + int cant_be_filename) +{ + const char *a_name, *b_name; + struct object_id a_oid, b_oid; + struct object *a_obj, *b_obj; + unsigned int a_flags, b_flags; + int symmetric = 0; + unsigned int flags_exclude = flags ^ (UNINTERESTING | BOTTOM); + + a_name = arg; + if (!*a_name) + a_name = "HEAD"; + + b_name = dotdot + 2; + if (*b_name == '.') { + symmetric = 1; + b_name++; + } + if (!*b_name) + b_name = "HEAD"; + + if (get_sha1_committish(a_name, a_oid.hash) || + get_sha1_committish(b_name, b_oid.hash)) + return -1; + + if (!cant_be_filename) { + *dotdot = '.'; + verify_non_filename(revs->prefix, arg); + *dotdot = '\0'; + } + + a_obj = parse_object(a_oid.hash); + b_obj = parse_object(b_oid.hash); + if (!a_obj || !b_obj) + return dotdot_missing(arg, dotdot, revs, symmetric); + + if (!symmetric) { + /* just A..B */ + b_flags = flags; + a_flags = flags_exclude; + } else { + /* A...B -- find merge bases between the two */ + struct commit *a, *b; + struct commit_list *exclude; + + a = lookup_commit_reference(a_obj->oid.hash); + b = lookup_commit_reference(b_obj->oid.hash); + if (!a || !b) + return dotdot_missing(arg, dotdot, revs, symmetric); + + exclude = get_merge_bases(a, b); + add_rev_cmdline_list(revs, exclude, REV_CMD_MERGE_BASE, + flags_exclude); + add_pending_commit_list(revs, exclude, flags_exclude); + free_commit_list(exclude); + + b_flags = flags; + a_flags = flags | SYMMETRIC_LEFT; + } + + a_obj->flags |= a_flags; + b_obj->flags |= b_flags; + add_rev_cmdline(revs, a_obj, a_name, REV_CMD_LEFT, a_flags); + add_rev_cmdline(revs, b_obj, b_name, REV_CMD_RIGHT, b_flags); + add_pending_object(revs, a_obj, a_name); + add_pending_object(revs, b_obj, b_name); + return 0; +} + +static int handle_dotdot(const char *arg, + struct rev_info *revs, int flags, + int cant_be_filename) +{ + char *dotdot = strstr(arg, ".."); + int ret; + + if (!dotdot) + return -1; + + *dotdot = '\0'; + ret = handle_dotdot_1(arg, dotdot, revs, flags, cant_be_filename); + *dotdot = '.'; + + return ret; +} + int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsigned revarg_opt) { struct object_context oc; - char *dotdot; char *mark; struct object *object; unsigned char sha1[20]; @@ -1451,80 +1550,8 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi return -1; } - dotdot = strstr(arg, ".."); - if (dotdot) { - unsigned char from_sha1[20]; - const char *next = dotdot + 2; - const char *this = arg; - int symmetric = *next == '.'; - unsigned int flags_exclude = flags ^ (UNINTERESTING | BOTTOM); - unsigned int a_flags; - - *dotdot = 0; - next += symmetric; - - if (!*next) - next = "HEAD"; - if (dotdot == arg) - this = "HEAD"; - if (!get_sha1_committish(this, from_sha1) && - !get_sha1_committish(next, sha1)) { - struct object *a_obj, *b_obj; - - if (!cant_be_filename) { - *dotdot = '.'; - verify_non_filename(revs->prefix, arg); - *dotdot = '\0'; - } - - a_obj = parse_object(from_sha1); - b_obj = parse_object(sha1); - if (!a_obj || !b_obj) { - missing: - *dotdot = '.'; - if (revs->ignore_missing) - return 0; - die(symmetric - ? "Invalid symmetric difference expression %s" - : "Invalid revision range %s", arg); - } - - if (!symmetric) { - /* just A..B */ - a_flags = flags_exclude; - } else { - /* A...B -- find merge bases between the two */ - struct commit *a, *b; - struct commit_list *exclude; - - a = lookup_commit_reference(a_obj->oid.hash); - b = lookup_commit_reference(b_obj->oid.hash); - if (!a || !b) - goto missing; - exclude = get_merge_bases(a, b); - add_rev_cmdline_list(revs, exclude, - REV_CMD_MERGE_BASE, - flags_exclude); - add_pending_commit_list(revs, exclude, - flags_exclude); - free_commit_list(exclude); - - a_flags = flags | SYMMETRIC_LEFT; - } - - a_obj->flags |= a_flags; - b_obj->flags |= flags; - add_rev_cmdline(revs, a_obj, this, - REV_CMD_LEFT, a_flags); - add_rev_cmdline(revs, b_obj, next, - REV_CMD_RIGHT, flags); - add_pending_object(revs, a_obj, this); - add_pending_object(revs, b_obj, next); - *dotdot = '.'; - return 0; - } - *dotdot = '.'; - } + if (!handle_dotdot(arg, revs, flags, revarg_opt)) + return 0; mark = strstr(arg, "^@"); if (mark && !mark[2]) { -- cgit v0.10.2-6-g49f6 From c0a487eafb63301004af424bf02b1951abdc4da7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:52:06 -0400 Subject: sha1_name: consistently refer to object_context as "oc" An early version of the patch to add object_context used the name object_resolve_context. This was later shortened to just object_context, but the "orc" variable name stuck in a few places. Let's use "oc", which is used elsewhere in the code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/cache.h b/cache.h index e1f0e18..6cae30f 100644 --- a/cache.h +++ b/cache.h @@ -1363,7 +1363,7 @@ extern int get_sha1_tree(const char *str, unsigned char *sha1); extern int get_sha1_treeish(const char *str, unsigned char *sha1); extern int get_sha1_blob(const char *str, unsigned char *sha1); extern void maybe_die_on_misspelt_object_name(const char *name, const char *prefix); -extern int get_sha1_with_context(const char *str, unsigned flags, unsigned char *sha1, struct object_context *orc); +extern int get_sha1_with_context(const char *str, unsigned flags, unsigned char *sha1, struct object_context *oc); extern int get_oid(const char *str, struct object_id *oid); diff --git a/sha1_name.c b/sha1_name.c index 8eec9f7..179f214 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -1638,9 +1638,9 @@ void maybe_die_on_misspelt_object_name(const char *name, const char *prefix) get_sha1_with_context_1(name, GET_SHA1_ONLY_TO_DIE, prefix, sha1, &oc); } -int get_sha1_with_context(const char *str, unsigned flags, unsigned char *sha1, struct object_context *orc) +int get_sha1_with_context(const char *str, unsigned flags, unsigned char *sha1, struct object_context *oc) { if (flags & GET_SHA1_FOLLOW_SYMLINKS && flags & GET_SHA1_ONLY_TO_DIE) die("BUG: incompatible flags for get_sha1_with_context"); - return get_sha1_with_context_1(str, flags, NULL, sha1, orc); + return get_sha1_with_context_1(str, flags, NULL, sha1, oc); } -- cgit v0.10.2-6-g49f6 From d72cae12b9a7bba3a6626e0b5805955eafdefcc6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:52:25 -0400 Subject: get_sha1_with_context: always initialize oc->symlink_path The get_sha1_with_context() function zeroes out the oc->symlink_path strbuf, but doesn't use strbuf_init() to set up the usual invariants (like pointing to the slopbuf). We don't actually write to the oc->symlink_path strbuf unless we call get_tree_entry_follow_symlinks(), and that function does initialize it. However, readers may still look at the zero'd strbuf. In practice this isn't a triggerable bug. The only caller that looks at it only does so when the mode we found is 0. This doesn't happen for non-tree-entries (where we return S_IFINVALID). A broken tree entry could have a mode of 0, but canon_mode() quietly rewrites that into S_IFGITLINK. So the "0" mode should only come up when we did indeed find a symlink. This is mostly just an accident of how the code happens to work, though. Let's future-proof ourselves to make sure the strbuf is properly initialized for all calls (it's only a few struct member assignments, not a heap allocation). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/sha1_name.c b/sha1_name.c index 179f214..e7b0393 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -1511,6 +1511,7 @@ static int get_sha1_with_context_1(const char *name, memset(oc, 0, sizeof(*oc)); oc->mode = S_IFINVALID; + strbuf_init(&oc->symlink_path, 0); ret = get_sha1_1(name, namelen, sha1, flags); if (!ret) return ret; diff --git a/tree-walk.c b/tree-walk.c index ff77605..c7ecfc8 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -589,7 +589,6 @@ enum follow_symlinks_result get_tree_entry_follow_symlinks(unsigned char *tree_s int i; init_tree_desc(&t, NULL, 0UL); - strbuf_init(result_path, 0); strbuf_addstr(&namebuf, name); hashcpy(current_tree_sha1, tree_sha1); -- cgit v0.10.2-6-g49f6 From dc944b65f1d13e258edc88a7608e2fe957ec334e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:54:43 -0400 Subject: get_sha1_with_context: dynamically allocate oc->path When a sha1 lookup returns the tree path via "struct object_context", it just copies it into a fixed-size buffer. This means the result can be truncated, and it means our "struct object_context" consumes a lot of stack space. Instead, let's allocate a string on the heap. Because most callers don't care about this information, we'll avoid doing it by default (so they don't all have to start calling free() on the result). There are basically two options for the caller to signal to us that it's interested: 1. By setting a pointer to storage in the object_context. 2. By passing a flag in another parameter. Doing (1) would match the way that sha1_object_info_extended() works. But it would mean that every caller would have to initialize the object_context, which they don't currently have to do. This patch does (2), and adds a new bit to the function's flags field. All of the callers that look at the "path" field are updated to pass the new flag. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 1890d7a..4217095 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -61,7 +61,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name, if (unknown_type) flags |= LOOKUP_UNKNOWN_OBJECT; - if (get_sha1_with_context(obj_name, 0, oid.hash, &obj_context)) + if (get_sha1_with_context(obj_name, GET_SHA1_RECORD_PATH, + oid.hash, &obj_context)) die("Not a valid object name %s", obj_name); if (!path) @@ -165,6 +166,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name, die("git cat-file %s: bad file", obj_name); write_or_die(1, buf, size); + free(obj_context.path); return 0; } diff --git a/builtin/grep.c b/builtin/grep.c index 3ffb5b4..254c1c7 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1190,7 +1190,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) break; } - if (get_sha1_with_context(arg, 0, oid.hash, &oc)) { + if (get_sha1_with_context(arg, GET_SHA1_RECORD_PATH, + oid.hash, &oc)) { if (seen_dashdash) die(_("unable to resolve revision: %s"), arg); break; @@ -1200,6 +1201,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!seen_dashdash) verify_non_filename(prefix, arg); add_object_array_with_path(object, arg, &list, oc.mode, oc.path); + free(oc.path); } /* diff --git a/builtin/log.c b/builtin/log.c index b3b10cc..1db016b 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -483,16 +483,20 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c !DIFF_OPT_TST(&rev->diffopt, ALLOW_TEXTCONV)) return stream_blob_to_fd(1, oid, NULL, 0); - if (get_sha1_with_context(obj_name, 0, oidc.hash, &obj_context)) + if (get_sha1_with_context(obj_name, GET_SHA1_RECORD_PATH, + oidc.hash, &obj_context)) die(_("Not a valid object name %s"), obj_name); - if (!obj_context.path[0] || - !textconv_object(obj_context.path, obj_context.mode, &oidc, 1, &buf, &size)) + if (!obj_context.path || + !textconv_object(obj_context.path, obj_context.mode, &oidc, 1, &buf, &size)) { + free(obj_context.path); return stream_blob_to_fd(1, oid, NULL, 0); + } if (!buf) die(_("git show %s: bad file"), obj_name); write_or_die(1, buf, size); + free(obj_context.path); return 0; } diff --git a/cache.h b/cache.h index 6cae30f..52b91f5 100644 --- a/cache.h +++ b/cache.h @@ -1333,13 +1333,18 @@ static inline int hex2chr(const char *s) struct object_context { unsigned char tree[20]; - char path[PATH_MAX]; unsigned mode; /* * symlink_path is only used by get_tree_entry_follow_symlinks, * and only for symlinks that point outside the repository. */ struct strbuf symlink_path; + /* + * If GET_SHA1_RECORD_PATH is set, this will record path (if any) + * found when resolving the name. The caller is responsible for + * releasing the memory. + */ + char *path; }; #define GET_SHA1_QUIETLY 01 @@ -1349,6 +1354,7 @@ struct object_context { #define GET_SHA1_TREEISH 020 #define GET_SHA1_BLOB 040 #define GET_SHA1_FOLLOW_SYMLINKS 0100 +#define GET_SHA1_RECORD_PATH 0200 #define GET_SHA1_ONLY_TO_DIE 04000 #define GET_SHA1_DISAMBIGUATORS \ diff --git a/sha1_name.c b/sha1_name.c index e7b0393..5e2ec37 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -1550,7 +1550,8 @@ static int get_sha1_with_context_1(const char *name, namelen = strlen(cp); } - strlcpy(oc->path, cp, sizeof(oc->path)); + if (flags & GET_SHA1_RECORD_PATH) + oc->path = xstrdup(cp); if (!active_cache) read_cache(); @@ -1613,7 +1614,8 @@ static int get_sha1_with_context_1(const char *name, } } hashcpy(oc->tree, tree_sha1); - strlcpy(oc->path, filename, sizeof(oc->path)); + if (flags & GET_SHA1_RECORD_PATH) + oc->path = xstrdup(filename); free(new_filename); return ret; -- cgit v0.10.2-6-g49f6 From 74e89110a38fb52be29615a1468e86fb75077433 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:54:56 -0400 Subject: t4063: add tests of direct blob diffs The git-diff command can directly compare two blobs (or a blob and a file), but we don't test this at all. Let's add some basic tests that reveal a few problems. There are basically four interesting inputs: 1. sha1 against sha1 (where diff has no information beyond the contents) 2. tree:path against tree:path (where it can get information via get_sha1_with_context) 3. Same as (2), but using the ".." range syntax 4. tree:path against a filename And beyond generating a sane diff, we care about a few little bits: which paths they show in the diff header, and whether they correctly pick up a mode change. They should all be able to show a mode except for (1), though note that case (3) is currently broken. For the headers, we would ideally show the path within the tree if we have it, making: git diff a:path b:path look the same as: git diff a b -- path We can't always do that (e.g., in the direct sha1/sha1 diff, we have no path to show), in which case we should fall back to the name that resolved to the blob (which is nonsense from the repository's perspective, but is the best we can do). Aside from the fallback case in (1), none of the cases get this right. Cases (2) and (3) always show the full tree:path, even though we should be able to know just the "path" portion. Case (4) picks up the filename path, but assigns it to _both_ sides of the diff. So this works for: git diff tree:path path but not for: git diff tree:other_path path The appropriate tests are marked to expect failure. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/t/t4063-diff-blobs.sh b/t/t4063-diff-blobs.sh new file mode 100755 index 0000000..90c6f6b --- /dev/null +++ b/t/t4063-diff-blobs.sh @@ -0,0 +1,91 @@ +#!/bin/sh + +test_description='test direct comparison of blobs via git-diff' +. ./test-lib.sh + +run_diff () { + # use full-index to make it easy to match the index line + git diff --full-index "$@" >diff +} + +check_index () { + grep "^index $1\\.\\.$2" diff +} + +check_mode () { + grep "^old mode $1" diff && + grep "^new mode $2" diff +} + +check_paths () { + grep "^diff --git a/$1 b/$2" diff +} + +test_expect_success 'create some blobs' ' + echo one >one && + echo two >two && + chmod +x two && + git add . && + + # cover systems where modes are ignored + git update-index --chmod=+x two && + + git commit -m base && + + sha1_one=$(git rev-parse HEAD:one) && + sha1_two=$(git rev-parse HEAD:two) +' + +test_expect_success 'diff by sha1' ' + run_diff $sha1_one $sha1_two +' +test_expect_success 'index of sha1 diff' ' + check_index $sha1_one $sha1_two +' +test_expect_success 'sha1 diff uses arguments as paths' ' + check_paths $sha1_one $sha1_two +' +test_expect_success 'sha1 diff has no mode change' ' + ! grep mode diff +' + +test_expect_success 'diff by tree:path (run)' ' + run_diff HEAD:one HEAD:two +' +test_expect_success 'index of tree:path diff' ' + check_index $sha1_one $sha1_two +' +test_expect_failure 'tree:path diff uses filenames as paths' ' + check_paths one two +' +test_expect_success 'tree:path diff shows mode change' ' + check_mode 100644 100755 +' + +test_expect_success 'diff by ranged tree:path' ' + run_diff HEAD:one..HEAD:two +' +test_expect_success 'index of ranged tree:path diff' ' + check_index $sha1_one $sha1_two +' +test_expect_failure 'ranged tree:path diff uses filenames as paths' ' + check_paths one two +' +test_expect_failure 'ranged tree:path diff shows mode change' ' + check_mode 100644 100755 +' + +test_expect_success 'diff blob against file' ' + run_diff HEAD:one two +' +test_expect_success 'index of blob-file diff' ' + check_index $sha1_one $sha1_two +' +test_expect_failure 'blob-file diff uses filename as paths' ' + check_paths one two +' +test_expect_success FILEMODE 'blob-file diff shows mode change' ' + check_mode 100644 100755 +' + +test_done -- cgit v0.10.2-6-g49f6 From 101dd4de16eea300a59e432452fd4fc06e1ac7f2 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:55:11 -0400 Subject: handle_revision_arg: record modes for "a..b" endpoints The "a..b" revision syntax was designed to handle commits, so it doesn't bother to record any mode we find while traversing a "tree:path" endpoint. These days "git diff" can diff blobs using either "a:path..b:path" (with dots) or "a:path b:path" (without), but the two behave inconsistently, as the with-dots version fails to notice the mode. Let's teach the dot-dot range parser to record modes; it doesn't cost us anything, and it makes this case work. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/revision.c b/revision.c index 4970769..5b56f0e 100644 --- a/revision.c +++ b/revision.c @@ -1448,9 +1448,11 @@ static int handle_dotdot_1(const char *arg, char *dotdot, const char *a_name, *b_name; struct object_id a_oid, b_oid; struct object *a_obj, *b_obj; + struct object_context a_oc, b_oc; unsigned int a_flags, b_flags; int symmetric = 0; unsigned int flags_exclude = flags ^ (UNINTERESTING | BOTTOM); + unsigned int oc_flags = GET_SHA1_COMMITTISH; a_name = arg; if (!*a_name) @@ -1464,8 +1466,8 @@ static int handle_dotdot_1(const char *arg, char *dotdot, if (!*b_name) b_name = "HEAD"; - if (get_sha1_committish(a_name, a_oid.hash) || - get_sha1_committish(b_name, b_oid.hash)) + if (get_sha1_with_context(a_name, oc_flags, a_oid.hash, &a_oc) || + get_sha1_with_context(b_name, oc_flags, b_oid.hash, &b_oc)) return -1; if (!cant_be_filename) { @@ -1507,8 +1509,8 @@ static int handle_dotdot_1(const char *arg, char *dotdot, b_obj->flags |= b_flags; add_rev_cmdline(revs, a_obj, a_name, REV_CMD_LEFT, a_flags); add_rev_cmdline(revs, b_obj, b_name, REV_CMD_RIGHT, b_flags); - add_pending_object(revs, a_obj, a_name); - add_pending_object(revs, b_obj, b_name); + add_pending_object_with_mode(revs, a_obj, a_name, a_oc.mode); + add_pending_object_with_mode(revs, b_obj, b_name, b_oc.mode); return 0; } diff --git a/t/t4063-diff-blobs.sh b/t/t4063-diff-blobs.sh index 90c6f6b..df9c35b 100755 --- a/t/t4063-diff-blobs.sh +++ b/t/t4063-diff-blobs.sh @@ -71,7 +71,7 @@ test_expect_success 'index of ranged tree:path diff' ' test_expect_failure 'ranged tree:path diff uses filenames as paths' ' check_paths one two ' -test_expect_failure 'ranged tree:path diff shows mode change' ' +test_expect_success 'ranged tree:path diff shows mode change' ' check_mode 100644 100755 ' -- cgit v0.10.2-6-g49f6 From 18f1ad76392e60cfaf68c3f1e49a4c91bd415d0d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:55:26 -0400 Subject: handle_revision_arg: record paths for pending objects If the revision parser sees an argument like tree:path, we parse it down to the correct blob (or tree), but throw away the "path" portion. Let's ask get_sha1_with_context() to record it, and pass it along in the pending array. This will let programs like git-diff which rely on the revision-parser show more accurate paths. Note that the implementation is a little tricky; we have to make sure we free oc.path in all code paths. For handle_dotdot(), we can piggy-back on the existing cleanup-wrapper pattern. The real work happens in handle_dotdot_1(), but the handle_dotdot() wrapper makes sure that the path is freed no matter how we exit the function (and for that reason we make sure that the object_context struct is zero'd, so if we fail to even get to the get_sha1_with_context() call, we just end up calling free(NULL)). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/revision.c b/revision.c index 5b56f0e..a927d26 100644 --- a/revision.c +++ b/revision.c @@ -1443,16 +1443,17 @@ static int dotdot_missing(const char *arg, char *dotdot, static int handle_dotdot_1(const char *arg, char *dotdot, struct rev_info *revs, int flags, - int cant_be_filename) + int cant_be_filename, + struct object_context *a_oc, + struct object_context *b_oc) { const char *a_name, *b_name; struct object_id a_oid, b_oid; struct object *a_obj, *b_obj; - struct object_context a_oc, b_oc; unsigned int a_flags, b_flags; int symmetric = 0; unsigned int flags_exclude = flags ^ (UNINTERESTING | BOTTOM); - unsigned int oc_flags = GET_SHA1_COMMITTISH; + unsigned int oc_flags = GET_SHA1_COMMITTISH | GET_SHA1_RECORD_PATH; a_name = arg; if (!*a_name) @@ -1466,8 +1467,8 @@ static int handle_dotdot_1(const char *arg, char *dotdot, if (!*b_name) b_name = "HEAD"; - if (get_sha1_with_context(a_name, oc_flags, a_oid.hash, &a_oc) || - get_sha1_with_context(b_name, oc_flags, b_oid.hash, &b_oc)) + if (get_sha1_with_context(a_name, oc_flags, a_oid.hash, a_oc) || + get_sha1_with_context(b_name, oc_flags, b_oid.hash, b_oc)) return -1; if (!cant_be_filename) { @@ -1509,8 +1510,8 @@ static int handle_dotdot_1(const char *arg, char *dotdot, b_obj->flags |= b_flags; add_rev_cmdline(revs, a_obj, a_name, REV_CMD_LEFT, a_flags); add_rev_cmdline(revs, b_obj, b_name, REV_CMD_RIGHT, b_flags); - add_pending_object_with_mode(revs, a_obj, a_name, a_oc.mode); - add_pending_object_with_mode(revs, b_obj, b_name, b_oc.mode); + add_pending_object_with_path(revs, a_obj, a_name, a_oc->mode, a_oc->path); + add_pending_object_with_path(revs, b_obj, b_name, b_oc->mode, b_oc->path); return 0; } @@ -1518,16 +1519,24 @@ static int handle_dotdot(const char *arg, struct rev_info *revs, int flags, int cant_be_filename) { + struct object_context a_oc, b_oc; char *dotdot = strstr(arg, ".."); int ret; if (!dotdot) return -1; + memset(&a_oc, 0, sizeof(a_oc)); + memset(&b_oc, 0, sizeof(b_oc)); + *dotdot = '\0'; - ret = handle_dotdot_1(arg, dotdot, revs, flags, cant_be_filename); + ret = handle_dotdot_1(arg, dotdot, revs, flags, cant_be_filename, + &a_oc, &b_oc); *dotdot = '.'; + free(a_oc.path); + free(b_oc.path); + return ret; } @@ -1540,7 +1549,7 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi int local_flags; const char *arg = arg_; int cant_be_filename = revarg_opt & REVARG_CANNOT_BE_FILENAME; - unsigned get_sha1_flags = 0; + unsigned get_sha1_flags = GET_SHA1_RECORD_PATH; flags = flags & UNINTERESTING ? flags | BOTTOM : flags & ~BOTTOM; @@ -1591,7 +1600,7 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi } if (revarg_opt & REVARG_COMMITTISH) - get_sha1_flags = GET_SHA1_COMMITTISH; + get_sha1_flags |= GET_SHA1_COMMITTISH; if (get_sha1_with_context(arg, get_sha1_flags, sha1, &oc)) return revs->ignore_missing ? 0 : -1; @@ -1599,7 +1608,8 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi verify_non_filename(revs->prefix, arg); object = get_reference(revs, arg, sha1, flags ^ local_flags); add_rev_cmdline(revs, object, arg_, REV_CMD_REV, flags ^ local_flags); - add_pending_object_with_mode(revs, object, arg, oc.mode); + add_pending_object_with_path(revs, object, arg, oc.mode, oc.path); + free(oc.path); return 0; } -- cgit v0.10.2-6-g49f6 From 42f5ba5bb6648c16a3c90a0110fbdb430e590a1b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:57:30 -0400 Subject: diff: pass whole pending entry in blobinfo When diffing blobs directly, git-diff picks the blobs out of the rev_info's pending array and copies the relevant bits to a custom "struct blobinfo". But the pending array entry already has all of this information (and more, which we'll use in future patches). Let's just pass the original entry instead. In practice, these two blobs are probably adjacent in the revs->pending array, and we could just pass the whole array. But the current code is careful to pick each blob out separately and put it into another array, so we'll continue to do so and make our own array-of-pointers. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/diff.c b/builtin/diff.c index d184aaf..8b276ae 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -20,12 +20,6 @@ #define DIFF_NO_INDEX_EXPLICIT 1 #define DIFF_NO_INDEX_IMPLICIT 2 -struct blobinfo { - struct object_id oid; - const char *name; - unsigned mode; -}; - static const char builtin_diff_usage[] = "git diff [] [ []] [--] [...]"; @@ -65,7 +59,7 @@ static void stuff_change(struct diff_options *opt, static int builtin_diff_b_f(struct rev_info *revs, int argc, const char **argv, - struct blobinfo *blob) + struct object_array_entry **blob) { /* Blob vs file in the working tree*/ struct stat st; @@ -84,12 +78,12 @@ static int builtin_diff_b_f(struct rev_info *revs, diff_set_mnemonic_prefix(&revs->diffopt, "o/", "w/"); - if (blob[0].mode == S_IFINVALID) - blob[0].mode = canon_mode(st.st_mode); + if (blob[0]->mode == S_IFINVALID) + blob[0]->mode = canon_mode(st.st_mode); stuff_change(&revs->diffopt, - blob[0].mode, canon_mode(st.st_mode), - &blob[0].oid, &null_oid, + blob[0]->mode, canon_mode(st.st_mode), + &blob[0]->item->oid, &null_oid, 1, 0, path, path); diffcore_std(&revs->diffopt); @@ -99,24 +93,24 @@ static int builtin_diff_b_f(struct rev_info *revs, static int builtin_diff_blobs(struct rev_info *revs, int argc, const char **argv, - struct blobinfo *blob) + struct object_array_entry **blob) { unsigned mode = canon_mode(S_IFREG | 0644); if (argc > 1) usage(builtin_diff_usage); - if (blob[0].mode == S_IFINVALID) - blob[0].mode = mode; + if (blob[0]->mode == S_IFINVALID) + blob[0]->mode = mode; - if (blob[1].mode == S_IFINVALID) - blob[1].mode = mode; + if (blob[1]->mode == S_IFINVALID) + blob[1]->mode = mode; stuff_change(&revs->diffopt, - blob[0].mode, blob[1].mode, - &blob[0].oid, &blob[1].oid, + blob[0]->mode, blob[1]->mode, + &blob[0]->item->oid, &blob[1]->item->oid, 1, 1, - blob[0].name, blob[1].name); + blob[0]->name, blob[1]->name); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); return 0; @@ -259,7 +253,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix) struct rev_info rev; struct object_array ent = OBJECT_ARRAY_INIT; int blobs = 0, paths = 0; - struct blobinfo blob[2]; + struct object_array_entry *blob[2]; int nongit = 0, no_index = 0; int result = 0; @@ -408,9 +402,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix) } else if (obj->type == OBJ_BLOB) { if (2 <= blobs) die(_("more than two blobs given: '%s'"), name); - hashcpy(blob[blobs].oid.hash, obj->oid.hash); - blob[blobs].name = name; - blob[blobs].mode = entry->mode; + blob[blobs] = entry; blobs++; } else { -- cgit v0.10.2-6-g49f6 From d04ec74b17138733463c0ca1024fdbb42be6096a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:58:05 -0400 Subject: diff: use the word "path" instead of "name" for blobs The stuff_change() function makes diff_filespecs out of blobs. The term we generally use for filespecs is "path", not "name", so let's be consistent here. That will make things less confusing when the next patch starts caring about the path/name distinction inside the pending object array. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/diff.c b/builtin/diff.c index 8b276ae..4c0811d 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -29,8 +29,8 @@ static void stuff_change(struct diff_options *opt, const struct object_id *new_oid, int old_oid_valid, int new_oid_valid, - const char *old_name, - const char *new_name) + const char *old_path, + const char *new_path) { struct diff_filespec *one, *two; @@ -41,16 +41,16 @@ static void stuff_change(struct diff_options *opt, if (DIFF_OPT_TST(opt, REVERSE_DIFF)) { SWAP(old_mode, new_mode); SWAP(old_oid, new_oid); - SWAP(old_name, new_name); + SWAP(old_path, new_path); } if (opt->prefix && - (strncmp(old_name, opt->prefix, opt->prefix_length) || - strncmp(new_name, opt->prefix, opt->prefix_length))) + (strncmp(old_path, opt->prefix, opt->prefix_length) || + strncmp(new_path, opt->prefix, opt->prefix_length))) return; - one = alloc_filespec(old_name); - two = alloc_filespec(new_name); + one = alloc_filespec(old_path); + two = alloc_filespec(new_path); fill_filespec(one, old_oid->hash, old_oid_valid, old_mode); fill_filespec(two, new_oid->hash, new_oid_valid, new_mode); -- cgit v0.10.2-6-g49f6 From 158b06caee5f9ea2987f444b8e49bd2d678b187d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:59:15 -0400 Subject: diff: use pending "path" if it is available There's a subtle distinction between "name" and "path" for a blob that we resolve: the name is what the user told us on the command line, and the path is what we traversed when finding the blob within a tree (if we did so). When we diff blobs directly, we use "name", but "path" is more likely to be useful to the user (it will find the correct .gitattributes, and give them a saner diff header). We still have to fall back to using the name for some cases (i.e., any blob reference that isn't of the form tree:path). That's the best we can do in such a case. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/diff.c b/builtin/diff.c index 4c0811d..1a1149e 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -23,6 +23,11 @@ static const char builtin_diff_usage[] = "git diff [] [ []] [--] [...]"; +static const char *blob_path(struct object_array_entry *entry) +{ + return entry->path ? entry->path : entry->name; +} + static void stuff_change(struct diff_options *opt, unsigned old_mode, unsigned new_mode, const struct object_id *old_oid, @@ -110,7 +115,7 @@ static int builtin_diff_blobs(struct rev_info *revs, blob[0]->mode, blob[1]->mode, &blob[0]->item->oid, &blob[1]->item->oid, 1, 1, - blob[0]->name, blob[1]->name); + blob_path(blob[0]), blob_path(blob[1])); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); return 0; diff --git a/t/t4063-diff-blobs.sh b/t/t4063-diff-blobs.sh index df9c35b..80ce033 100755 --- a/t/t4063-diff-blobs.sh +++ b/t/t4063-diff-blobs.sh @@ -55,7 +55,7 @@ test_expect_success 'diff by tree:path (run)' ' test_expect_success 'index of tree:path diff' ' check_index $sha1_one $sha1_two ' -test_expect_failure 'tree:path diff uses filenames as paths' ' +test_expect_success 'tree:path diff uses filenames as paths' ' check_paths one two ' test_expect_success 'tree:path diff shows mode change' ' @@ -68,7 +68,7 @@ test_expect_success 'diff by ranged tree:path' ' test_expect_success 'index of ranged tree:path diff' ' check_index $sha1_one $sha1_two ' -test_expect_failure 'ranged tree:path diff uses filenames as paths' ' +test_expect_success 'ranged tree:path diff uses filenames as paths' ' check_paths one two ' test_expect_success 'ranged tree:path diff shows mode change' ' -- cgit v0.10.2-6-g49f6 From 30d005c02014680403b5d35ef274047ab91fa5bd Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:59:34 -0400 Subject: diff: use blob path for blob/file diffs When we diff a blob against a working tree file like: git diff HEAD:Makefile Makefile we always use the working tree filename for both sides of the diff. In most cases that's fine, as the two would be the same anyway, as above. And until recently, we used the "name" for the blob, not the path, which would have the messy "HEAD:" on the beginning. But when they don't match, like: git diff HEAD:old_path new_path it makes sense to show both names. This patch uses the blob's path field if it's available, and otherwise falls back to using the filename (in preference to the blob's name, which is likely to be garbage like a raw sha1). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/diff.c b/builtin/diff.c index 1a1149e..5e7c642 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -90,7 +90,8 @@ static int builtin_diff_b_f(struct rev_info *revs, blob[0]->mode, canon_mode(st.st_mode), &blob[0]->item->oid, &null_oid, 1, 0, - path, path); + blob[0]->path ? blob[0]->path : path, + path); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); return 0; diff --git a/t/t4063-diff-blobs.sh b/t/t4063-diff-blobs.sh index 80ce033..bc69e26 100755 --- a/t/t4063-diff-blobs.sh +++ b/t/t4063-diff-blobs.sh @@ -81,11 +81,16 @@ test_expect_success 'diff blob against file' ' test_expect_success 'index of blob-file diff' ' check_index $sha1_one $sha1_two ' -test_expect_failure 'blob-file diff uses filename as paths' ' +test_expect_success 'blob-file diff uses filename as paths' ' check_paths one two ' test_expect_success FILEMODE 'blob-file diff shows mode change' ' check_mode 100644 100755 ' +test_expect_success 'blob-file diff prefers filename to sha1' ' + run_diff $sha1_one two && + check_paths two two +' + test_done -- cgit v0.10.2-6-g49f6