From 3c7e2e8f0acfe94220cdd3bbd3a35a955ae80294 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 22 Jun 2021 12:03:54 -0400 Subject: pretty.h: update and expand docstring for userformat_find_requirements() The comment only mentions "notes", but there are more fields now (and I'm about to add another). Let's make it more general, and stick the struct next to the function to make the list of possibilities obvious. While we're touching this comment, let's also mention the behavior of NULL, which some callers rely on (though in the long run, this global is pretty nasty and probably should get moved into rev_info). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/pretty.h b/pretty.h index f034609..c81cf40 100644 --- a/pretty.h +++ b/pretty.h @@ -65,12 +65,15 @@ static inline int cmit_fmt_is_mail(enum cmit_fmt fmt) return (fmt == CMIT_FMT_EMAIL || fmt == CMIT_FMT_MBOXRD); } +/* + * Examine the user-specified format given by "fmt" (or if NULL, the global one + * previously saved by get_commit_format()), and set flags based on which items + * the format will need when it is expanded. + */ struct userformat_want { unsigned notes:1; unsigned source:1; }; - -/* Set the flag "w->notes" if there is placeholder %N in "fmt". */ void userformat_find_requirements(const char *fmt, struct userformat_want *w); /* -- cgit v0.10.2-6-g49f6 From b2086b518366ed71caac498857b9c5765dd73ed1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 22 Jun 2021 12:04:50 -0400 Subject: log: avoid loading decorations for userformats that don't need it If no --decorate option is given, we default to auto-decoration. And when that kicks in, cmd_log_init_finish() will unconditionally load the decoration refs. However, if we are using a user-format that does not include "%d" or "%D", we won't show the decorations at all, so we don't need to load them. We can detect this case and auto-disable them by adding a new field to our userformat_want helper. We can do this even when the user explicitly asked for --decorate, because it can't affect the output at all. This patch consistently reduces the time to run "git log -1 --format=%H" on my git.git clone (with ~2k refs) from 34ms to 7ms. On a much more extreme real-world repository (with ~220k refs), it goes from 2.5s to 4ms. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/log.c b/builtin/log.c index 6102893..6ba7f20 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -245,6 +245,9 @@ static void cmd_log_init_finish(int argc, const char **argv, const char *prefix, rev->abbrev_commit = 0; } + if (rev->commit_format == CMIT_FMT_USERFORMAT && !w.decorate) + decoration_style = 0; + if (decoration_style) { const struct string_list *config_exclude = repo_config_get_value_multi(the_repository, diff --git a/pretty.c b/pretty.c index b1ecd03..9631529 100644 --- a/pretty.c +++ b/pretty.c @@ -1735,6 +1735,10 @@ static size_t userformat_want_item(struct strbuf *sb, const char *placeholder, case 'S': w->source = 1; break; + case 'd': + case 'D': + w->decorate = 1; + break; } return 0; } diff --git a/pretty.h b/pretty.h index c81cf40..2f16acd 100644 --- a/pretty.h +++ b/pretty.h @@ -73,6 +73,7 @@ static inline int cmit_fmt_is_mail(enum cmit_fmt fmt) struct userformat_want { unsigned notes:1; unsigned source:1; + unsigned decorate:1; }; void userformat_find_requirements(const char *fmt, struct userformat_want *w); -- cgit v0.10.2-6-g49f6 From 542d6abbb4e838cf1c47612a2b4b33f2a31e6277 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 22 Jun 2021 12:05:31 -0400 Subject: object.h: expand docstring for lookup_unknown_object() The lookup_unknown_object() system is not often used and is somewhat confusing. Let's try to explain it a bit more (which is especially important as I'm adding a related but slightly different function in the next commit). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/object.h b/object.h index 8bca310..eb7e481 100644 --- a/object.h +++ b/object.h @@ -144,7 +144,18 @@ struct object *parse_object_or_die(const struct object_id *oid, const char *name */ struct object *parse_object_buffer(struct repository *r, const struct object_id *oid, enum object_type type, unsigned long size, void *buffer, int *eaten_p); -/** Returns the object, with potentially excess memory allocated. **/ +/* + * Allocate and return an object struct, even if you do not know the type of + * the object. The returned object may have its "type" field set to a real type + * (if somebody previously called lookup_blob(), etc), or it may be set to + * OBJ_NONE. In the latter case, subsequent calls to lookup_blob(), etc, will + * set the type field as appropriate. + * + * Use this when you do not know the expected type of an object and want to + * avoid parsing it for efficiency reasons. Try to avoid it otherwise; it + * may allocate excess memory, since the returned object must be as large as + * the maximum struct of any type. + */ struct object *lookup_unknown_object(struct repository *r, const struct object_id *oid); struct object_list *object_list_insert(struct object *item, -- cgit v0.10.2-6-g49f6 From 7463064b28086c0a765e247bc8336f8e32356494 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 22 Jun 2021 12:06:41 -0400 Subject: object.h: add lookup_object_by_type() function In some cases it's useful for efficiency reasons to get the type of an object before deciding whether to parse it, but we still want an object struct. E.g., in reachable.c, bitmaps give us the type, but we just want to mark flags on each object. Likewise, we may loop over every object and only parse tags in order to peel them; checking the type first lets us avoid parsing the non-tags. But our lookup_blob(), etc, functions make getting an object struct annoying: we have to call the right function for every type. And we cannot just use the generic lookup_object(), because it only returns an already-seen object; it won't allocate a new object struct. Let's provide a function that dispatches to the correct lookup_* function based on a run-time type. In fact, reachable.c already has such a helper, so we'll just make that public. I did change the return type from "void *" to "struct object *". While the former is a clever way to avoid casting inside the function, it's less safe and less informative to people reading the function declaration. The next commit will add a new caller. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/object.c b/object.c index 1418845..07fcf23 100644 --- a/object.c +++ b/object.c @@ -185,6 +185,24 @@ struct object *lookup_unknown_object(struct repository *r, const struct object_i return obj; } +struct object *lookup_object_by_type(struct repository *r, + const struct object_id *oid, + enum object_type type) +{ + switch (type) { + case OBJ_COMMIT: + return (struct object *)lookup_commit(r, oid); + case OBJ_TREE: + return (struct object *)lookup_tree(r, oid); + case OBJ_TAG: + return (struct object *)lookup_tag(r, oid); + case OBJ_BLOB: + return (struct object *)lookup_blob(r, oid); + default: + die("BUG: unknown object type %d", type); + } +} + struct object *parse_object_buffer(struct repository *r, const struct object_id *oid, enum object_type type, unsigned long size, void *buffer, int *eaten_p) { struct object *obj; diff --git a/object.h b/object.h index eb7e481..3b38c9c 100644 --- a/object.h +++ b/object.h @@ -158,6 +158,13 @@ struct object *parse_object_buffer(struct repository *r, const struct object_id */ struct object *lookup_unknown_object(struct repository *r, const struct object_id *oid); +/* + * Dispatch to the appropriate lookup_blob(), lookup_commit(), etc, based on + * "type". + */ +struct object *lookup_object_by_type(struct repository *r, const struct object_id *oid, + enum object_type type); + struct object_list *object_list_insert(struct object *item, struct object_list **list_p); diff --git a/reachable.c b/reachable.c index c598472..84e3d0d 100644 --- a/reachable.c +++ b/reachable.c @@ -159,24 +159,6 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs, FOR_EACH_OBJECT_LOCAL_ONLY); } -static void *lookup_object_by_type(struct repository *r, - const struct object_id *oid, - enum object_type type) -{ - switch (type) { - case OBJ_COMMIT: - return lookup_commit(r, oid); - case OBJ_TREE: - return lookup_tree(r, oid); - case OBJ_TAG: - return lookup_tag(r, oid); - case OBJ_BLOB: - return lookup_blob(r, oid); - default: - die("BUG: unknown object type %d", type); - } -} - static int mark_object_seen(const struct object_id *oid, enum object_type type, int exclude, -- cgit v0.10.2-6-g49f6 From 88473c8baeefafe6b95ab0f62eb2f12e2b098ac7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 22 Jun 2021 13:06:48 -0400 Subject: load_ref_decorations(): avoid parsing non-tag objects When we load the ref decorations, we parse the object pointed to by each ref in order to get a "struct object". This is unnecessarily expensive; we really only need the object struct, and don't even look at the parsed contents. The exception is tags, which we do need to peel. We can improve this by looking up the object type first (which is much cheaper), and skipping the parse entirely for non-tags. This increases the work slightly for annotated tags (which now do a type lookup _and_ a parse), but decreases it a lot for other types. On balance, this seems to be a good tradeoff. In my git.git clone, with ~2k refs, most of which are branches, the time to run "git log -1 --decorate" drops from 34ms to 11ms. Even on my linux.git clone, which contains mostly tags and only a handful of branches, the time drops from 30ms to 19ms. And on a more extreme real-world case with ~220k refs, mostly non-tags, the time drops from 2.6s to 650ms. That command is a lop-sided example, of course, because it does as little non-loading work as possible. But it does show the absolute time improvement. Even in something like a full "git log --decorate" on that extreme repo, we'd still be saving 2s of CPU time. Ideally we could push this even further, and avoid parsing even tags, by relying on the packed-refs "peel" optimization (which we could do by calling peel_iterated_oid() instead of peeling manually). But we can't do that here. The packed-refs file only stores the bottom-layer of the peel (so in a "tag->tag->commit" chain, it stores only the commit as the peel result). But the decoration code wants to peel the layers individually, annotating the middle layers of the chain. If the packed-refs file ever learns to store all of the peeled layers, then we could switch to it. Or even if it stored a flag to indicate the peel was not multi-layer (because most of them aren't), then we could use it most of the time and fall back to a manual peel for the rare cases. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/log-tree.c b/log-tree.c index 7b82378..c3c8e7e 100644 --- a/log-tree.c +++ b/log-tree.c @@ -134,6 +134,7 @@ static int add_ref_decoration(const char *refname, const struct object_id *oid, int flags, void *cb_data) { struct object *obj; + enum object_type objtype; enum decoration_type type = DECORATION_NONE; struct decoration_filter *filter = (struct decoration_filter *)cb_data; @@ -155,9 +156,10 @@ static int add_ref_decoration(const char *refname, const struct object_id *oid, return 0; } - obj = parse_object(the_repository, oid); - if (!obj) + objtype = oid_object_info(the_repository, oid, NULL); + if (objtype < 0) return 0; + obj = lookup_object_by_type(the_repository, oid, objtype); if (starts_with(refname, "refs/heads/")) type = DECORATION_REF_LOCAL; -- cgit v0.10.2-6-g49f6 From 6afb265b9675b1e6061f53a323825db239f6e0fa Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 22 Jun 2021 13:09:26 -0400 Subject: add_ref_decoration(): rename s/type/deco_type/ Now that we have two types (a decoration type and an object type) in the function, let's give them both unique names to avoid accidentally using one instead of the other. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/log-tree.c b/log-tree.c index c3c8e7e..4f69ed1 100644 --- a/log-tree.c +++ b/log-tree.c @@ -135,7 +135,7 @@ static int add_ref_decoration(const char *refname, const struct object_id *oid, { struct object *obj; enum object_type objtype; - enum decoration_type type = DECORATION_NONE; + enum decoration_type deco_type = DECORATION_NONE; struct decoration_filter *filter = (struct decoration_filter *)cb_data; if (filter && !ref_filter_match(refname, filter)) @@ -162,17 +162,17 @@ static int add_ref_decoration(const char *refname, const struct object_id *oid, obj = lookup_object_by_type(the_repository, oid, objtype); if (starts_with(refname, "refs/heads/")) - type = DECORATION_REF_LOCAL; + deco_type = DECORATION_REF_LOCAL; else if (starts_with(refname, "refs/remotes/")) - type = DECORATION_REF_REMOTE; + deco_type = DECORATION_REF_REMOTE; else if (starts_with(refname, "refs/tags/")) - type = DECORATION_REF_TAG; + deco_type = DECORATION_REF_TAG; else if (!strcmp(refname, "refs/stash")) - type = DECORATION_REF_STASH; + deco_type = DECORATION_REF_STASH; else if (!strcmp(refname, "HEAD")) - type = DECORATION_REF_HEAD; + deco_type = DECORATION_REF_HEAD; - add_name_decoration(type, refname, obj); + add_name_decoration(deco_type, refname, obj); while (obj->type == OBJ_TAG) { obj = ((struct tag *)obj)->tagged; if (!obj) -- cgit v0.10.2-6-g49f6 From d1ed8d6cee57c91ec770a8a183ed40c3ec867ac1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 14 Jul 2021 12:31:36 -0400 Subject: load_ref_decorations(): fix decoration with tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 88473c8bae ("load_ref_decorations(): avoid parsing non-tag objects", 2021-06-22) introduced a shortcut to `add_ref_decoration()`: Rather than calling `parse_object()`, we go for `oid_object_info()` and then `lookup_object_by_type()` using the type just discovered. As detailed in the commit message, this provides a significant time saving. Unfortunately, it also changes the behavior: We lose all annotated tags from the decoration. The reason this happens is in the loop where we try to peel the tags, we won't necessarily have parsed that first object. If we haven't, its `tagged` field will be NULL, so we won't actually add a decoration for the pointed-to object. Make sure to parse the tag object at the top of the peeling loop. This effectively restores the pre-88473c8bae parsing -- but only of tags, allowing us to keep most of the possible speedup from 88473c8bae. On my big ~220k ref test case (where it's mostly non-tags), the timings [using "git log -1 --decorate"] are: - before either patch: 2.945s - with my broken patch: 0.707s - with [this patch]: 0.788s The simplest way to do this is to just conditionally parse before the loop: if (obj->type == OBJ_TAG) parse_object(&obj->oid); But we can observe that our tag-peeling loop needs to peel already, to examine recursive tags-of-tags. So instead of introducing a new call to parse_object(), we can simply move the parsing higher in the loop: instead of parsing the new object before we loop, parse each tag object before we look at its "tagged" field. This has another beneficial side effect: if a tag points at a commit (or other non-tag type), we do not bother to parse the commit at all now. And we know it is a commit without calling oid_object_info(), because parsing the surrounding tag object will have created the correct in-core object based on the "type" field of the tag. Our test coverage for --decorate was obviously not good, since we missed this quite-basic regression. The new tests covers an annotated tag (showing the fix), but also that we correctly show annotations for lightweight tags and double-annotated tag-of-tags. Reported-by: Martin Ågren Helped-by: Martin Ågren Signed-off-by: Martin Ågren Signed-off-by: Jeff King Reviewed-by: Martin Ågren Signed-off-by: Junio C Hamano diff --git a/log-tree.c b/log-tree.c index 4f69ed1..6dc4412 100644 --- a/log-tree.c +++ b/log-tree.c @@ -174,11 +174,11 @@ static int add_ref_decoration(const char *refname, const struct object_id *oid, add_name_decoration(deco_type, refname, obj); while (obj->type == OBJ_TAG) { + if (!obj->parsed) + parse_object(the_repository, &obj->oid); obj = ((struct tag *)obj)->tagged; if (!obj) break; - if (!obj->parsed) - parse_object(the_repository, &obj->oid); add_name_decoration(DECORATION_REF_TAG, refname, obj); } return 0; diff --git a/t/t4202-log.sh b/t/t4202-log.sh index 350cfa3..fe8f5e2 100755 --- a/t/t4202-log.sh +++ b/t/t4202-log.sh @@ -1905,6 +1905,20 @@ test_expect_success '--exclude-promisor-objects does not BUG-crash' ' test_must_fail git log --exclude-promisor-objects source-a ' +test_expect_success 'log --decorate includes all levels of tag annotated tags' ' + git checkout -b branch && + git commit --allow-empty -m "new commit" && + git tag lightweight HEAD && + git tag -m annotated annotated HEAD && + git tag -m double-0 double-0 HEAD && + git tag -m double-1 double-1 double-0 && + cat >expect <<-\EOF && + HEAD -> branch, tag: lightweight, tag: double-1, tag: double-0, tag: annotated + EOF + git log -1 --format="%D" >actual && + test_cmp expect actual +' + test_expect_success 'log --end-of-options' ' git update-ref refs/heads/--source HEAD && git log --end-of-options --source >actual && -- cgit v0.10.2-6-g49f6