From 8d2dfc49b199c7da6faefd7993630f24bd37fee0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 10 Apr 2009 17:27:58 -0700 Subject: process_{tree,blob}: show objects without buffering Here's a less trivial thing, and slightly more dubious one. I was looking at that "struct object_array objects", and wondering why we do that. I have honestly totally forgotten. Why not just call the "show()" function as we encounter the objects? Rather than add the objects to the object_array, and then at the very end going through the array and doing a 'show' on all, just do things more incrementally. Now, there are possible downsides to this: - the "buffer using object_array" _can_ in theory result in at least better I-cache usage (two tight loops rather than one more spread out one). I don't think this is a real issue, but in theory.. - this _does_ change the order of the objects printed. Instead of doing a "process_tree(revs, commit->tree, &objects, NULL, "");" in the loop over the commits (which puts all the root trees _first_ in the object list), this patch just adds them to the list of pending objects, and then we'll traverse them in that order (and thus show each root tree object together with the objects we discover under it). I _think_ the new ordering actually makes more sense, but the object ordering is actually a subtle thing when it comes to packing efficiency, so any change in order is going to have implications for packing. Good or bad, I dunno. - There may be some reason why we did it that odd way with the object array, that I have simply forgotten. Anyway, now that we don't buffer up the objects before showing them that may actually result in lower memory usage during that whole traverse_commit_list() phase. This is seriously not very deeply tested. It makes sense to me, it seems to pass all the tests, it looks ok, but... Does anybody remember why we did that "object_array" thing? 
It used to be an "object_list" a long long time ago, but got changed into the array due to better memory usage patterns (those linked lists of objects are horrible from a memory allocation standpoint). But I wonder why we didn't do this back then. Maybe there's a reason for it. Or maybe there _used_ to be a reason, and no longer is. Signed-off-by: Junio C Hamano diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index a6adc8c..dde8cc3 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -1856,13 +1856,17 @@ static void show_commit(struct commit *commit) commit->object.flags |= OBJECT_ADDED; } -static void show_object(struct object_array_entry *p) +static void show_object(struct object *obj, const char *name) { - add_preferred_base_object(p->name); - add_object_entry(p->item->sha1, p->item->type, p->name, 0); - p->item->flags |= OBJECT_ADDED; - free((char *)p->name); - p->name = NULL; + add_preferred_base_object(name); + add_object_entry(obj->sha1, obj->type, name, 0); + obj->flags |= OBJECT_ADDED; + + /* + * We will have generated the hash from the name, + * but not saved a pointer to it - we can free it + */ + free((char *)name); } static void show_edge(struct commit *commit) diff --git a/builtin-rev-list.c b/builtin-rev-list.c index facaff2..759e671 100644 --- a/builtin-rev-list.c +++ b/builtin-rev-list.c @@ -169,27 +169,27 @@ static void finish_commit(struct commit *commit) commit->buffer = NULL; } -static void finish_object(struct object_array_entry *p) +static void finish_object(struct object *obj, const char *name) { - if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1)) - die("missing blob object '%s'", sha1_to_hex(p->item->sha1)); + if (obj->type == OBJ_BLOB && !has_sha1_file(obj->sha1)) + die("missing blob object '%s'", sha1_to_hex(obj->sha1)); } -static void show_object(struct object_array_entry *p) +static void show_object(struct object *obj, const char *name) { /* An object with name "foo\n0000000..." 
can be used to * confuse downstream "git pack-objects" very badly. */ - const char *ep = strchr(p->name, '\n'); + const char *ep = strchr(name, '\n'); - finish_object(p); + finish_object(obj, name); if (ep) { - printf("%s %.*s\n", sha1_to_hex(p->item->sha1), - (int) (ep - p->name), - p->name); + printf("%s %.*s\n", sha1_to_hex(obj->sha1), + (int) (ep - name), + name); } else - printf("%s %s\n", sha1_to_hex(p->item->sha1), p->name); + printf("%s %s\n", sha1_to_hex(obj->sha1), name); } static void show_edge(struct commit *commit) diff --git a/list-objects.c b/list-objects.c index dd243c7..5a4af62 100644 --- a/list-objects.c +++ b/list-objects.c @@ -10,7 +10,7 @@ static void process_blob(struct rev_info *revs, struct blob *blob, - struct object_array *p, + show_object_fn show, struct name_path *path, const char *name) { @@ -23,7 +23,7 @@ static void process_blob(struct rev_info *revs, if (obj->flags & (UNINTERESTING | SEEN)) return; obj->flags |= SEEN; - add_object(obj, p, path, name); + show(obj, path_name(path, name)); } /* @@ -50,7 +50,7 @@ static void process_blob(struct rev_info *revs, */ static void process_gitlink(struct rev_info *revs, const unsigned char *sha1, - struct object_array *p, + show_object_fn show, struct name_path *path, const char *name) { @@ -59,7 +59,7 @@ static void process_gitlink(struct rev_info *revs, static void process_tree(struct rev_info *revs, struct tree *tree, - struct object_array *p, + show_object_fn show, struct name_path *path, const char *name) { @@ -77,7 +77,7 @@ static void process_tree(struct rev_info *revs, if (parse_tree(tree) < 0) die("bad tree object %s", sha1_to_hex(obj->sha1)); obj->flags |= SEEN; - add_object(obj, p, path, name); + show(obj, path_name(path, name)); me.up = path; me.elem = name; me.elem_len = strlen(name); @@ -88,14 +88,14 @@ static void process_tree(struct rev_info *revs, if (S_ISDIR(entry.mode)) process_tree(revs, lookup_tree(entry.sha1), - p, &me, entry.path); + show, &me, entry.path); else if 
(S_ISGITLINK(entry.mode)) process_gitlink(revs, entry.sha1, - p, &me, entry.path); + show, &me, entry.path); else process_blob(revs, lookup_blob(entry.sha1), - p, &me, entry.path); + show, &me, entry.path); } free(tree->buffer); tree->buffer = NULL; @@ -134,16 +134,20 @@ void mark_edges_uninteresting(struct commit_list *list, } } +static void add_pending_tree(struct rev_info *revs, struct tree *tree) +{ + add_pending_object(revs, &tree->object, ""); +} + void traverse_commit_list(struct rev_info *revs, void (*show_commit)(struct commit *), - void (*show_object)(struct object_array_entry *)) + void (*show_object)(struct object *, const char *)) { int i; struct commit *commit; - struct object_array objects = { 0, 0, NULL }; while ((commit = get_revision(revs)) != NULL) { - process_tree(revs, commit->tree, &objects, NULL, ""); + add_pending_tree(revs, commit->tree); show_commit(commit); } for (i = 0; i < revs->pending.nr; i++) { @@ -154,25 +158,22 @@ void traverse_commit_list(struct rev_info *revs, continue; if (obj->type == OBJ_TAG) { obj->flags |= SEEN; - add_object_array(obj, name, &objects); + show_object(obj, name); continue; } if (obj->type == OBJ_TREE) { - process_tree(revs, (struct tree *)obj, &objects, + process_tree(revs, (struct tree *)obj, show_object, NULL, name); continue; } if (obj->type == OBJ_BLOB) { - process_blob(revs, (struct blob *)obj, &objects, + process_blob(revs, (struct blob *)obj, show_object, NULL, name); continue; } die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name); } - for (i = 0; i < objects.nr; i++) - show_object(&objects.objects[i]); - free(objects.objects); if (revs->pending.nr) { free(revs->pending.objects); revs->pending.nr = 0; diff --git a/list-objects.h b/list-objects.h index 0f41391..13b0dd9 100644 --- a/list-objects.h +++ b/list-objects.h @@ -2,7 +2,7 @@ #define LIST_OBJECTS_H typedef void (*show_commit_fn)(struct commit *); -typedef void (*show_object_fn)(struct object_array_entry *); +typedef void 
(*show_object_fn)(struct object *, const char *); typedef void (*show_edge_fn)(struct commit *); void traverse_commit_list(struct rev_info *revs, show_commit_fn, show_object_fn); diff --git a/revision.c b/revision.c index 45fd7a3..f95104b 100644 --- a/revision.c +++ b/revision.c @@ -14,7 +14,7 @@ volatile show_early_output_fn_t show_early_output; -static char *path_name(struct name_path *path, const char *name) +char *path_name(struct name_path *path, const char *name) { struct name_path *p; char *n, *m; diff --git a/revision.h b/revision.h index 91f1944..6fcfb8c 100644 --- a/revision.h +++ b/revision.h @@ -141,6 +141,8 @@ struct name_path { const char *elem; }; +char *path_name(struct name_path *path, const char *name); + extern void add_object(struct object *obj, struct object_array *p, struct name_path *path, diff --git a/upload-pack.c b/upload-pack.c index e5adbc0..bdbd67b 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -78,20 +78,20 @@ static void show_commit(struct commit *commit) commit->buffer = NULL; } -static void show_object(struct object_array_entry *p) +static void show_object(struct object *obj, const char *name) { /* An object with name "foo\n0000000..." can be used to * confuse downstream git-pack-objects very badly. 
*/ - const char *ep = strchr(p->name, '\n'); + const char *ep = strchr(name, '\n'); if (ep) { - fprintf(pack_pipe, "%s %.*s\n", sha1_to_hex(p->item->sha1), - (int) (ep - p->name), - p->name); + fprintf(pack_pipe, "%s %.*s\n", sha1_to_hex(obj->sha1), + (int) (ep - name), + name); } else fprintf(pack_pipe, "%s %s\n", - sha1_to_hex(p->item->sha1), p->name); + sha1_to_hex(obj->sha1), name); } static void show_edge(struct commit *commit) -- cgit v0.10.2-6-g49f6 From cf2ab916afa4231f7e9db31796e7c0f712ff6ad1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 10 Apr 2009 18:15:26 -0700 Subject: show_object(): push path_name() call further down In particular, pushing the "path_name()" call _into_ the show() function would seem to allow - more clarity into who "owns" the name (ie now when we free the name in the show_object callback, it's because we generated it ourselves by calling path_name()) - not calling path_name() at all, either because we don't care about the name in the first place, or because we are actually happy walking the linked list of "struct name_path *" and the last component. Now, I didn't do that latter optimization, because it would require some more coding, but especially looking at "builtin-pack-objects.c", we really don't even want the whole pathname, we really would be better off with the list of path components. Why? We use that name for two things: - add_preferred_base_object(), which actually _wants_ to traverse the path, and now does it by looking for '/' characters! - for 'name_hash()', which only cares about the last 16 characters of a name, so again, generating the full name seems to be just unnecessary work. Anyway, so I didn't look any closer at those things, but it did convince me that the "show_object()" calling convention was crazy, and we're actually better off doing _less_ in list-objects.c, and giving people access to the internal data structures so that they can decide whether they want to generate a path-name or not. 
This patch does that, and then for people who did use the name (even if they might do something more clever in the future), it just does the straightforward "name = path_name(path, component); .. free(name);" thing. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index dde8cc3..7104145 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -1856,8 +1856,10 @@ static void show_commit(struct commit *commit) commit->object.flags |= OBJECT_ADDED; } -static void show_object(struct object *obj, const char *name) +static void show_object(struct object *obj, const struct name_path *path, const char *last) { + char *name = path_name(path, last); + add_preferred_base_object(name); add_object_entry(obj->sha1, obj->type, name, 0); obj->flags |= OBJECT_ADDED; diff --git a/builtin-rev-list.c b/builtin-rev-list.c index 759e671..aa3c962 100644 --- a/builtin-rev-list.c +++ b/builtin-rev-list.c @@ -169,20 +169,21 @@ static void finish_commit(struct commit *commit) commit->buffer = NULL; } -static void finish_object(struct object *obj, const char *name) +static void finish_object(struct object *obj, const struct name_path *path, const char *name) { if (obj->type == OBJ_BLOB && !has_sha1_file(obj->sha1)) die("missing blob object '%s'", sha1_to_hex(obj->sha1)); } -static void show_object(struct object *obj, const char *name) +static void show_object(struct object *obj, const struct name_path *path, const char *component) { + char *name = path_name(path, component); /* An object with name "foo\n0000000..." can be used to * confuse downstream "git pack-objects" very badly. 
*/ const char *ep = strchr(name, '\n'); - finish_object(obj, name); + finish_object(obj, path, name); if (ep) { printf("%s %.*s\n", sha1_to_hex(obj->sha1), (int) (ep - name), @@ -190,6 +191,7 @@ static void show_object(struct object *obj, const char *name) } else printf("%s %s\n", sha1_to_hex(obj->sha1), name); + free(name); } static void show_edge(struct commit *commit) diff --git a/list-objects.c b/list-objects.c index 5a4af62..30ded3d 100644 --- a/list-objects.c +++ b/list-objects.c @@ -23,7 +23,7 @@ static void process_blob(struct rev_info *revs, if (obj->flags & (UNINTERESTING | SEEN)) return; obj->flags |= SEEN; - show(obj, path_name(path, name)); + show(obj, path, name); } /* @@ -77,7 +77,7 @@ static void process_tree(struct rev_info *revs, if (parse_tree(tree) < 0) die("bad tree object %s", sha1_to_hex(obj->sha1)); obj->flags |= SEEN; - show(obj, path_name(path, name)); + show(obj, path, name); me.up = path; me.elem = name; me.elem_len = strlen(name); @@ -140,8 +140,8 @@ static void add_pending_tree(struct rev_info *revs, struct tree *tree) } void traverse_commit_list(struct rev_info *revs, - void (*show_commit)(struct commit *), - void (*show_object)(struct object *, const char *)) + show_commit_fn show_commit, + show_object_fn show_object) { int i; struct commit *commit; @@ -158,7 +158,7 @@ void traverse_commit_list(struct rev_info *revs, continue; if (obj->type == OBJ_TAG) { obj->flags |= SEEN; - show_object(obj, name); + show_object(obj, NULL, name); continue; } if (obj->type == OBJ_TREE) { diff --git a/list-objects.h b/list-objects.h index 13b0dd9..0b2de64 100644 --- a/list-objects.h +++ b/list-objects.h @@ -2,7 +2,7 @@ #define LIST_OBJECTS_H typedef void (*show_commit_fn)(struct commit *); -typedef void (*show_object_fn)(struct object *, const char *); +typedef void (*show_object_fn)(struct object *, const struct name_path *, const char *); typedef void (*show_edge_fn)(struct commit *); void traverse_commit_list(struct rev_info *revs, show_commit_fn, 
show_object_fn); diff --git a/revision.c b/revision.c index f95104b..69d5fd4 100644 --- a/revision.c +++ b/revision.c @@ -14,9 +14,9 @@ volatile show_early_output_fn_t show_early_output; -char *path_name(struct name_path *path, const char *name) +char *path_name(const struct name_path *path, const char *name) { - struct name_path *p; + const struct name_path *p; char *n, *m; int nlen = strlen(name); int len = nlen + 1; diff --git a/revision.h b/revision.h index 6fcfb8c..e5b8908 100644 --- a/revision.h +++ b/revision.h @@ -141,7 +141,7 @@ struct name_path { const char *elem; }; -char *path_name(struct name_path *path, const char *name); +char *path_name(const struct name_path *path, const char *name); extern void add_object(struct object *obj, struct object_array *p, diff --git a/upload-pack.c b/upload-pack.c index bdbd67b..d8ce306 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -78,11 +78,12 @@ static void show_commit(struct commit *commit) commit->buffer = NULL; } -static void show_object(struct object *obj, const char *name) +static void show_object(struct object *obj, const struct name_path *path, const char *component) { /* An object with name "foo\n0000000..." can be used to * confuse downstream git-pack-objects very badly. */ + const char *name = path_name(path, component); const char *ep = strchr(name, '\n'); if (ep) { fprintf(pack_pipe, "%s %.*s\n", sha1_to_hex(obj->sha1), @@ -92,6 +93,7 @@ static void show_object(struct object *obj, const char *name) else fprintf(pack_pipe, "%s %s\n", sha1_to_hex(obj->sha1), name); + free((char *)name); } static void show_edge(struct commit *commit) -- cgit v0.10.2-6-g49f6