From 24304816141d16aacdc63612797faa1426222ef7 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 6 Jan 2007 02:16:10 -0800 Subject: builtin-prune: make file-scope static struct to an argument. I want to make the first part of 'git prune' that marks the reachable objects callable as a library, so this starts the first step toward the goal by making the callchain to pass rev_info structure as an argument. No functionality change should be in this step. Signed-off-by: Junio C Hamano diff --git a/builtin-prune.c b/builtin-prune.c index b469c43..9522864 100644 --- a/builtin-prune.c +++ b/builtin-prune.c @@ -12,7 +12,6 @@ static const char prune_usage[] = "git-prune [-n]"; static int show_only; -static struct rev_info revs; static int prune_object(char *path, const char *filename, const unsigned char *sha1) { @@ -184,45 +183,48 @@ static void walk_commit_list(struct rev_info *revs) static int add_one_reflog_ent(unsigned char *osha1, unsigned char *nsha1, char *datail, void *cb_data) { struct object *object; + struct rev_info *revs = (struct rev_info *)cb_data; object = parse_object(osha1); if (object) - add_pending_object(&revs, object, ""); + add_pending_object(revs, object, ""); object = parse_object(nsha1); if (object) - add_pending_object(&revs, object, ""); + add_pending_object(revs, object, ""); return 0; } static int add_one_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data) { struct object *object = parse_object(sha1); + struct rev_info *revs = (struct rev_info *)cb_data; + if (!object) die("bad object ref: %s:%s", path, sha1_to_hex(sha1)); - add_pending_object(&revs, object, ""); + add_pending_object(revs, object, ""); - for_each_reflog_ent(path, add_one_reflog_ent, NULL); + for_each_reflog_ent(path, add_one_reflog_ent, cb_data); return 0; } -static void add_one_tree(const unsigned char *sha1) +static void add_one_tree(const unsigned char *sha1, struct rev_info *revs) { struct tree *tree = lookup_tree(sha1); - 
add_pending_object(&revs, &tree->object, ""); + add_pending_object(revs, &tree->object, ""); } -static void add_cache_tree(struct cache_tree *it) +static void add_cache_tree(struct cache_tree *it, struct rev_info *revs) { int i; if (it->entry_count >= 0) - add_one_tree(it->sha1); + add_one_tree(it->sha1, revs); for (i = 0; i < it->subtree_nr; i++) - add_cache_tree(it->down[i]->cache_tree); + add_cache_tree(it->down[i]->cache_tree, revs); } -static void add_cache_refs(void) +static void add_cache_refs(struct rev_info *revs) { int i; @@ -237,12 +239,13 @@ static void add_cache_refs(void) */ } if (active_cache_tree) - add_cache_tree(active_cache_tree); + add_cache_tree(active_cache_tree, revs); } int cmd_prune(int argc, const char **argv, const char *prefix) { int i; + struct rev_info revs; for (i = 1; i < argc; i++) { const char *arg = argv[i]; @@ -264,11 +267,11 @@ int cmd_prune(int argc, const char **argv, const char *prefix) revs.blob_objects = 1; revs.tree_objects = 1; - /* Add all external refs */ - for_each_ref(add_one_ref, NULL); + /* Add all external refs, along with its reflog info */ + for_each_ref(add_one_ref, &revs); /* Add all refs from the index file */ - add_cache_refs(); + add_cache_refs(&revs); /* * Set up the revision walk - this will move all commits -- cgit v0.10.2-6-g49f6 From ca4f293fb492efdd2b984b992796b075c30e230d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 6 Jan 2007 02:16:14 -0800 Subject: builtin-prune: separate ref walking from reflog walking. This is necessary for the next step, because the reason I am making the connectivity walker into a library is because I want to use it for cleaning up stale reflog entries. 
Signed-off-by: Junio C Hamano diff --git a/builtin-prune.c b/builtin-prune.c index 9522864..cd079b4 100644 --- a/builtin-prune.c +++ b/builtin-prune.c @@ -203,8 +203,12 @@ static int add_one_ref(const char *path, const unsigned char *sha1, int flag, vo die("bad object ref: %s:%s", path, sha1_to_hex(sha1)); add_pending_object(revs, object, ""); - for_each_reflog_ent(path, add_one_reflog_ent, cb_data); + return 0; +} +static int add_one_reflog(const char *path, const unsigned char *sha1, int flag, void *cb_data) +{ + for_each_reflog_ent(path, add_one_reflog_ent, cb_data); return 0; } @@ -267,12 +271,15 @@ int cmd_prune(int argc, const char **argv, const char *prefix) revs.blob_objects = 1; revs.tree_objects = 1; - /* Add all external refs, along with its reflog info */ + /* Add all external refs */ for_each_ref(add_one_ref, &revs); /* Add all refs from the index file */ add_cache_refs(&revs); + /* Add all reflog info from refs */ + for_each_ref(add_one_reflog, &revs); + /* * Set up the revision walk - this will move all commits * from the pending list to the commit walking list. -- cgit v0.10.2-6-g49f6 From 94421474e068c2f0a7bef3d658216a0f1e75b906 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 6 Jan 2007 02:16:17 -0800 Subject: Move traversal of reachable objects into a separate library. This moves a major part of builtin-prune into a separate file, reachable.c. It is used to mark the objects that are reachable from refs, and optionally from reflogs. The patch looks very large, but if you look at it with diff -C, which this message is formatted in, most of the changes are copied lines and there are very few additions. 
Signed-off-by: Junio C Hamano diff --git a/Makefile b/Makefile index 180e1e0..d8bfb6b 100644 --- a/Makefile +++ b/Makefile @@ -251,6 +251,7 @@ LIB_OBJS = \ interpolate.o \ lockfile.o \ object.o pack-check.o patch-delta.o path.o pkt-line.o sideband.o \ + reachable.o \ quote.o read-cache.o refs.o run-command.o dir.o object-refs.o \ server-info.o setup.o sha1_file.o sha1_name.o strbuf.o \ tag.o tree.o usage.o config.o environment.o ctype.o copy.o \ diff --git a/builtin-prune.c b/builtin-prune.c index cd079b4..6f0ba0d 100644 --- a/builtin-prune.c +++ b/builtin-prune.c @@ -1,14 +1,9 @@ #include "cache.h" -#include "refs.h" -#include "tag.h" #include "commit.h" -#include "tree.h" -#include "blob.h" -#include "tree-walk.h" #include "diff.h" #include "revision.h" #include "builtin.h" -#include "cache-tree.h" +#include "reachable.h" static const char prune_usage[] = "git-prune [-n]"; static int show_only; @@ -84,168 +79,6 @@ static void prune_object_dir(const char *path) } } -static void process_blob(struct blob *blob, - struct object_array *p, - struct name_path *path, - const char *name) -{ - struct object *obj = &blob->object; - - if (obj->flags & SEEN) - return; - obj->flags |= SEEN; - /* Nothing to do, really .. 
The blob lookup was the important part */ -} - -static void process_tree(struct tree *tree, - struct object_array *p, - struct name_path *path, - const char *name) -{ - struct object *obj = &tree->object; - struct tree_desc desc; - struct name_entry entry; - struct name_path me; - - if (obj->flags & SEEN) - return; - obj->flags |= SEEN; - if (parse_tree(tree) < 0) - die("bad tree object %s", sha1_to_hex(obj->sha1)); - name = xstrdup(name); - add_object(obj, p, path, name); - me.up = path; - me.elem = name; - me.elem_len = strlen(name); - - desc.buf = tree->buffer; - desc.size = tree->size; - - while (tree_entry(&desc, &entry)) { - if (S_ISDIR(entry.mode)) - process_tree(lookup_tree(entry.sha1), p, &me, entry.path); - else - process_blob(lookup_blob(entry.sha1), p, &me, entry.path); - } - free(tree->buffer); - tree->buffer = NULL; -} - -static void process_tag(struct tag *tag, struct object_array *p, const char *name) -{ - struct object *obj = &tag->object; - struct name_path me; - - if (obj->flags & SEEN) - return; - obj->flags |= SEEN; - - me.up = NULL; - me.elem = "tag:/"; - me.elem_len = 5; - - if (parse_tag(tag) < 0) - die("bad tag object %s", sha1_to_hex(obj->sha1)); - add_object(tag->tagged, p, NULL, name); -} - -static void walk_commit_list(struct rev_info *revs) -{ - int i; - struct commit *commit; - struct object_array objects = { 0, 0, NULL }; - - /* Walk all commits, process their trees */ - while ((commit = get_revision(revs)) != NULL) - process_tree(commit->tree, &objects, NULL, ""); - - /* Then walk all the pending objects, recursively processing them too */ - for (i = 0; i < revs->pending.nr; i++) { - struct object_array_entry *pending = revs->pending.objects + i; - struct object *obj = pending->item; - const char *name = pending->name; - if (obj->type == OBJ_TAG) { - process_tag((struct tag *) obj, &objects, name); - continue; - } - if (obj->type == OBJ_TREE) { - process_tree((struct tree *)obj, &objects, NULL, name); - continue; - } - if (obj->type 
== OBJ_BLOB) { - process_blob((struct blob *)obj, &objects, NULL, name); - continue; - } - die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name); - } -} - -static int add_one_reflog_ent(unsigned char *osha1, unsigned char *nsha1, char *datail, void *cb_data) -{ - struct object *object; - struct rev_info *revs = (struct rev_info *)cb_data; - - object = parse_object(osha1); - if (object) - add_pending_object(revs, object, ""); - object = parse_object(nsha1); - if (object) - add_pending_object(revs, object, ""); - return 0; -} - -static int add_one_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data) -{ - struct object *object = parse_object(sha1); - struct rev_info *revs = (struct rev_info *)cb_data; - - if (!object) - die("bad object ref: %s:%s", path, sha1_to_hex(sha1)); - add_pending_object(revs, object, ""); - - return 0; -} - -static int add_one_reflog(const char *path, const unsigned char *sha1, int flag, void *cb_data) -{ - for_each_reflog_ent(path, add_one_reflog_ent, cb_data); - return 0; -} - -static void add_one_tree(const unsigned char *sha1, struct rev_info *revs) -{ - struct tree *tree = lookup_tree(sha1); - add_pending_object(revs, &tree->object, ""); -} - -static void add_cache_tree(struct cache_tree *it, struct rev_info *revs) -{ - int i; - - if (it->entry_count >= 0) - add_one_tree(it->sha1, revs); - for (i = 0; i < it->subtree_nr; i++) - add_cache_tree(it->down[i]->cache_tree, revs); -} - -static void add_cache_refs(struct rev_info *revs) -{ - int i; - - read_cache(); - for (i = 0; i < active_nr; i++) { - lookup_blob(active_cache[i]->sha1); - /* - * We could add the blobs to the pending list, but quite - * frankly, we don't care. 
Once we've looked them up, and - * added them as objects, we've really done everything - * there is to do for a blob - */ - } - if (active_cache_tree) - add_cache_tree(active_cache_tree, revs); -} - int cmd_prune(int argc, const char **argv, const char *prefix) { int i; @@ -261,32 +94,8 @@ int cmd_prune(int argc, const char **argv, const char *prefix) } save_commit_buffer = 0; - - /* - * Set up revision parsing, and mark us as being interested - * in all object types, not just commits. - */ init_revisions(&revs, prefix); - revs.tag_objects = 1; - revs.blob_objects = 1; - revs.tree_objects = 1; - - /* Add all external refs */ - for_each_ref(add_one_ref, &revs); - - /* Add all refs from the index file */ - add_cache_refs(&revs); - - /* Add all reflog info from refs */ - for_each_ref(add_one_reflog, &revs); - - /* - * Set up the revision walk - this will move all commits - * from the pending list to the commit walking list. - */ - prepare_revision_walk(&revs); - - walk_commit_list(&revs); + mark_reachable_objects(&revs, 1); prune_object_dir(get_object_directory()); diff --git a/reachable.c b/reachable.c new file mode 100644 index 0000000..4dfee1d --- /dev/null +++ b/reachable.c @@ -0,0 +1,199 @@ +#include "cache.h" +#include "refs.h" +#include "tag.h" +#include "commit.h" +#include "blob.h" +#include "diff.h" +#include "revision.h" +#include "reachable.h" +#include "cache-tree.h" + +static void process_blob(struct blob *blob, + struct object_array *p, + struct name_path *path, + const char *name) +{ + struct object *obj = &blob->object; + + if (obj->flags & SEEN) + return; + obj->flags |= SEEN; + /* Nothing to do, really .. 
The blob lookup was the important part */ +} + +static void process_tree(struct tree *tree, + struct object_array *p, + struct name_path *path, + const char *name) +{ + struct object *obj = &tree->object; + struct tree_desc desc; + struct name_entry entry; + struct name_path me; + + if (obj->flags & SEEN) + return; + obj->flags |= SEEN; + if (parse_tree(tree) < 0) + die("bad tree object %s", sha1_to_hex(obj->sha1)); + name = xstrdup(name); + add_object(obj, p, path, name); + me.up = path; + me.elem = name; + me.elem_len = strlen(name); + + desc.buf = tree->buffer; + desc.size = tree->size; + + while (tree_entry(&desc, &entry)) { + if (S_ISDIR(entry.mode)) + process_tree(lookup_tree(entry.sha1), p, &me, entry.path); + else + process_blob(lookup_blob(entry.sha1), p, &me, entry.path); + } + free(tree->buffer); + tree->buffer = NULL; +} + +static void process_tag(struct tag *tag, struct object_array *p, const char *name) +{ + struct object *obj = &tag->object; + struct name_path me; + + if (obj->flags & SEEN) + return; + obj->flags |= SEEN; + + me.up = NULL; + me.elem = "tag:/"; + me.elem_len = 5; + + if (parse_tag(tag) < 0) + die("bad tag object %s", sha1_to_hex(obj->sha1)); + add_object(tag->tagged, p, NULL, name); +} + +static void walk_commit_list(struct rev_info *revs) +{ + int i; + struct commit *commit; + struct object_array objects = { 0, 0, NULL }; + + /* Walk all commits, process their trees */ + while ((commit = get_revision(revs)) != NULL) + process_tree(commit->tree, &objects, NULL, ""); + + /* Then walk all the pending objects, recursively processing them too */ + for (i = 0; i < revs->pending.nr; i++) { + struct object_array_entry *pending = revs->pending.objects + i; + struct object *obj = pending->item; + const char *name = pending->name; + if (obj->type == OBJ_TAG) { + process_tag((struct tag *) obj, &objects, name); + continue; + } + if (obj->type == OBJ_TREE) { + process_tree((struct tree *)obj, &objects, NULL, name); + continue; + } + if (obj->type 
== OBJ_BLOB) { + process_blob((struct blob *)obj, &objects, NULL, name); + continue; + } + die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name); + } +} + +static int add_one_reflog_ent(unsigned char *osha1, unsigned char *nsha1, char *datail, void *cb_data) +{ + struct object *object; + struct rev_info *revs = (struct rev_info *)cb_data; + + object = parse_object(osha1); + if (object) + add_pending_object(revs, object, ""); + object = parse_object(nsha1); + if (object) + add_pending_object(revs, object, ""); + return 0; +} + +static int add_one_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data) +{ + struct object *object = parse_object(sha1); + struct rev_info *revs = (struct rev_info *)cb_data; + + if (!object) + die("bad object ref: %s:%s", path, sha1_to_hex(sha1)); + add_pending_object(revs, object, ""); + + return 0; +} + +static int add_one_reflog(const char *path, const unsigned char *sha1, int flag, void *cb_data) +{ + for_each_reflog_ent(path, add_one_reflog_ent, cb_data); + return 0; +} + +static void add_one_tree(const unsigned char *sha1, struct rev_info *revs) +{ + struct tree *tree = lookup_tree(sha1); + add_pending_object(revs, &tree->object, ""); +} + +static void add_cache_tree(struct cache_tree *it, struct rev_info *revs) +{ + int i; + + if (it->entry_count >= 0) + add_one_tree(it->sha1, revs); + for (i = 0; i < it->subtree_nr; i++) + add_cache_tree(it->down[i]->cache_tree, revs); +} + +static void add_cache_refs(struct rev_info *revs) +{ + int i; + + read_cache(); + for (i = 0; i < active_nr; i++) { + lookup_blob(active_cache[i]->sha1); + /* + * We could add the blobs to the pending list, but quite + * frankly, we don't care. 
Once we've looked them up, and + * added them as objects, we've really done everything + * there is to do for a blob + */ + } + if (active_cache_tree) + add_cache_tree(active_cache_tree, revs); +} + +void mark_reachable_objects(struct rev_info *revs, int mark_reflog) +{ + /* + * Set up revision parsing, and mark us as being interested + * in all object types, not just commits. + */ + revs->tag_objects = 1; + revs->blob_objects = 1; + revs->tree_objects = 1; + + /* Add all refs from the index file */ + add_cache_refs(revs); + + /* Add all external refs */ + for_each_ref(add_one_ref, revs); + + /* Add all reflog info from refs */ + if (mark_reflog) + for_each_ref(add_one_reflog, revs); + + /* + * Set up the revision walk - this will move all commits + * from the pending list to the commit walking list. + */ + prepare_revision_walk(revs); + walk_commit_list(revs); +} diff --git a/reachable.h b/reachable.h new file mode 100644 index 0000000..4075181 --- /dev/null +++ b/reachable.h @@ -0,0 +1,6 @@ +#ifndef REACHEABLE_H +#define REACHEABLE_H + +extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog); + +#endif -- cgit v0.10.2-6-g49f6 From 1389d9ddaa68a4cbf5018d88f971b9bbb7aaa3c9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 6 Jan 2007 02:16:19 -0800 Subject: reflog expire --fix-stale The logic in an earlier round to detect reflog entries that point at a broken commit was not sufficient. Just like we do not trust presence of a commit during pack transfer (we trust only our refs), we should not trust a commit's presence, even if the tree of that commit is complete. A repository that had reflog enabled on some of the refs that was rewound and then run git-repack or git-prune from older versions of git can have reflog entries that point at a commit that still exists but lacks commits (or trees and blobs needed for that commit) between it and some commit that is reachable from one of the refs. 
This revamps the logic -- the definition of "broken commit" becomes: a commit that is not reachable from any of the refs and there is a missing object among the commit, tree, or blob objects reachable from it that is not reachable from any of the refs. Entries in the reflog that refer to such a commit are expired. Since this computation involves traversing all the reachable objects, i.e. it has the same cost as 'git prune', it is enabled only when a new option --fix-stale is given. Fortunately, once this is run, we should not have to ever worry about missing objects, because the current prune and pack-objects know about reflogs and protect objects referred to by them. Unfortunately, this will be absolutely necessary to help people migrate to the newer prune and repack. Signed-off-by: Junio C Hamano diff --git a/builtin-reflog.c b/builtin-reflog.c index d3f2f50..1da7da0 100644 --- a/builtin-reflog.c +++ b/builtin-reflog.c @@ -4,16 +4,34 @@ #include "refs.h" #include "dir.h" #include "tree-walk.h" +#include "diff.h" +#include "revision.h" +#include "reachable.h" + +/* + * reflog expire + */ + +static const char reflog_expire_usage[] = +"git-reflog expire [--verbose] [--dry-run] [--fix-stale] [--expire=