From 5f73076c1a9b4b8dc94f77eac98eb558d25e33c0 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 8 Feb 2006 21:15:24 -0800
Subject: "Assume unchanged" git

This adds "assume unchanged" logic, started by this message in the list
discussion recently:

	<Pine.LNX.4.64.0601311807470.7301@g5.osdl.org>

This is a workaround for filesystems that do not have lstat()
that is quick enough for the index mechanism to take advantage
of.  On the paths marked as "assumed to be unchanged", the user
needs to explicitly use update-index to register the object name
to be in the next commit.

You can use two new options to update-index to set and reset the
CE_VALID bit:

	git-update-index --assume-unchanged path...
	git-update-index --no-assume-unchanged path...

These forms manipulate only the CE_VALID bit; they do not change
the object name recorded in the index file, nor do they add a new
entry to the index.

When the configuration variable "core.ignorestat = true" is set,
the index entries are marked with CE_VALID bit automatically
after:

 - update-index to explicitly register the current object name to the
   index file.

 - when update-index --refresh finds the path to be up-to-date.

 - when tools like read-tree -u and apply --index update the working
   tree file and register the current object name to the index file.

The flag is dropped upon read-tree that does not check out the index
entry.  This happens regardless of the core.ignorestat settings.

Index entries marked with CE_VALID bit are assumed to be
unchanged most of the time.  However, there are cases where the
CE_VALID bit is ignored for the sake of safety and usability:

 - while "git-read-tree -m" or git-apply need to make sure
   that the paths involved in the merge do not have local
   modifications.  This sacrifices performance for safety.

 - when git-checkout-index -f -q -u -a tries to see if it needs
   to checkout the paths.  Otherwise you can never check
   anything out ;-).

 - when git-update-index --really-refresh (a new flag) tries to
   see if the index entry is up to date.  You can start with
   everything marked as CE_VALID and run this once to drop
   CE_VALID bit for paths that are modified.

Most notably, "update-index --refresh" honours CE_VALID and does
not actively stat, so after you modified a file in the working
tree, update-index --refresh would not notice until you tell the
index about it with "git-update-index path" or "git-update-index
--no-assume-unchanged path".

This version is not expected to be perfect.  I think diff
between index and/or tree and working files may need some
adjustment, and there are probably other cases where we should
automatically unmark paths that are marked as CE_VALID.

But the basics seem to work, and it is ready to be tested by
people who asked for this feature.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/apply.c b/apply.c
index 2ad47fb..35ae48e 100644
--- a/apply.c
+++ b/apply.c
@@ -1309,7 +1309,7 @@ static int check_patch(struct patch *patch)
 					return -1;
 			}
 
-			changed = ce_match_stat(active_cache[pos], &st);
+			changed = ce_match_stat(active_cache[pos], &st, 1);
 			if (changed)
 				return error("%s: does not match index",
 					     old_name);
diff --git a/cache.h b/cache.h
index bdbe2d6..cd58fad 100644
--- a/cache.h
+++ b/cache.h
@@ -91,6 +91,7 @@ struct cache_entry {
 #define CE_NAMEMASK  (0x0fff)
 #define CE_STAGEMASK (0x3000)
 #define CE_UPDATE    (0x4000)
+#define CE_VALID     (0x8000)
 #define CE_STAGESHIFT 12
 
 #define create_ce_flags(len, stage) htons((len) | ((stage) << CE_STAGESHIFT))
@@ -144,8 +145,8 @@ extern int add_cache_entry(struct cache_entry *ce, int option);
 extern int remove_cache_entry_at(int pos);
 extern int remove_file_from_cache(const char *path);
 extern int ce_same_name(struct cache_entry *a, struct cache_entry *b);
-extern int ce_match_stat(struct cache_entry *ce, struct stat *st);
-extern int ce_modified(struct cache_entry *ce, struct stat *st);
+extern int ce_match_stat(struct cache_entry *ce, struct stat *st, int);
+extern int ce_modified(struct cache_entry *ce, struct stat *st, int);
 extern int ce_path_match(const struct cache_entry *ce, const char **pathspec);
 extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, const char *type);
 extern int index_pipe(unsigned char *sha1, int fd, const char *type, int write_object);
@@ -161,6 +162,7 @@ extern int commit_index_file(struct cache_file *);
 extern void rollback_index_file(struct cache_file *);
 
 extern int trust_executable_bit;
+extern int assume_unchanged;
 extern int only_use_symrefs;
 extern int diff_rename_limit_default;
 extern int shared_repository;
diff --git a/checkout-index.c b/checkout-index.c
index 53dd8cb..957b4a8 100644
--- a/checkout-index.c
+++ b/checkout-index.c
@@ -116,6 +116,7 @@ int main(int argc, char **argv)
 	int all = 0;
 
 	prefix = setup_git_directory();
+	git_config(git_default_config);
 	prefix_length = prefix ? strlen(prefix) : 0;
 
 	if (read_cache() < 0) {
diff --git a/config.c b/config.c
index 8355224..7dbdce1 100644
--- a/config.c
+++ b/config.c
@@ -222,6 +222,11 @@ int git_default_config(const char *var, const char *value)
 		return 0;
 	}
 
+	if (!strcmp(var, "core.ignorestat")) {
+		assume_unchanged = git_config_bool(var, value);
+		return 0;
+	}
+
 	if (!strcmp(var, "core.symrefsonly")) {
 		only_use_symrefs = git_config_bool(var, value);
 		return 0;
diff --git a/diff-files.c b/diff-files.c
index d24d11c..c96ad35 100644
--- a/diff-files.c
+++ b/diff-files.c
@@ -191,7 +191,7 @@ int main(int argc, const char **argv)
 			show_file('-', ce);
 			continue;
 		}
-		changed = ce_match_stat(ce, &st);
+		changed = ce_match_stat(ce, &st, 0);
 		if (!changed && !diff_options.find_copies_harder)
 			continue;
 		oldmode = ntohl(ce->ce_mode);
diff --git a/diff-index.c b/diff-index.c
index f8a102e..12a9418 100644
--- a/diff-index.c
+++ b/diff-index.c
@@ -33,7 +33,7 @@ static int get_stat_data(struct cache_entry *ce,
 			}
 			return -1;
 		}
-		changed = ce_match_stat(ce, &st);
+		changed = ce_match_stat(ce, &st, 0);
 		if (changed) {
 			mode = create_ce_mode(st.st_mode);
 			if (!trust_executable_bit &&
diff --git a/diff.c b/diff.c
index ec51e7d..c72064e 100644
--- a/diff.c
+++ b/diff.c
@@ -311,7 +311,7 @@ static int work_tree_matches(const char *name, const unsigned char *sha1)
 	ce = active_cache[pos];
 	if ((lstat(name, &st) < 0) ||
 	    !S_ISREG(st.st_mode) || /* careful! */
-	    ce_match_stat(ce, &st) ||
+	    ce_match_stat(ce, &st, 0) ||
 	    memcmp(sha1, ce->sha1, 20))
 		return 0;
 	/* we return 1 only when we can stat, it is a regular file,
diff --git a/entry.c b/entry.c
index 6c47c3a..8fb99bc 100644
--- a/entry.c
+++ b/entry.c
@@ -123,7 +123,7 @@ int checkout_entry(struct cache_entry *ce, struct checkout *state)
 	strcpy(path + len, ce->name);
 
 	if (!lstat(path, &st)) {
-		unsigned changed = ce_match_stat(ce, &st);
+		unsigned changed = ce_match_stat(ce, &st, 1);
 		if (!changed)
 			return 0;
 		if (!state->force) {
diff --git a/environment.c b/environment.c
index 0596fc6..251e53c 100644
--- a/environment.c
+++ b/environment.c
@@ -12,6 +12,7 @@
 char git_default_email[MAX_GITNAME];
 char git_default_name[MAX_GITNAME];
 int trust_executable_bit = 1;
+int assume_unchanged = 0;
 int only_use_symrefs = 0;
 int repository_format_version = 0;
 char git_commit_encoding[MAX_ENCODING_LENGTH] = "utf-8";
diff --git a/read-cache.c b/read-cache.c
index c5474d4..efbb1be 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -27,6 +27,9 @@ void fill_stat_cache_info(struct cache_entry *ce, struct stat *st)
 	ce->ce_uid = htonl(st->st_uid);
 	ce->ce_gid = htonl(st->st_gid);
 	ce->ce_size = htonl(st->st_size);
+
+	if (assume_unchanged)
+		ce->ce_flags |= htons(CE_VALID);
 }
 
 static int ce_compare_data(struct cache_entry *ce, struct stat *st)
@@ -146,9 +149,18 @@ static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st)
 	return changed;
 }
 
-int ce_match_stat(struct cache_entry *ce, struct stat *st)
+int ce_match_stat(struct cache_entry *ce, struct stat *st, int ignore_valid)
 {
-	unsigned int changed = ce_match_stat_basic(ce, st);
+	unsigned int changed;
+
+	/*
+	 * If it's marked as always valid in the index, it's
+	 * valid whatever the checked-out copy says.
+	 */
+	if (!ignore_valid && (ce->ce_flags & htons(CE_VALID)))
+		return 0;
+
+	changed = ce_match_stat_basic(ce, st);
 
 	/*
 	 * Within 1 second of this sequence:
@@ -164,7 +176,7 @@ int ce_match_stat(struct cache_entry *ce, struct stat *st)
 	 * effectively mean we can make at most one commit per second,
 	 * which is not acceptable.  Instead, we check cache entries
 	 * whose mtime are the same as the index file timestamp more
-	 * careful than others.
+	 * carefully than others.
 	 */
 	if (!changed &&
 	    index_file_timestamp &&
@@ -174,10 +186,10 @@ int ce_match_stat(struct cache_entry *ce, struct stat *st)
 	return changed;
 }
 
-int ce_modified(struct cache_entry *ce, struct stat *st)
+int ce_modified(struct cache_entry *ce, struct stat *st, int really)
 {
 	int changed, changed_fs;
-	changed = ce_match_stat(ce, st);
+	changed = ce_match_stat(ce, st, really);
 	if (!changed)
 		return 0;
 	/*
@@ -233,6 +245,11 @@ int cache_name_compare(const char *name1, int flags1, const char *name2, int fla
 		return -1;
 	if (len1 > len2)
 		return 1;
+
+	/* Differences between "assume up-to-date" should not matter. */
+	flags1 &= ~CE_VALID;
+	flags2 &= ~CE_VALID;
+
 	if (flags1 < flags2)
 		return -1;
 	if (flags1 > flags2)
@@ -430,6 +447,7 @@ int add_cache_entry(struct cache_entry *ce, int option)
 	int ok_to_add = option & ADD_CACHE_OK_TO_ADD;
 	int ok_to_replace = option & ADD_CACHE_OK_TO_REPLACE;
 	int skip_df_check = option & ADD_CACHE_SKIP_DFCHECK;
+
 	pos = cache_name_pos(ce->name, ntohs(ce->ce_flags));
 
 	/* existing match? Just replace it. */
diff --git a/read-tree.c b/read-tree.c
index 5580f15..52f06e3 100644
--- a/read-tree.c
+++ b/read-tree.c
@@ -349,7 +349,7 @@ static void verify_uptodate(struct cache_entry *ce)
 		return;
 
 	if (!lstat(ce->name, &st)) {
-		unsigned changed = ce_match_stat(ce, &st);
+		unsigned changed = ce_match_stat(ce, &st, 1);
 		if (!changed)
 			return;
 		errno = 0;
diff --git a/update-index.c b/update-index.c
index afec98d..767fd49 100644
--- a/update-index.c
+++ b/update-index.c
@@ -23,6 +23,10 @@ static int quiet; /* --refresh needing update is not error */
 static int info_only;
 static int force_remove;
 static int verbose;
+static int mark_valid_only = 0;
+#define MARK_VALID 1
+#define UNMARK_VALID 2
+
 
 /* Three functions to allow overloaded pointer return; see linux/err.h */
 static inline void *ERR_PTR(long error)
@@ -53,6 +57,25 @@ static void report(const char *fmt, ...)
 	va_end(vp);
 }
 
+static int mark_valid(const char *path)
+{
+	int namelen = strlen(path);
+	int pos = cache_name_pos(path, namelen);
+	if (0 <= pos) {
+		switch (mark_valid_only) {
+		case MARK_VALID:
+			active_cache[pos]->ce_flags |= htons(CE_VALID);
+			break;
+		case UNMARK_VALID:
+			active_cache[pos]->ce_flags &= ~htons(CE_VALID);
+			break;
+		}
+		active_cache_changed = 1;
+		return 0;
+	}
+	return -1;
+}
+
 static int add_file_to_cache(const char *path)
 {
 	int size, namelen, option, status;
@@ -94,6 +117,7 @@ static int add_file_to_cache(const char *path)
 	ce = xmalloc(size);
 	memset(ce, 0, size);
 	memcpy(ce->name, path, namelen);
+	ce->ce_flags = htons(namelen);
 	fill_stat_cache_info(ce, &st);
 
 	ce->ce_mode = create_ce_mode(st.st_mode);
@@ -105,7 +129,6 @@ static int add_file_to_cache(const char *path)
 		if (0 <= pos)
 			ce->ce_mode = active_cache[pos]->ce_mode;
 	}
-	ce->ce_flags = htons(namelen);
 
 	if (index_path(ce->sha1, path, &st, !info_only))
 		return -1;
@@ -128,7 +151,7 @@ static int add_file_to_cache(const char *path)
  * For example, you'd want to do this after doing a "git-read-tree",
  * to link up the stat cache details with the proper files.
  */
-static struct cache_entry *refresh_entry(struct cache_entry *ce)
+static struct cache_entry *refresh_entry(struct cache_entry *ce, int really)
 {
 	struct stat st;
 	struct cache_entry *updated;
@@ -137,21 +160,22 @@ static struct cache_entry *refresh_entry(struct cache_entry *ce)
 	if (lstat(ce->name, &st) < 0)
 		return ERR_PTR(-errno);
 
-	changed = ce_match_stat(ce, &st);
+	changed = ce_match_stat(ce, &st, really);
 	if (!changed)
 		return NULL;
 
-	if (ce_modified(ce, &st))
+	if (ce_modified(ce, &st, really))
 		return ERR_PTR(-EINVAL);
 
 	size = ce_size(ce);
 	updated = xmalloc(size);
 	memcpy(updated, ce, size);
 	fill_stat_cache_info(updated, &st);
+
 	return updated;
 }
 
-static int refresh_cache(void)
+static int refresh_cache(int really)
 {
 	int i;
 	int has_errors = 0;
@@ -171,12 +195,19 @@ static int refresh_cache(void)
 			continue;
 		}
 
-		new = refresh_entry(ce);
+		new = refresh_entry(ce, really);
 		if (!new)
 			continue;
 		if (IS_ERR(new)) {
 			if (not_new && PTR_ERR(new) == -ENOENT)
 				continue;
+			if (really && PTR_ERR(new) == -EINVAL) {
+				/* If we are doing --really-refresh that
+				 * means the index is not valid anymore.
+				 */
+				ce->ce_flags &= ~htons(CE_VALID);
+				active_cache_changed = 1;
+			}
 			if (quiet)
 				continue;
 			printf("%s: needs update\n", ce->name);
@@ -274,6 +305,8 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1,
 	memcpy(ce->name, path, len);
 	ce->ce_flags = create_ce_flags(len, stage);
 	ce->ce_mode = create_ce_mode(mode);
+	if (assume_unchanged)
+		ce->ce_flags |= htons(CE_VALID);
 	option = allow_add ? ADD_CACHE_OK_TO_ADD : 0;
 	option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0;
 	if (add_cache_entry(ce, option))
@@ -317,6 +350,12 @@ static void update_one(const char *path, const char *prefix, int prefix_length)
 		fprintf(stderr, "Ignoring path %s\n", path);
 		return;
 	}
+	if (mark_valid_only) {
+		if (mark_valid(p))
+			die("Unable to mark file %s", path);
+		return;
+	}
+
 	if (force_remove) {
 		if (remove_file_from_cache(p))
 			die("git-update-index: unable to remove %s", path);
@@ -467,7 +506,11 @@ int main(int argc, const char **argv)
 				continue;
 			}
 			if (!strcmp(path, "--refresh")) {
-				has_errors |= refresh_cache();
+				has_errors |= refresh_cache(0);
+				continue;
+			}
+			if (!strcmp(path, "--really-refresh")) {
+				has_errors |= refresh_cache(1);
 				continue;
 			}
 			if (!strcmp(path, "--cacheinfo")) {
@@ -493,6 +536,14 @@ int main(int argc, const char **argv)
 					die("git-update-index: %s cannot chmod %s", path, argv[i]);
 				continue;
 			}
+			if (!strcmp(path, "--assume-unchanged")) {
+				mark_valid_only = MARK_VALID;
+				continue;
+			}
+			if (!strcmp(path, "--no-assume-unchanged")) {
+				mark_valid_only = UNMARK_VALID;
+				continue;
+			}
 			if (!strcmp(path, "--info-only")) {
 				info_only = 1;
 				continue;
diff --git a/write-tree.c b/write-tree.c
index f866059..addb5de 100644
--- a/write-tree.c
+++ b/write-tree.c
@@ -111,7 +111,7 @@ int main(int argc, char **argv)
 	funny = 0;
 	for (i = 0; i < entries; i++) {
 		struct cache_entry *ce = active_cache[i];
-		if (ntohs(ce->ce_flags) & ~CE_NAMEMASK) {
+		if (ce_stage(ce)) {
 			if (10 < ++funny) {
 				fprintf(stderr, "...\n");
 				break;
-- 
cgit v0.10.2-6-g49f6


From 8b9b0f3af73233595b6b1103ffb30242508a5e47 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 8 Feb 2006 21:49:47 -0800
Subject: "Assume unchanged" git: do not set CE_VALID with --refresh

When working with automatic assume-unchanged mode using
core.ignorestat, setting CE_VALID after --refresh makes things
more cumbersome to use.  Consider this scenario:

 (1) the working tree is on a filesystem with slow lstat(2).
     The user sets core.ignorestat = true.

 (2) "git checkout" to switch to a different branch (or initial
     checkout) updates all paths and the index starts out with
     "all clean".

 (3) The user knows she wants to edit certain paths.  She uses
     update-index --no-assume-unchanged (we could call it --edit;
     the name is immaterial) to mark these paths and starts
     editing.

 (4) After editing half of the paths marked to be edited, she
     runs "git status".  This runs "update-index --refresh" to
     reduce the false hits from diff-files.

 (5) Now the other half of the paths, since she has not changed
     them, are found to match the index, and CE_VALID is set on
     them again.

For this reason, this commit makes update-index --refresh not
set CE_VALID even after paths without CE_VALID are verified
to be up to date.  The user still can run --really-refresh to
force lstat() to match the index entries to the reality.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/update-index.c b/update-index.c
index 767fd49..bb73050 100644
--- a/update-index.c
+++ b/update-index.c
@@ -172,6 +172,15 @@ static struct cache_entry *refresh_entry(struct cache_entry *ce, int really)
 	memcpy(updated, ce, size);
 	fill_stat_cache_info(updated, &st);
 
+	/* In this case, if really is not set, we should leave
+	 * CE_VALID bit alone.  Otherwise, paths marked with
+	 * --no-assume-unchanged (i.e. things to be edited) will
+	 * reacquire CE_VALID bit automatically, which is not
+	 * really what we want.
+	 */
+	if (!really && assume_unchanged && !(ce->ce_flags & htons(CE_VALID)))
+		updated->ce_flags &= ~htons(CE_VALID);
+
 	return updated;
 }
 
-- 
cgit v0.10.2-6-g49f6


From 2bcab24080dc97fc628e1b601a195a9a96773fac Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 8 Feb 2006 21:50:18 -0800
Subject: ls-files: debugging aid for CE_VALID changes.

This is not really part of the proposed updates for CE_VALID,
but with this change, ls-files -t shows CE_VALID paths with
lowercase tag letters instead of the usual uppercase.  Useful
for checking out what is going on.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/ls-files.c b/ls-files.c
index 6af3b09..3f06ece 100644
--- a/ls-files.c
+++ b/ls-files.c
@@ -447,6 +447,22 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce)
 	if (pathspec && !match(pathspec, ce->name, len))
 		return;
 
+	if (tag && *tag && (ce->ce_flags & htons(CE_VALID))) {
+		static char alttag[4];
+		memcpy(alttag, tag, 3);
+		if (isalpha(tag[0]))
+			alttag[0] = tolower(tag[0]);
+		else if (tag[0] == '?')
+			alttag[0] = '!';
+		else {
+			alttag[0] = 'v';
+			alttag[1] = tag[0];
+			alttag[2] = ' ';
+			alttag[3] = 0;
+		}
+		tag = alttag;
+	}
+
 	if (!show_stage) {
 		fputs(tag, stdout);
 		write_name_quoted("", 0, ce->name + offset,
@@ -503,7 +519,7 @@ static void show_files(void)
 			err = lstat(ce->name, &st);
 			if (show_deleted && err)
 				show_ce_entry(tag_removed, ce);
-			if (show_modified && ce_modified(ce, &st))
+			if (show_modified && ce_modified(ce, &st, 0))
 				show_ce_entry(tag_modified, ce);
 		}
 	}
-- 
cgit v0.10.2-6-g49f6


From b92b2ce94e5673ab3a2fbc9c762b39aaf7d72de5 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 9 Feb 2006 00:55:17 -0800
Subject: "Assume unchanged" git: --really-refresh fix.

The earlier round failed to make --really-refresh mark
up-to-date index entries as valid again due to a trivial thinko.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/update-index.c b/update-index.c
index bb73050..ce1db38 100644
--- a/update-index.c
+++ b/update-index.c
@@ -161,8 +161,13 @@ static struct cache_entry *refresh_entry(struct cache_entry *ce, int really)
 		return ERR_PTR(-errno);
 
 	changed = ce_match_stat(ce, &st, really);
-	if (!changed)
-		return NULL;
+	if (!changed) {
+		if (really && assume_unchanged &&
+		    !(ce->ce_flags & htons(CE_VALID)))
+			; /* mark this one VALID again */
+		else
+			return NULL;
+	}
 
 	if (ce_modified(ce, &st, really))
 		return ERR_PTR(-EINVAL);
-- 
cgit v0.10.2-6-g49f6


From 69d47bdd6c1d6cb9c8603fd1da8b756e2903f955 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 10 Feb 2006 10:29:26 +1100
Subject: gitk: Make "find" on "Files" work again.

It was broken by the change to supply just the child id to
git-diff-tree rather than both child and parent.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/gitk b/gitk
index e482140..f4c6624 100755
--- a/gitk
+++ b/gitk
@@ -1936,7 +1936,7 @@ proc findfiles {} {
     global selectedline numcommits lineid ctext
     global ffileline finddidsel parents nparents
     global findinprogress findstartline findinsertpos
-    global treediffs fdiffids fdiffsneeded fdiffpos
+    global treediffs fdiffid fdiffsneeded fdiffpos
     global findmergefiles
 
     if {$numcommits == 0} return
@@ -1953,11 +1953,9 @@ proc findfiles {} {
     while 1 {
 	set id $lineid($l)
 	if {$findmergefiles || $nparents($id) == 1} {
-	    foreach p $parents($id) {
-		if {![info exists treediffs([list $id $p])]} {
-		    append diffsneeded "$id $p\n"
-		    lappend fdiffsneeded [list $id $p]
-		}
+	    if {![info exists treediffs($id)]} {
+		append diffsneeded "$id\n"
+		lappend fdiffsneeded $id
 	    }
 	}
 	if {[incr l] >= $numcommits} {
@@ -1974,7 +1972,7 @@ proc findfiles {} {
 	    error_popup "Error starting search process: $err"
 	    return
 	}
-	catch {unset fdiffids}
+	catch {unset fdiffid}
 	set fdiffpos 0
 	fconfigure $df -blocking 0
 	fileevent $df readable [list readfilediffs $df]
@@ -1983,16 +1981,15 @@ proc findfiles {} {
     set finddidsel 0
     set findinsertpos end
     set id $lineid($l)
-    set p [lindex $parents($id) 0]
     . config -cursor watch
     settextcursor watch
     set findinprogress 1
-    findcont [list $id $p]
+    findcont $id
     update
 }
 
 proc readfilediffs {df} {
-    global findids fdiffids fdiffs
+    global findid fdiffid fdiffs
 
     set n [gets $df line]
     if {$n < 0} {
@@ -2002,19 +1999,19 @@ proc readfilediffs {df} {
 		stopfindproc
 		bell
 		error_popup "Error in git-diff-tree: $err"
-	    } elseif {[info exists findids]} {
-		set ids $findids
+	    } elseif {[info exists findid]} {
+		set id $findid
 		stopfindproc
 		bell
-		error_popup "Couldn't find diffs for {$ids}"
+		error_popup "Couldn't find diffs for $id"
 	    }
 	}
 	return
     }
-    if {[regexp {^([0-9a-f]{40}) \(from ([0-9a-f]{40})\)} $line match id p]} {
+    if {[regexp {^([0-9a-f]{40})$} $line match id]} {
 	# start of a new string of diffs
 	donefilediff
-	set fdiffids [list $id $p]
+	set fdiffid $id
 	set fdiffs {}
     } elseif {[string match ":*" $line]} {
 	lappend fdiffs [lindex $line 5]
@@ -2022,53 +2019,50 @@ proc readfilediffs {df} {
 }
 
 proc donefilediff {} {
-    global fdiffids fdiffs treediffs findids
+    global fdiffid fdiffs treediffs findid
     global fdiffsneeded fdiffpos
 
-    if {[info exists fdiffids]} {
-	while {[lindex $fdiffsneeded $fdiffpos] ne $fdiffids
+    if {[info exists fdiffid]} {
+	while {[lindex $fdiffsneeded $fdiffpos] ne $fdiffid
 	       && $fdiffpos < [llength $fdiffsneeded]} {
 	    # git-diff-tree doesn't output anything for a commit
 	    # which doesn't change anything
-	    set nullids [lindex $fdiffsneeded $fdiffpos]
-	    set treediffs($nullids) {}
-	    if {[info exists findids] && $nullids eq $findids} {
-		unset findids
-		findcont $nullids
+	    set nullid [lindex $fdiffsneeded $fdiffpos]
+	    set treediffs($nullid) {}
+	    if {[info exists findid] && $nullid eq $findid} {
+		unset findid
+		findcont $nullid
 	    }
 	    incr fdiffpos
 	}
 	incr fdiffpos
 
-	if {![info exists treediffs($fdiffids)]} {
-	    set treediffs($fdiffids) $fdiffs
+	if {![info exists treediffs($fdiffid)]} {
+	    set treediffs($fdiffid) $fdiffs
 	}
-	if {[info exists findids] && $fdiffids eq $findids} {
-	    unset findids
-	    findcont $fdiffids
+	if {[info exists findid] && $fdiffid eq $findid} {
+	    unset findid
+	    findcont $fdiffid
 	}
     }
 }
 
-proc findcont {ids} {
-    global findids treediffs parents nparents
+proc findcont {id} {
+    global findid treediffs parents nparents
     global ffileline findstartline finddidsel
     global lineid numcommits matchinglines findinprogress
     global findmergefiles
 
-    set id [lindex $ids 0]
-    set p [lindex $ids 1]
-    set pi [lsearch -exact $parents($id) $p]
     set l $ffileline
     while 1 {
 	if {$findmergefiles || $nparents($id) == 1} {
-	    if {![info exists treediffs($ids)]} {
-		set findids $ids
+	    if {![info exists treediffs($id)]} {
+		set findid $id
 		set ffileline $l
 		return
 	    }
 	    set doesmatch 0
-	    foreach f $treediffs($ids) {
+	    foreach f $treediffs($id) {
 		set x [findmatches $f]
 		if {$x != {}} {
 		    set doesmatch 1
@@ -2077,21 +2071,13 @@ proc findcont {ids} {
 	    }
 	    if {$doesmatch} {
 		insertmatch $l $id
-		set pi $nparents($id)
 	    }
-	} else {
-	    set pi $nparents($id)
 	}
-	if {[incr pi] >= $nparents($id)} {
-	    set pi 0
-	    if {[incr l] >= $numcommits} {
-		set l 0
-	    }
-	    if {$l == $findstartline} break
-	    set id $lineid($l)
+	if {[incr l] >= $numcommits} {
+	    set l 0
 	}
-	set p [lindex $parents($id) $pi]
-	set ids [list $id $p]
+	if {$l == $findstartline} break
+	set id $lineid($l)
     }
     stopfindproc
     if {!$finddidsel} {
-- 
cgit v0.10.2-6-g49f6


From 8bb2e03b9d47d87657b67ddfaf712e736cf3db8f Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 12 Feb 2006 01:47:57 -0800
Subject: ls-files: split "show-valid-bit" into a different option.

To preserve compatibility with scripts that expect uppercase
letters to be shown, do not make '-t' unconditionally show
the valid bit.  Introduce a '-v' option for that.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/ls-files.c b/ls-files.c
index 3f06ece..30296fd 100644
--- a/ls-files.c
+++ b/ls-files.c
@@ -20,6 +20,7 @@ static int show_unmerged = 0;
 static int show_modified = 0;
 static int show_killed = 0;
 static int show_other_directories = 0;
+static int show_valid_bit = 0;
 static int line_terminator = '\n';
 
 static int prefix_len = 0, prefix_offset = 0;
@@ -447,7 +448,8 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce)
 	if (pathspec && !match(pathspec, ce->name, len))
 		return;
 
-	if (tag && *tag && (ce->ce_flags & htons(CE_VALID))) {
+	if (tag && *tag && show_valid_bit &&
+	    (ce->ce_flags & htons(CE_VALID))) {
 		static char alttag[4];
 		memcpy(alttag, tag, 3);
 		if (isalpha(tag[0]))
@@ -592,7 +594,7 @@ static void verify_pathspec(void)
 }
 
 static const char ls_files_usage[] =
-	"git-ls-files [-z] [-t] (--[cached|deleted|others|stage|unmerged|killed|modified])* "
+	"git-ls-files [-z] [-t] [-v] (--[cached|deleted|others|stage|unmerged|killed|modified])* "
 	"[ --ignored ] [--exclude=<pattern>] [--exclude-from=<file>] "
 	"[ --exclude-per-directory=<filename> ] [--full-name] [--] [<file>]*";
 
@@ -617,13 +619,15 @@ int main(int argc, const char **argv)
 			line_terminator = 0;
 			continue;
 		}
-		if (!strcmp(arg, "-t")) {
+		if (!strcmp(arg, "-t") || !strcmp(arg, "-v")) {
 			tag_cached = "H ";
 			tag_unmerged = "M ";
 			tag_removed = "R ";
 			tag_modified = "C ";
 			tag_other = "? ";
 			tag_killed = "K ";
+			if (arg[1] == 'v')
+				show_valid_bit = 1;
 			continue;
 		}
 		if (!strcmp(arg, "-c") || !strcmp(arg, "--cached")) {
-- 
cgit v0.10.2-6-g49f6


From f9666adfead47d265a65c7ba6f90d0ebae3e6b21 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 12 Feb 2006 01:48:47 -0800
Subject: "assume unchanged" git: documentation.

This updates documentation to describe the "assume unchanged"
behaviour.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index e433407..e5a92f4 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -8,7 +8,8 @@ git-ls-files - Information about files in the index/working directory
 
 SYNOPSIS
 --------
-'git-ls-files' [-z] [-t]
+[verse]
+'git-ls-files' [-z] [-t] [-v]
 		(--[cached|deleted|others|ignored|stage|unmerged|killed|modified])\*
 		(-[c|d|o|i|s|u|k|m])\*
 		[-x <pattern>|--exclude=<pattern>]
@@ -82,6 +83,10 @@ OPTIONS
 	K::	to be killed
 	?	other
 
+-v::
+	Similar to `-t`, but use lowercase letters for files
+	that are marked as 'always matching index'.
+
 --full-name::
 	When run from a subdirectory, the command usually
 	outputs paths relative to the current directory.  This
diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index c74311d..0a1b0ad 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -8,11 +8,14 @@ git-update-index - Modifies the index or directory cache
 
 SYNOPSIS
 --------
+[verse]
 'git-update-index'
 	     [--add] [--remove | --force-remove] [--replace] 
 	     [--refresh [-q] [--unmerged] [--ignore-missing]]
 	     [--cacheinfo <mode> <object> <file>]\*
 	     [--chmod=(+|-)x]
+	     [--assume-unchanged | --no-assume-unchanged]
+	     [--really-refresh]
 	     [--info-only] [--index-info]
 	     [-z] [--stdin]
 	     [--verbose]
@@ -65,6 +68,18 @@ OPTIONS
 --chmod=(+|-)x::
         Set the execute permissions on the updated files.        
 
+--assume-unchanged, --no-assume-unchanged::
+	When these flags are specified, the object names recorded
+	for the paths are not updated.  Instead, these options
+	set and unset the "assume unchanged" bit for the
+	paths.  When the "assume unchanged" bit is on, git stops
+	checking the working tree files for possible
+	modifications, so you need to manually unset the bit to
+	tell git when you change the working tree file. This is
+	sometimes helpful when working with a big project on a
+	filesystem that has very slow lstat(2) system call
+	(e.g. cifs).
+
 --info-only::
 	Do not create objects in the object database for all
 	<file> arguments that follow this flag; just insert
@@ -193,6 +208,37 @@ $ git ls-files -s
 ------------
 
 
+Using "assume unchanged" bit
+----------------------------
+
+Many operations in git depend on your filesystem to have an
+efficient `lstat(2)` implementation, so that `st_mtime`
+information for working tree files can be cheaply checked to see
+if the file contents have changed from the version recorded in
+the index file.  Unfortunately, some filesystems have
+inefficient `lstat(2)`.  If your filesystem is one of them, you
+can set "assume unchanged" bit on paths you have not changed to
+cause git not to do this check.  Note that setting this bit on a
+path does not mean git will check the contents of the file to
+see if it has changed -- it makes git omit any checking and
+assume it has *not* changed.  When you make changes to working
+tree files, you have to explicitly tell git about it by dropping
+"assume unchanged" bit, either before or after you modify them.
+
+In order to set "assume unchanged" bit, use `--assume-unchanged`
+option.  To unset, use `--no-assume-unchanged`.
+
+The command looks at `core.ignorestat` configuration variable.  When
+this is true, paths updated with `git-update-index paths...` and
+paths updated with other git commands that update both index and
+working tree (e.g. `git-apply --index`, `git-checkout-index -u`,
+and `git-read-tree -u`) are automatically marked as "assume
+unchanged".  Note that "assume unchanged" bit is *not* set if
+`git-update-index --refresh` finds the working tree file matches
+the index (use `git-update-index --really-refresh` if you want
+to mark them as "assume unchanged").
+
+
 Examples
 --------
 To update and refresh only the files already checked out:
@@ -201,6 +247,35 @@ To update and refresh only the files already checked out:
 $ git-checkout-index -n -f -a && git-update-index --ignore-missing --refresh
 ----------------
 
+On an inefficient filesystem with `core.ignorestat` set:
+
+------------
+$ git update-index --really-refresh <1>
+$ git update-index --no-assume-unchanged foo.c <2>
+$ git diff --name-only <3>
+$ edit foo.c
+$ git diff --name-only <4>
+M foo.c
+$ git update-index foo.c <5>
+$ git diff --name-only <6>
+$ edit foo.c
+$ git diff --name-only <7>
+$ git update-index --no-assume-unchanged foo.c <8>
+$ git diff --name-only <9>
+M foo.c
+
+<1> forces lstat(2) to set "assume unchanged" bits for paths
+    that match index.
+<2> mark the path to be edited.
+<3> this does lstat(2) and finds index matches the path.
+<4> this does lstat(2) and finds index does not match the path.
+<5> registering the new version to index sets "assume unchanged" bit.
+<6> and it is assumed unchanged.
+<7> even after you edit it.
+<8> you can tell about the change after the fact.
+<9> now it checks with lstat(2) and finds it has been changed.
+------------
+
 
 Configuration
 -------------
@@ -213,6 +288,9 @@ in the index and the file mode on the filesystem if they differ only on
 executable bit.   On such an unfortunate filesystem, you may
 need to use `git-update-index --chmod=`.
 
+The command looks at `core.ignorestat` configuration variable.  See
+'Using "assume unchanged" bit' section above.
+
 
 See Also
 --------
-- 
cgit v0.10.2-6-g49f6


From 7b80be150ce137a790f498a69a784d61d8fc2e78 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 12 Feb 2006 23:46:25 -0800
Subject: cache_name_compare() compares name and stage, nothing else.

The code was a bit unclear in expressing what it wants to compare.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/read-cache.c b/read-cache.c
index efbb1be..f97f92d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -246,9 +246,9 @@ int cache_name_compare(const char *name1, int flags1, const char *name2, int fla
 	if (len1 > len2)
 		return 1;
 
-	/* Differences between "assume up-to-date" should not matter. */
-	flags1 &= ~CE_VALID;
-	flags2 &= ~CE_VALID;
+	/* Compare stages  */
+	flags1 &= CE_STAGEMASK;
+	flags2 &= CE_STAGEMASK;
 
 	if (flags1 < flags2)
 		return -1;
-- 
cgit v0.10.2-6-g49f6


From a49dd05fd047f504a74fb053bb8ddbb9e4bd152b Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 15 Feb 2006 17:34:29 -0800
Subject: pack-objects: reuse data from existing packs.

When generating a new pack, notice if the objects we need are
already stored in existing packs.  If an object is stored deltified,
and its base object is also what we are going to pack, then
reuse the existing deltified representation unconditionally,
bypassing all the expensive find_deltas() and try_deltas()
calls.

Also, notice if what we are going to write out exactly matches
what is already in an existing pack (either deltified or just
compressed).  In such a case, we can just copy it instead of
going through the usual uncompressing & recompressing cycle.

Without this patch, in linux-2.6 repository with about 1500
loose objects and a single mega pack:

    $ git-rev-list --objects v2.6.16-rc3 >RL
    $ wc -l RL
    184141 RL
    $ time git-pack-objects p <RL
    Generating pack...
    Done counting 184141 objects.
    Packing 184141 objects....................
    a1fc7b3e537fcb9b3c46b7505df859f0a11e79d2

    real    12m4.323s
    user    11m2.560s
    sys     0m55.950s

With this patch, the same input:

    $ time ../git.junio/git-pack-objects q <RL
    Generating pack...
    Done counting 184141 objects.
    Packing 184141 objects.....................
    a1fc7b3e537fcb9b3c46b7505df859f0a11e79d2
    Total 184141, written 184141, reused 182441

    real    1m2.608s
    user    0m55.090s
    sys     0m1.830s

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index c5a5e61..70fb2af 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -9,15 +9,31 @@ static const char pack_usage[] = "git-pack-objects [-q] [--non-empty] [--local]
 
 struct object_entry {
 	unsigned char sha1[20];
-	unsigned long size;
-	unsigned long offset;
-	unsigned int depth;
-	unsigned int hash;
+	unsigned long size;	/* uncompressed size */
+	unsigned long offset;	/* offset into the final pack file (nonzero if already written) */
+	unsigned int depth;	/* delta depth */
+	unsigned int hash;	/* name hint hash */
 	enum object_type type;
-	unsigned long delta_size;
-	struct object_entry *delta;
+	unsigned long delta_size;	/* delta data size (uncompressed) */
+	struct object_entry *delta;	/* delta base object */
+	struct packed_git *in_pack; 	/* already in pack */
+	enum object_type in_pack_type;	/* could be delta */
+	unsigned int in_pack_offset;
 };
 
+/*
+ * Objects we are going to pack are collected in objects array (dynamically
+ * expanded).  nr_objects & nr_alloc control this array.  They are stored
+ * in the order we see -- typically rev-list --objects order that gives us
+ * nice "minimum seek" order.
+ *
+ * sorted-by-sha and sorted-by-type are arrays of pointers that point at
+ * elements in the objects array.  The former is used to build the pack
+ * index (lists object names in the ascending order to help offset lookup),
+ * and the latter is used to group similar things together by try_delta()
+ * heuristics.
+ */
+
 static unsigned char object_list_sha1[20];
 static int non_empty = 0;
 static int local = 0;
@@ -29,6 +45,135 @@ static const char *base_name;
 static unsigned char pack_file_sha1[20];
 static int progress = 1;
 
+/*
+ * The object names in objects array are hashed with this hashtable,
+ * to help looking up the entry by object name.  Binary search from
+ * sorted_by_sha is also possible but this was easier to code and faster.
+ * This hashtable is built after all the objects are seen.
+ */
+static int *object_ix = NULL;
+static int object_ix_hashsz = 0;
+
+/*
+ * Pack index for existing packs gives us easy access to the offsets into
+ * corresponding pack file where each object's data starts, but the entries
+ * do not store the size of the compressed representation (uncompressed
+ * size is easily available by examining the pack entry header).  We build
+ * a hashtable of existing packs (pack_revindex), and keep reverse index
+ * here -- pack index file is sorted by object name mapping to offset; this
+ * pack_revindex[].revindex array is an ordered list of offsets, so if you
+ * know the offset of an object, next offset is where its packed
+ * representation ends.
+ */
+struct pack_revindex {
+	struct packed_git *p;
+	unsigned long *revindex;
+} *pack_revindex = NULL;
+static int pack_revindex_hashsz = 0;
+
+/*
+ * stats
+ */
+static int written = 0;
+static int reused = 0;
+
+static int pack_revindex_ix(struct packed_git *p)
+{
+	unsigned int ui = (unsigned int) p;
+	int i;
+
+	ui = ui ^ (ui >> 16); /* defeat structure alignment */
+	i = (int)(ui % pack_revindex_hashsz);
+	while (pack_revindex[i].p) {
+		if (pack_revindex[i].p == p)
+			return i;
+		if (++i == pack_revindex_hashsz)
+			i = 0;
+	}
+	return -1 - i;
+}
+
+static void prepare_pack_ix(void)
+{
+	int num;
+	struct packed_git *p;
+	for (num = 0, p = packed_git; p; p = p->next)
+		num++;
+	if (!num)
+		return;
+	pack_revindex_hashsz = num * 11;
+	pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
+	for (p = packed_git; p; p = p->next) {
+		num = pack_revindex_ix(p);
+		num = - 1 - num;
+		pack_revindex[num].p = p;
+	}
+	/* revindex elements are lazily initialized */
+}
+
+static int cmp_offset(const void *a_, const void *b_)
+{
+	unsigned long a = *(unsigned long *) a_;
+	unsigned long b = *(unsigned long *) b_;
+	if (a < b)
+		return -1;
+	else if (a == b)
+		return 0;
+	else
+		return 1;
+}
+
+/*
+ * Ordered list of offsets of objects in the pack.
+ */
+static void prepare_pack_revindex(struct pack_revindex *rix)
+{
+	struct packed_git *p = rix->p;
+	int num_ent = num_packed_objects(p);
+	int i;
+	void *index = p->index_base + 256;
+
+	rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1));
+	for (i = 0; i < num_ent; i++) {
+		long hl = *((long *)(index + 24 * i));
+		rix->revindex[i] = ntohl(hl);
+	}
+	/* This knows the pack format -- the 20-byte trailer
+	 * follows immediately after the last object data.
+	 */
+	rix->revindex[num_ent] = p->pack_size - 20;
+	qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset);
+}
+
+static unsigned long find_packed_object_size(struct packed_git *p,
+					     unsigned long ofs)
+{
+	int num;
+	int lo, hi;
+	struct pack_revindex *rix;
+	unsigned long *revindex;
+	num = pack_revindex_ix(p);
+	if (num < 0)
+		die("internal error: pack revindex uninitialized");
+	rix = &pack_revindex[num];
+	if (!rix->revindex)
+		prepare_pack_revindex(rix);
+	revindex = rix->revindex;
+	lo = 0;
+	hi = num_packed_objects(p) + 1;
+	do {
+		int mi = (lo + hi) / 2;
+		if (revindex[mi] == ofs) {
+			return revindex[mi+1] - ofs;
+		}
+		else if (ofs < revindex[mi])
+			hi = mi;
+		else
+			lo = mi + 1;
+	} while (lo < hi);
+	die("internal error: pack revindex corrupt");
+}
+
 static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
 {
 	unsigned long othersize, delta_size;
@@ -78,35 +223,52 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry
 {
 	unsigned long size;
 	char type[10];
-	void *buf = read_sha1_file(entry->sha1, type, &size);
+	void *buf;
 	unsigned char header[10];
 	unsigned hdrlen, datalen;
 	enum object_type obj_type;
 
-	if (!buf)
-		die("unable to read %s", sha1_to_hex(entry->sha1));
-	if (size != entry->size)
-		die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
-
-	/*
-	 * The object header is a byte of 'type' followed by zero or
-	 * more bytes of length.  For deltas, the 20 bytes of delta sha1
-	 * follows that.
-	 */
 	obj_type = entry->type;
-	if (entry->delta) {
-		buf = delta_against(buf, size, entry);
-		size = entry->delta_size;
-		obj_type = OBJ_DELTA;
+	if (!entry->in_pack ||
+	    (obj_type != entry->in_pack_type)) {
+		buf = read_sha1_file(entry->sha1, type, &size);
+		if (!buf)
+			die("unable to read %s", sha1_to_hex(entry->sha1));
+		if (size != entry->size)
+			die("object %s size inconsistency (%lu vs %lu)",
+			    sha1_to_hex(entry->sha1), size, entry->size);
+		if (entry->delta) {
+			buf = delta_against(buf, size, entry);
+			size = entry->delta_size;
+			obj_type = OBJ_DELTA;
+		}
+		/*
+		 * The object header is a byte of 'type' followed by zero or
+		 * more bytes of length.  For deltas, the 20 bytes of delta
+		 * sha1 follows that.
+		 */
+		hdrlen = encode_header(obj_type, size, header);
+		sha1write(f, header, hdrlen);
+
+		if (entry->delta) {
+			sha1write(f, entry->delta, 20);
+			hdrlen += 20;
+		}
+		datalen = sha1write_compressed(f, buf, size);
+		free(buf);
 	}
-	hdrlen = encode_header(obj_type, size, header);
-	sha1write(f, header, hdrlen);
-	if (entry->delta) {
-		sha1write(f, entry->delta, 20);
-		hdrlen += 20;
+	else {
+		struct packed_git *p = entry->in_pack;
+		use_packed_git(p);
+
+		datalen = find_packed_object_size(p, entry->in_pack_offset);
+		buf = p->pack_base + entry->in_pack_offset;
+		sha1write(f, buf, datalen);
+		unuse_packed_git(p);
+		hdrlen = 0; /* not really */
+		reused++;
 	}
-	datalen = sha1write_compressed(f, buf, size);
-	free(buf);
+	written++;
 	return hdrlen + datalen;
 }
 
@@ -148,8 +310,6 @@ static void write_pack_file(void)
 		offset = write_one(f, objects + i, offset);
 
 	sha1close(f, pack_file_sha1, 1);
-	mb = offset >> 20;
-	offset &= 0xfffff;
 }
 
 static void write_index_file(void)
@@ -196,18 +356,21 @@ static int add_object_entry(unsigned char *sha1, unsigned int hash)
 {
 	unsigned int idx = nr_objects;
 	struct object_entry *entry;
-
-	if (incremental || local) {
-		struct packed_git *p;
-
-		for (p = packed_git; p; p = p->next) {
-			struct pack_entry e;
-
-			if (find_pack_entry_one(sha1, &e, p)) {
-				if (incremental)
-					return 0;
-				if (local && !p->pack_local)
-					return 0;
+	struct packed_git *p;
+	unsigned int found_offset;
+	struct packed_git *found_pack;
+
+	found_pack = NULL;
+	for (p = packed_git; p; p = p->next) {
+		struct pack_entry e;
+		if (find_pack_entry_one(sha1, &e, p)) {
+			if (incremental)
+				return 0;
+			if (local && !p->pack_local)
+				return 0;
+			if (!found_pack) {
+				found_offset = e.offset;
+				found_pack = e.p;
 			}
 		}
 	}
@@ -221,30 +384,107 @@ static int add_object_entry(unsigned char *sha1, unsigned int hash)
 	memset(entry, 0, sizeof(*entry));
 	memcpy(entry->sha1, sha1, 20);
 	entry->hash = hash;
+	if (found_pack) {
+		entry->in_pack = found_pack;
+		entry->in_pack_offset = found_offset;
+	}
 	nr_objects = idx+1;
 	return 1;
 }
 
+static int locate_object_entry_hash(unsigned char *sha1)
+{
+	int i;
+	unsigned int ui;
+	memcpy(&ui, sha1, sizeof(unsigned int));
+	i = ui % object_ix_hashsz;
+	while (0 < object_ix[i]) {
+		if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20))
+			return i;
+		if (++i == object_ix_hashsz)
+			i = 0;
+	}
+	return -1 - i;
+}
+
+static struct object_entry *locate_object_entry(unsigned char *sha1)
+{
+	int i = locate_object_entry_hash(sha1);
+	if (0 <= i)
+		return &objects[object_ix[i]-1];
+	return NULL;
+}
+
 static void check_object(struct object_entry *entry)
 {
 	char type[20];
 
-	if (!sha1_object_info(entry->sha1, type, &entry->size)) {
-		if (!strcmp(type, "commit")) {
-			entry->type = OBJ_COMMIT;
-		} else if (!strcmp(type, "tree")) {
-			entry->type = OBJ_TREE;
-		} else if (!strcmp(type, "blob")) {
-			entry->type = OBJ_BLOB;
-		} else if (!strcmp(type, "tag")) {
-			entry->type = OBJ_TAG;
-		} else
-			die("unable to pack object %s of type %s",
-			    sha1_to_hex(entry->sha1), type);
+	if (entry->in_pack) {
+		/* Check if it is delta, and the base is also an object
+		 * we are going to pack.  If so we will reuse the existing
+		 * delta.
+		 */
+		unsigned char base[20];
+		unsigned long size;
+		struct object_entry *base_entry;
+		if (!check_reuse_pack_delta(entry->in_pack,
+					    entry->in_pack_offset,
+					    base, &size,
+					    &entry->in_pack_type) &&
+		    (base_entry = locate_object_entry(base))) {
+			/* We do not know depth at this point, but it
+			 * does not matter.  Getting delta_chain_length
+			 * with packed_object_info_detail() is not so
+			 * expensive, so we could do that later if we
+			 * wanted to.  Calling sha1_object_info to get
+			 * the true size (and later an uncompressed
+			 * representation) of deeply deltified object
+			 * is quite expensive.
+			 */
+			entry->depth = 1;
+			/* uncompressed size */
+			entry->size = entry->delta_size = size;
+			entry->delta = base_entry;
+			entry->type = OBJ_DELTA;
+			return;
+		}
+		/* Otherwise we would do the usual */
 	}
-	else
+
+	if (sha1_object_info(entry->sha1, type, &entry->size))
 		die("unable to get type of object %s",
 		    sha1_to_hex(entry->sha1));
+
+	if (!strcmp(type, "commit")) {
+		entry->type = OBJ_COMMIT;
+	} else if (!strcmp(type, "tree")) {
+		entry->type = OBJ_TREE;
+	} else if (!strcmp(type, "blob")) {
+		entry->type = OBJ_BLOB;
+	} else if (!strcmp(type, "tag")) {
+		entry->type = OBJ_TAG;
+	} else
+		die("unable to pack object %s of type %s",
+		    sha1_to_hex(entry->sha1), type);
+}
+
+static void hash_objects(void)
+{
+	int i;
+	struct object_entry *oe;
+
+	object_ix_hashsz = nr_objects * 2;
+	object_ix = xcalloc(sizeof(int), object_ix_hashsz);
+	for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
+		int ix = locate_object_entry_hash(oe->sha1);
+		if (0 <= ix) {
+			error("the same object '%s' added twice",
+			      sha1_to_hex(oe->sha1));
+			continue;
+		}
+		ix = -1 - ix;
+		object_ix[ix] = i + 1;
+	}
 }
 
 static void get_object_details(void)
@@ -252,6 +492,8 @@ static void get_object_details(void)
 	int i;
 	struct object_entry *entry = objects;
 
+	hash_objects();
+	prepare_pack_ix();
 	for (i = 0; i < nr_objects; i++)
 		check_object(entry++);
 }
@@ -382,6 +624,13 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 			eye_candy -= nr_objects / 20;
 			fputc('.', stderr);
 		}
+
+		if (entry->delta)
+			/* This happens if we decided to reuse existing
+			 * delta from a pack.
+			 */
+			continue;
+
 		free(n->data);
 		n->entry = entry;
 		n->data = read_sha1_file(entry->sha1, type, &size);
@@ -411,10 +660,12 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 
 static void prepare_pack(int window, int depth)
 {
-	get_object_details();
-
 	if (progress)
 		fprintf(stderr, "Packing %d objects", nr_objects);
+	get_object_details();
+	if (progress)
+		fprintf(stderr, ".");
+
 	sorted_by_type = create_sorted_list(type_size_sort);
 	if (window && depth)
 		find_deltas(sorted_by_type, window+1, depth);
@@ -599,5 +850,7 @@ int main(int argc, char **argv)
 			puts(sha1_to_hex(object_list_sha1));
 		}
 	}
+	fprintf(stderr, "Total %d, written %d, reused %d\n",
+		nr_objects, written, reused);
 	return 0;
 }
diff --git a/pack.h b/pack.h
index 9dafa2b..694e0c5 100644
--- a/pack.h
+++ b/pack.h
@@ -29,5 +29,7 @@ struct pack_header {
 };
 
 extern int verify_pack(struct packed_git *, int);
-
+extern int check_reuse_pack_delta(struct packed_git *, unsigned long,
+				  unsigned char *, unsigned long *,
+				  enum object_type *);
 #endif
diff --git a/sha1_file.c b/sha1_file.c
index 64cf245..0a3a721 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -826,6 +826,25 @@ static unsigned long unpack_object_header(struct packed_git *p, unsigned long of
 	return offset;
 }
 
+int check_reuse_pack_delta(struct packed_git *p, unsigned long offset,
+			   unsigned char *base, unsigned long *sizep,
+			   enum object_type *kindp)
+{
+	unsigned long ptr;
+	int status = -1;
+
+	use_packed_git(p);
+	ptr = offset;
+	ptr = unpack_object_header(p, ptr, kindp, sizep);
+	if (*kindp != OBJ_DELTA)
+		goto done;
+	memcpy(base, p->pack_base + ptr, 20);
+	status = 0;
+ done:
+	unuse_packed_git(p);
+	return status;
+}
+
 void packed_object_info_detail(struct pack_entry *e,
 			       char *type,
 			       unsigned long *size,
-- 
cgit v0.10.2-6-g49f6


From ca5381d43e8595f592d8d7ecfc9bb0bfa5e52f6d Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 16 Feb 2006 11:55:51 -0800
Subject: pack-objects: finishing touches.

This introduces --no-reuse-delta option to disable reusing of
existing delta, which is a large part of the optimization
introduced by this series.  This may become necessary if
repeated repacking makes delta chain too long.  With this, the
output of the command becomes identical to that of the older
implementation.  But the performance suffers greatly.

It still allows reusing non-deltified representations; there is
no point uncompressing and recompressing the whole text.

It also adds a couple more statistics output, while squelching
it under -q flag, which the last round forgot to do.

  $ time old-git-pack-objects --stdout >/dev/null <RL
  Generating pack...
  Done counting 184141 objects.
  Packing 184141 objects....................
  real    12m8.530s       user    11m1.450s       sys     0m57.920s
  $ time git-pack-objects --stdout >/dev/null <RL
  Generating pack...
  Done counting 184141 objects.
  Packing 184141 objects.....................
  Total 184141, written 184141 (delta 138297), reused 178833 (delta 134081)
  real    0m59.549s       user    0m56.670s       sys     0m2.400s
  $ time git-pack-objects --stdout --no-reuse-delta >/dev/null <RL
  Generating pack...
  Done counting 184141 objects.
  Packing 184141 objects.....................
  Total 184141, written 184141 (delta 134833), reused 47904 (delta 0)
  real    11m13.830s      user    9m45.240s       sys     0m44.330s

There is one remaining issue when --no-reuse-delta option is not
used.  It can create delta chains that are deeper than specified.

    A<--B<--C<--D   E   F   G

Suppose we have a delta chain A to D (A is stored in full either
in a pack or as a loose object. B is depth1 delta relative to A,
C is depth2 delta relative to B...) with loose objects E, F, G.
And we are going to pack all of them.

B, C and D are left as delta against A, B and C respectively.
So A, E, F, and G are examined for deltification, and let's say
we decided to keep E expanded, and store the rest as deltas like
this:

    E<--F<--G<--A

Oops.  We ended up making D a bit too deep, didn't we?  B, C and
D form a chain on top of A!

This is because we did not know what the final depth of A would
be, when we checked objects and decided to keep the existing
delta.  Unfortunately, deferring the decision until just before
the deltification is not an option.  To be able to make B, C,
and D candidates for deltification with the rest, we need to
know the type and final unexpanded size of them, but the major
part of the optimization comes from the fact that we do not read
the delta data to do so -- getting the final size is quite an
expensive operation.

To prevent this from happening, we should keep A from being
deltified.  But how would we tell that, cheaply?

To do this most precisely, after check_object() runs, each
object that is used as the base object of some existing delta
needs to be marked with the maximum depth of the objects we
decided to keep deltified (in this case, D is depth 3 relative
to A, so if no other delta chain that is longer than 3 based on
A exists, mark A with 3).  Then when attempting to deltify A, we
would take that number into account to see if the final delta
chain that leads to D becomes too deep.

However, this is a bit cumbersome to compute, so we would cheat
and reduce the maximum depth for A arbitrarily to depth/4 in
this implementation.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt
index 2d67d39..4cb2e83 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -8,7 +8,10 @@ git-pack-objects - Create a packed archive of objects.
 
 SYNOPSIS
 --------
-'git-pack-objects' [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list
+[verse]
+'git-pack-objects' [-q] [--no-reuse-delta] [--non-empty]
+	[--local] [--incremental] [--window=N] [--depth=N]
+	{--stdout | base-name} < object-list
 
 
 DESCRIPTION
@@ -32,6 +35,10 @@ Placing both in the pack/ subdirectory of $GIT_OBJECT_DIRECTORY (or
 any of the directories on $GIT_ALTERNATE_OBJECT_DIRECTORIES)
 enables git to read from such an archive.
 
+In a packed archive, an object is either stored as a compressed
+whole, or as a difference from some other object.  The latter is
+often called a delta.
+
 
 OPTIONS
 -------
@@ -74,6 +81,18 @@ base-name::
         Only create a packed archive if it would contain at
         least one object.
 
+-q::
+	This flag makes the command not report its progress
+	on the standard error stream.
+
+--no-reuse-delta::
+	When creating a packed archive in a repository that
+	has existing packs, the command reuses existing deltas.
+	This sometimes results in a slightly suboptimal pack.
+	This flag tells the command not to reuse existing deltas
+	but compute them from scratch.
+
+
 Author
 ------
 Written by Linus Torvalds <torvalds@osdl.org>
diff --git a/pack-objects.c b/pack-objects.c
index 70fb2af..38e1c99 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -5,7 +5,7 @@
 #include "csum-file.h"
 #include <sys/time.h>
 
-static const char pack_usage[] = "git-pack-objects [-q] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
+static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
 
 struct object_entry {
 	unsigned char sha1[20];
@@ -14,10 +14,11 @@ struct object_entry {
 	unsigned int depth;	/* delta depth */
 	unsigned int hash;	/* name hint hash */
 	enum object_type type;
+	unsigned char edge;	/* reused delta chain points at this entry. */
+	enum object_type in_pack_type;	/* could be delta */
 	unsigned long delta_size;	/* delta data size (uncompressed) */
 	struct object_entry *delta;	/* delta base object */
 	struct packed_git *in_pack; 	/* already in pack */
-	enum object_type in_pack_type;	/* could be delta */
 	unsigned int in_pack_offset;
 };
 
@@ -36,6 +37,7 @@ struct object_entry {
 
 static unsigned char object_list_sha1[20];
 static int non_empty = 0;
+static int no_reuse_delta = 0;
 static int local = 0;
 static int incremental = 0;
 static struct object_entry **sorted_by_sha, **sorted_by_type;
@@ -75,7 +77,9 @@ static int pack_revindex_hashsz = 0;
  * stats
  */
 static int written = 0;
+static int written_delta = 0;
 static int reused = 0;
+static int reused_delta = 0;
 
 static int pack_revindex_ix(struct packed_git *p)
 {
@@ -227,10 +231,23 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry
 	unsigned char header[10];
 	unsigned hdrlen, datalen;
 	enum object_type obj_type;
+	int to_reuse = 0;
 
 	obj_type = entry->type;
-	if (!entry->in_pack ||
-	    (obj_type != entry->in_pack_type)) {
+	if (! entry->in_pack)
+		to_reuse = 0;	/* can't reuse what we don't have */
+	else if (obj_type == OBJ_DELTA)
+		to_reuse = 1;	/* check_object() decided it for us */
+	else if (obj_type != entry->in_pack_type)
+		to_reuse = 0;	/* pack has delta which is unusable */
+	else if (entry->delta)
+		to_reuse = 0;	/* we want to pack afresh */
+	else
+		to_reuse = 1;	/* we have it in-pack undeltified,
+				 * and we do not need to deltify it.
+				 */
+
+	if (! to_reuse) {
 		buf = read_sha1_file(entry->sha1, type, &size);
 		if (!buf)
 			die("unable to read %s", sha1_to_hex(entry->sha1));
@@ -266,8 +283,12 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry
 		sha1write(f, buf, datalen);
 		unuse_packed_git(p);
 		hdrlen = 0; /* not really */
+		if (obj_type == OBJ_DELTA)
+			reused_delta++;
 		reused++;
 	}
+	if (obj_type == OBJ_DELTA)
+		written_delta++;
 	written++;
 	return hdrlen + datalen;
 }
@@ -294,7 +315,6 @@ static void write_pack_file(void)
 	int i;
 	struct sha1file *f;
 	unsigned long offset;
-	unsigned long mb;
 	struct pack_header hdr;
 
 	if (!base_name)
@@ -357,10 +377,9 @@ static int add_object_entry(unsigned char *sha1, unsigned int hash)
 	unsigned int idx = nr_objects;
 	struct object_entry *entry;
 	struct packed_git *p;
-	unsigned int found_offset;
-	struct packed_git *found_pack;
+	unsigned int found_offset = 0;
+	struct packed_git *found_pack = NULL;
 
-	found_pack = NULL;
 	for (p = packed_git; p; p = p->next) {
 		struct pack_entry e;
 		if (find_pack_entry_one(sha1, &e, p)) {
@@ -420,32 +439,39 @@ static void check_object(struct object_entry *entry)
 	char type[20];
 
 	if (entry->in_pack) {
+		unsigned char base[20];
+		unsigned long size;
+		struct object_entry *base_entry;
+
+		/* We want in_pack_type even if we do not reuse delta.
+		 * There is no point not reusing non-delta representations.
+		 */
+		check_reuse_pack_delta(entry->in_pack,
+				       entry->in_pack_offset,
+				       base, &size,
+				       &entry->in_pack_type);
+
 		/* Check if it is delta, and the base is also an object
 		 * we are going to pack.  If so we will reuse the existing
 		 * delta.
 		 */
-		unsigned char base[20];
-		unsigned long size;
-		struct object_entry *base_entry;
-		if (!check_reuse_pack_delta(entry->in_pack,
-					    entry->in_pack_offset,
-					    base, &size,
-					    &entry->in_pack_type) &&
+		if (!no_reuse_delta &&
+		    entry->in_pack_type == OBJ_DELTA &&
 		    (base_entry = locate_object_entry(base))) {
-			/* We do not know depth at this point, but it
-			 * does not matter.  Getting delta_chain_length
-			 * with packed_object_info_detail() is not so
-			 * expensive, so we could do that later if we
-			 * wanted to.  Calling sha1_object_info to get
-			 * the true size (and later an uncompressed
-			 * representation) of deeply deltified object
-			 * is quite expensive.
+
+			/* Depth value does not matter - find_deltas()
+			 * will never consider reused delta as the
+			 * base object to deltify other objects
+			 * against, in order to avoid circular deltas.
 			 */
-			entry->depth = 1;
-			/* uncompressed size */
+
+			/* uncompressed size of the delta data */
 			entry->size = entry->delta_size = size;
 			entry->delta = base_entry;
 			entry->type = OBJ_DELTA;
+
+			base_entry->edge = 1;
+
 			return;
 		}
 		/* Otherwise we would do the usual */
@@ -568,6 +594,13 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 	if (cur_entry->type != old_entry->type)
 		return -1;
 
+	/* If the current object is at edge, take the depth of the objects
+	 * that depend on the current object into account -- otherwise
+	 * they would become too deep.
+	 */
+	if (cur_entry->edge)
+		max_depth /= 4;
+
 	size = cur_entry->size;
 	if (size < 50)
 		return -1;
@@ -627,7 +660,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 
 		if (entry->delta)
 			/* This happens if we decided to reuse existing
-			 * delta from a pack.
+			 * delta from a pack.  "!no_reuse_delta &&" is implied.
 			 */
 			continue;
 
@@ -636,6 +669,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		n->data = read_sha1_file(entry->sha1, type, &size);
 		if (size != entry->size)
 			die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
+
 		j = window;
 		while (--j > 0) {
 			unsigned int other_idx = idx + j;
@@ -664,7 +698,7 @@ static void prepare_pack(int window, int depth)
 		fprintf(stderr, "Packing %d objects", nr_objects);
 	get_object_details();
 	if (progress)
-		fprintf(stderr, ".");
+		fputc('.', stderr);
 
 	sorted_by_type = create_sorted_list(type_size_sort);
 	if (window && depth)
@@ -694,8 +728,9 @@ static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
 		}
 	}
 
-	fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
-		sha1_to_hex(sha1));
+	if (progress)
+		fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
+			sha1_to_hex(sha1));
 
 	if (pack_to_stdout) {
 		if (copy_fd(ifd, 1))
@@ -775,6 +810,10 @@ int main(int argc, char **argv)
 				progress = 0;
 				continue;
 			}
+			if (!strcmp("--no-reuse-delta", arg)) {
+				no_reuse_delta = 1;
+				continue;
+			}
 			if (!strcmp("--stdout", arg)) {
 				pack_to_stdout = 1;
 				continue;
@@ -850,7 +889,8 @@ int main(int argc, char **argv)
 			puts(sha1_to_hex(object_list_sha1));
 		}
 	}
-	fprintf(stderr, "Total %d, written %d, reused %d\n",
-		nr_objects, written, reused);
+	if (progress)
+		fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n",
+			nr_objects, written, written_delta, reused, reused_delta);
 	return 0;
 }
-- 
cgit v0.10.2-6-g49f6


From cec2be76d9789b11c7f955b62dcad4b1050202af Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 16 Feb 2006 11:57:18 -0800
Subject: git-repack: allow passing a couple of flags to pack-objects.

A new flag -q makes underlying pack-objects less chatty.
A new flag -f forces delta to be recomputed from scratch.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index 9060fe8..6c0f792 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -9,7 +9,7 @@ objects into pack files.
 
 SYNOPSIS
 --------
-'git-repack' [-a] [-d] [-l] [-n]
+'git-repack' [-a] [-d] [-f] [-l] [-n] [-q]
 
 DESCRIPTION
 -----------
@@ -43,6 +43,14 @@ OPTIONS
         Pass the `--local` option to `git pack-objects`, see
         gitlink:git-pack-objects[1].
 
+-f::
+        Pass the `--no-reuse-delta` option to `git pack-objects`, see
+        gitlink:git-pack-objects[1].
+
+-q::
+        Pass the `-q` option to `git pack-objects`, see
+        gitlink:git-pack-objects[1].
+
 -n::
         Do not update the server information with
         `git update-server-info`.
diff --git a/git-repack.sh b/git-repack.sh
index 1fafb6e..3d6fec1 100755
--- a/git-repack.sh
+++ b/git-repack.sh
@@ -3,17 +3,20 @@
 # Copyright (c) 2005 Linus Torvalds
 #
 
-USAGE='[-a] [-d] [-l] [-n]'
+USAGE='[-a] [-d] [-f] [-l] [-n] [-q]'
 . git-sh-setup
 	
-no_update_info= all_into_one= remove_redundant= local=
+no_update_info= all_into_one= remove_redundant=
+local= quiet= no_reuse_delta=
 while case "$#" in 0) break ;; esac
 do
 	case "$1" in
 	-n)	no_update_info=t ;;
 	-a)	all_into_one=t ;;
 	-d)	remove_redundant=t ;;
-	-l)	local=t ;;
+	-q)	quiet=-q ;;
+	-f)	no_reuse_delta=--no-reuse-delta ;;
+	-l)	local=--local ;;
 	*)	usage ;;
 	esac
 	shift
@@ -39,9 +42,7 @@ case ",$all_into_one," in
 	    find . -type f \( -name '*.pack' -o -name '*.idx' \) -print`
 	;;
 esac
-if [ "$local" ]; then
-	pack_objects="$pack_objects --local"
-fi
+pack_objects="$pack_objects $local $quiet $no_reuse_delta"
 name=$(git-rev-list --objects $rev_list $(git-rev-parse $rev_parse) 2>&1 |
 	git-pack-objects --non-empty $pack_objects .tmp-pack) ||
 	exit 1
-- 
cgit v0.10.2-6-g49f6


From e4c9327a77bd59e85d4b17a612e78977d68773ee Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Fri, 17 Feb 2006 20:58:45 -0800
Subject: pack-objects: avoid delta chains that are too long.

This tries to rework the solution for the excess delta chain
problem. An earlier commit worked around it ``cheaply'', but
repeated repacking risks unbounded growth of delta chains.

This version counts the length of delta chain we are reusing
from the existing pack, and makes sure a base object that has
a sufficiently long delta chain does not get deltified.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index 38e1c99..0c9f4c9 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -10,16 +10,22 @@ static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--no
 struct object_entry {
 	unsigned char sha1[20];
 	unsigned long size;	/* uncompressed size */
-	unsigned long offset;	/* offset into the final pack file (nonzero if already written) */
+	unsigned long offset;	/* offset into the final pack file;
+				 * nonzero if already written.
+				 */
 	unsigned int depth;	/* delta depth */
+	unsigned int delta_limit;	/* base adjustment for in-pack delta */
 	unsigned int hash;	/* name hint hash */
 	enum object_type type;
-	unsigned char edge;	/* reused delta chain points at this entry. */
 	enum object_type in_pack_type;	/* could be delta */
 	unsigned long delta_size;	/* delta data size (uncompressed) */
 	struct object_entry *delta;	/* delta base object */
 	struct packed_git *in_pack; 	/* already in pack */
 	unsigned int in_pack_offset;
+	struct object_entry *delta_child; /* delitified objects who bases me */
+	struct object_entry *delta_sibling; /* other deltified objects who
+					     * uses the same base as me
+					     */
 };
 
 /*
@@ -470,7 +476,8 @@ static void check_object(struct object_entry *entry)
 			entry->delta = base_entry;
 			entry->type = OBJ_DELTA;
 
-			base_entry->edge = 1;
+			entry->delta_sibling = base_entry->delta_child;
+			base_entry->delta_child = entry;
 
 			return;
 		}
@@ -513,15 +520,32 @@ static void hash_objects(void)
 	}
 }
 
+static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
+{
+	struct object_entry *child = me->delta_child;
+	unsigned int m = n;
+	while (child) {
+		unsigned int c = check_delta_limit(child, n + 1);
+		if (m < c)
+			m = c;
+		child = child->delta_sibling;
+	}
+	return m;
+}
+
 static void get_object_details(void)
 {
 	int i;
-	struct object_entry *entry = objects;
+	struct object_entry *entry;
 
 	hash_objects();
 	prepare_pack_ix();
-	for (i = 0; i < nr_objects; i++)
-		check_object(entry++);
+	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
+		check_object(entry);
+	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
+		if (!entry->delta && entry->delta_child)
+			entry->delta_limit =
+				check_delta_limit(entry, 1);
 }
 
 typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
@@ -598,8 +622,11 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 	 * that depend on the current object into account -- otherwise
 	 * they would become too deep.
 	 */
-	if (cur_entry->edge)
-		max_depth /= 4;
+	if (cur_entry->delta_child) {
+		if (max_depth <= cur_entry->delta_limit)
+			return 0;
+		max_depth -= cur_entry->delta_limit;
+	}
 
 	size = cur_entry->size;
 	if (size < 50)
-- 
cgit v0.10.2-6-g49f6


From 39c015c556f285106931e0500f301de462b0e46e Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Sat, 18 Feb 2006 12:40:22 +0100
Subject: Fixes for ancient versions of GNU make

Some versions of GNU make do not understand $(call), and have problems
interpreting rules like this:

some_target: CFLAGS += -Dsome=defs

[jc: simplified substitution a bit. ]

Signed-off-by: Johannes E. Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Makefile b/Makefile
index c8ee4a4..b43d66f 100644
--- a/Makefile
+++ b/Makefile
@@ -204,12 +204,6 @@ LIB_OBJS = \
 LIBS = $(LIB_FILE)
 LIBS += -lz
 
-# Shell quote;
-# Result of this needs to be placed inside ''
-shq = $(subst ','\'',$(1))
-# This has surrounding ''
-shellquote = '$(call shq,$(1))'
-
 #
 # Platform specific tweaks
 #
@@ -422,7 +416,21 @@ ifdef NO_ACCURATE_DIFF
 	ALL_CFLAGS += -DNO_ACCURATE_DIFF
 endif
 
-ALL_CFLAGS += -DSHA1_HEADER=$(call shellquote,$(SHA1_HEADER)) $(COMPAT_CFLAGS)
+# Shell quote (do not use $(call) to accomodate ancient setups);
+
+SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER))
+
+DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
+bindir_SQ = $(subst ','\'',$(bindir))
+gitexecdir_SQ = $(subst ','\'',$(gitexecdir))
+template_dir_SQ = $(subst ','\'',$(template_dir))
+
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
+PYTHON_PATH_SQ = $(subst ','\'',$(PYTHON_PATH))
+GIT_PYTHON_DIR_SQ = $(subst ','\'',$(GIT_PYTHON_DIR))
+
+ALL_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' $(COMPAT_CFLAGS)
 LIB_OBJS += $(COMPAT_OBJS)
 export prefix TAR INSTALL DESTDIR SHELL_PATH template_dir
 ### Build rules
@@ -441,7 +449,7 @@ git$X: git.c $(LIB_FILE)
 
 $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
 	rm -f $@
-	sed -e '1s|#!.*/sh|#!$(call shq,$(SHELL_PATH))|' \
+	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
 	    -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
 	    -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
 	    -e 's/@@NO_PYTHON@@/$(NO_PYTHON)/g' \
@@ -450,15 +458,15 @@ $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
 
 $(patsubst %.perl,%,$(SCRIPT_PERL)) : % : %.perl
 	rm -f $@
-	sed -e '1s|#!.*perl|#!$(call shq,$(PERL_PATH))|' \
+	sed -e '1s|#!.*perl|#!$(PERL_PATH_SQ)|' \
 	    -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
 	    $@.perl >$@
 	chmod +x $@
 
 $(patsubst %.py,%,$(SCRIPT_PYTHON)) : % : %.py
 	rm -f $@
-	sed -e '1s|#!.*python|#!$(call shq,$(PYTHON_PATH))|' \
-	    -e 's|@@GIT_PYTHON_PATH@@|$(call shq,$(GIT_PYTHON_DIR))|g' \
+	sed -e '1s|#!.*python|#!$(PYTHON_PATH_SQ)|' \
+	    -e 's|@@GIT_PYTHON_PATH@@|$(GIT_PYTHON_DIR_SQ)|g' \
 	    -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \
 	    $@.py >$@
 	chmod +x $@
@@ -484,32 +492,42 @@ git$X git.spec \
 %.o: %.S
 	$(CC) -o $*.o -c $(ALL_CFLAGS) $<
 
-exec_cmd.o: ALL_CFLAGS += -DGIT_EXEC_PATH=\"$(gitexecdir)\"
+exec_cmd.o: exec_cmd.c
+	$(CC) -o $*.o -c $(ALL_CFLAGS) '-DGIT_EXEC_PATH="$(gitexecdir_SQ)"' $<
 
 git-%$X: %.o $(LIB_FILE)
 	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
 
-git-mailinfo$X : SIMPLE_LIB += $(LIB_4_ICONV)
 $(SIMPLE_PROGRAMS) : $(LIB_FILE)
 $(SIMPLE_PROGRAMS) : git-%$X : %.o
 	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 		$(LIB_FILE) $(SIMPLE_LIB)
 
-git-http-fetch$X: fetch.o http.o
-git-http-push$X: http.o
+git-mailinfo$X: mailinfo.o $(LIB_FILE)
+	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
+		$(LIB_FILE) $(SIMPLE_LIB) $(LIB_4_ICONV)
+
 git-local-fetch$X: fetch.o
 git-ssh-fetch$X: rsh.o fetch.o
 git-ssh-upload$X: rsh.o
 git-ssh-pull$X: rsh.o fetch.o
 git-ssh-push$X: rsh.o
 
-git-http-fetch$X: LIBS += $(CURL_LIBCURL)
-git-http-push$X: LIBS += $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
-git-rev-list$X: LIBS += $(OPENSSL_LIBSSL)
+git-http-fetch$X: fetch.o http.o http-fetch.o
+	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
+		$(LIBS) $(CURL_LIBCURL)
+
+git-http-push$X: http.o http-push.o
+	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
+		$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
+
+git-rev-list$X: rev-list.o
+	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
+		$(LIBS) $(OPENSSL_LIBSSL)
 
 init-db.o: init-db.c
 	$(CC) -c $(ALL_CFLAGS) \
-		-DDEFAULT_GIT_TEMPLATE_DIR=$(call shellquote,"$(template_dir)") $*.c
+		-DDEFAULT_GIT_TEMPLATE_DIR='"$(template_dir_SQ)"' $*.c
 
 $(LIB_OBJS): $(LIB_H)
 $(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIB_H)
@@ -547,13 +565,13 @@ check:
 ### Installation rules
 
 install: all
-	$(INSTALL) -d -m755 $(call shellquote,$(DESTDIR)$(bindir))
-	$(INSTALL) -d -m755 $(call shellquote,$(DESTDIR)$(gitexecdir))
-	$(INSTALL) $(ALL_PROGRAMS) $(call shellquote,$(DESTDIR)$(gitexecdir))
-	$(INSTALL) git$X gitk $(call shellquote,$(DESTDIR)$(bindir))
+	$(INSTALL) -d -m755 '$(DESTDIR_SQ)$(bindir_SQ)'
+	$(INSTALL) -d -m755 '$(DESTDIR_SQ)$(gitexecdir_SQ)'
+	$(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(gitexecdir_SQ)'
+	$(INSTALL) git$X gitk '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(MAKE) -C templates install
-	$(INSTALL) -d -m755 $(call shellquote,$(DESTDIR)$(GIT_PYTHON_DIR))
-	$(INSTALL) $(PYMODULES) $(call shellquote,$(DESTDIR)$(GIT_PYTHON_DIR))
+	$(INSTALL) -d -m755 '$(DESTDIR_SQ)$(GIT_PYTHON_DIR_SQ)'
+	$(INSTALL) $(PYMODULES) '$(DESTDIR_SQ)$(GIT_PYTHON_DIR_SQ)'
 
 install-doc:
 	$(MAKE) -C Documentation install
diff --git a/t/Makefile b/t/Makefile
index ba6ddbe..fe65f53 100644
--- a/t/Makefile
+++ b/t/Makefile
@@ -8,10 +8,7 @@ SHELL_PATH ?= $(SHELL)
 TAR ?= $(TAR)
 
 # Shell quote;
-# Result of this needs to be placed inside ''
-shq = $(subst ','\'',$(1))
-# This has surrounding ''
-shellquote = '$(call shq,$(1))'
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
 
 T = $(wildcard t[0-9][0-9][0-9][0-9]-*.sh)
 
@@ -22,7 +19,7 @@ endif
 all: $(T) clean
 
 $(T):
-	@echo "*** $@ ***"; $(call shellquote,$(SHELL_PATH)) $@ $(GIT_TEST_OPTS)
+	@echo "*** $@ ***"; '$(SHELL_PATH_SQ)' $@ $(GIT_TEST_OPTS)
 
 clean:
 	rm -fr trash
-- 
cgit v0.10.2-6-g49f6


From 2a3763ef3d26eb38c0a47997b8e5fd2a7c5214cc Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Sun, 19 Feb 2006 21:16:08 +0100
Subject: avoid makefile override warning

Signed-off-by: Johannes E. Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Makefile b/Makefile
index b43d66f..080651e 100644
--- a/Makefile
+++ b/Makefile
@@ -136,7 +136,7 @@ SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
 
 # The ones that do not have to link with lcrypto nor lz.
 SIMPLE_PROGRAMS = \
-	git-get-tar-commit-id$X git-mailinfo$X git-mailsplit$X \
+	git-get-tar-commit-id$X git-mailsplit$X \
 	git-stripspace$X git-daemon$X
 
 # ... and all the rest that could be moved out of bindir to gitexecdir
@@ -146,8 +146,8 @@ PROGRAMS = \
 	git-convert-objects$X git-diff-files$X \
 	git-diff-index$X git-diff-stages$X \
 	git-diff-tree$X git-fetch-pack$X git-fsck-objects$X \
-	git-hash-object$X git-index-pack$X git-init-db$X \
-	git-local-fetch$X git-ls-files$X git-ls-tree$X git-merge-base$X \
+	git-hash-object$X git-index-pack$X git-init-db$X git-local-fetch$X \
+	git-ls-files$X git-ls-tree$X git-mailinfo$X git-merge-base$X \
 	git-merge-index$X git-mktag$X git-pack-objects$X git-patch-id$X \
 	git-peek-remote$X git-prune-packed$X git-read-tree$X \
 	git-receive-pack$X git-rev-list$X git-rev-parse$X \
-- 
cgit v0.10.2-6-g49f6


From a348ab702a84983c258e4961a58b1b9502f428c8 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Sun, 19 Feb 2006 21:13:48 +0100
Subject: Really honour NO_PYTHON

Do not even test for subprocess (trying to execute python).

Signed-off-by: Johannes E. Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Makefile b/Makefile
index 080651e..d5a95c4 100644
--- a/Makefile
+++ b/Makefile
@@ -291,8 +291,10 @@ endif
 ifdef WITH_OWN_SUBPROCESS_PY
 	PYMODULES += compat/subprocess.py
 else
-	ifneq ($(shell $(PYTHON_PATH) -c 'import subprocess;print"OK"' 2>/dev/null),OK)
-		PYMODULES += compat/subprocess.py
+	ifeq ($(NO_PYTHON),)
+		ifneq ($(shell $(PYTHON_PATH) -c 'import subprocess;print"OK"' 2>/dev/null),OK)
+			PYMODULES += compat/subprocess.py
+		endif
 	endif
 endif
 
-- 
cgit v0.10.2-6-g49f6


From c649657501bada28794a30102d9c13cc28ca0e5e Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 19 Feb 2006 03:32:31 -0800
Subject: rev-list --objects-edge

This new flag is similar to --objects, but also causes rev-list
to show the list of "uninteresting" commits that appear on the
edge, each prefixed with '-'.

Downstream pack-objects will be changed to take these as hints
to use the trees and blobs contained in them as base objects
of the resulting pack, producing an incomplete (not self-contained)
pack.

Such a pack cannot be used in .git/objects/pack (it is prevented
by git-index-pack erroring out if it is fed to git-fetch-pack -k
or git-clone-pack), but would be useful when transferring only
small changes to huge blobs.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/rev-list.c b/rev-list.c
index f2d1105..373549e 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -30,7 +30,7 @@ static const char rev_list_usage[] =
 "    --date-order\n"
 "  formatting output:\n"
 "    --parents\n"
-"    --objects\n"
+"    --objects | --objects-edge\n"
 "    --unpacked\n"
 "    --header | --pretty\n"
 "    --abbrev=nr | --no-abbrev\n"
@@ -44,6 +44,7 @@ static int bisect_list = 0;
 static int tag_objects = 0;
 static int tree_objects = 0;
 static int blob_objects = 0;
+static int edge_hint = 0;
 static int verbose_header = 0;
 static int abbrev = DEFAULT_ABBREV;
 static int show_parents = 0;
@@ -430,16 +431,30 @@ static struct commit_list *find_bisection(struct commit_list *list)
 	return best;
 }
 
+static void mark_edge_parents_uninteresting(struct commit *commit)
+{
+	struct commit_list *parents;
+
+	for (parents = commit->parents; parents; parents = parents->next) {
+		struct commit *parent = parents->item;
+		if (!(parent->object.flags & UNINTERESTING))
+			continue;
+		mark_tree_uninteresting(parent->tree);
+		if (edge_hint)
+			printf("-%s\n", sha1_to_hex(parent->object.sha1));
+	}
+}
+
 static void mark_edges_uninteresting(struct commit_list *list)
 {
 	for ( ; list; list = list->next) {
-		struct commit_list *parents = list->item->parents;
+		struct commit *commit = list->item;
 
-		for ( ; parents; parents = parents->next) {
-			struct commit *commit = parents->item;
-			if (commit->object.flags & UNINTERESTING)
-				mark_tree_uninteresting(commit->tree);
+		if (commit->object.flags & UNINTERESTING) {
+			mark_tree_uninteresting(commit->tree);
+			continue;
 		}
+		mark_edge_parents_uninteresting(commit);
 	}
 }
 
@@ -843,6 +858,13 @@ int main(int argc, const char **argv)
 			blob_objects = 1;
 			continue;
 		}
+		if (!strcmp(arg, "--objects-edge")) {
+			tag_objects = 1;
+			tree_objects = 1;
+			blob_objects = 1;
+			edge_hint = 1;
+			continue;
+		}
 		if (!strcmp(arg, "--unpacked")) {
 			unpacked = 1;
 			limited = 1;
diff --git a/rev-parse.c b/rev-parse.c
index a5fb93c..610eacb 100644
--- a/rev-parse.c
+++ b/rev-parse.c
@@ -43,6 +43,7 @@ static int is_rev_argument(const char *arg)
 		"--min-age=",
 		"--no-merges",
 		"--objects",
+		"--objects-edge",
 		"--parents",
 		"--pretty",
 		"--show-breaks",
-- 
cgit v0.10.2-6-g49f6


From 7a979d99bafae3576d9480007f958eed0c9e0278 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 19 Feb 2006 14:47:21 -0800
Subject: Thin pack - create packfile with missing delta base.

This goes together with the "rev-list --objects-edge" change, to
feed pack-objects a list of edge commits in addition to the usual
object list.  Upon seeing such a list, pack-objects loosens the
usual "self contained delta" constraints, and can produce deltas
against blobs and trees contained in the edge commits without
storing the delta base objects themselves.

The resulting packfile is not usable in .git/objects/pack, but
is a good way to implement "delta-only" transfer.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index 0c9f4c9..ceb107f 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -3,6 +3,7 @@
 #include "delta.h"
 #include "pack.h"
 #include "csum-file.h"
+#include "diff.h"
 #include <sys/time.h>
 
 static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
@@ -26,6 +27,13 @@ struct object_entry {
 	struct object_entry *delta_sibling; /* other deltified objects who
 					     * uses the same base as me
 					     */
+	int preferred_base;	/* we do not pack this, but is encouraged to
+				 * be used as the base objectto delta huge
+				 * objects against.
+				 */
+	int based_on_preferred;	/* current delta candidate is a preferred
+				 * one, or delta against a preferred one.
+				 */
 };
 
 /*
@@ -48,7 +56,7 @@ static int local = 0;
 static int incremental = 0;
 static struct object_entry **sorted_by_sha, **sorted_by_type;
 static struct object_entry *objects = NULL;
-static int nr_objects = 0, nr_alloc = 0;
+static int nr_objects = 0, nr_alloc = 0, nr_result = 0;
 static const char *base_name;
 static unsigned char pack_file_sha1[20];
 static int progress = 1;
@@ -229,7 +237,8 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
 	return n;
 }
 
-static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
+static unsigned long write_object(struct sha1file *f,
+				  struct object_entry *entry)
 {
 	unsigned long size;
 	char type[10];
@@ -239,6 +248,9 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry
 	enum object_type obj_type;
 	int to_reuse = 0;
 
+	if (entry->preferred_base)
+		return 0;
+
 	obj_type = entry->type;
 	if (! entry->in_pack)
 		to_reuse = 0;	/* can't reuse what we don't have */
@@ -326,10 +338,11 @@ static void write_pack_file(void)
 	if (!base_name)
 		f = sha1fd(1, "<stdout>");
 	else
-		f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack");
+		f = sha1create("%s-%s.%s", base_name,
+			       sha1_to_hex(object_list_sha1), "pack");
 	hdr.hdr_signature = htonl(PACK_SIGNATURE);
 	hdr.hdr_version = htonl(PACK_VERSION);
-	hdr.hdr_entries = htonl(nr_objects);
+	hdr.hdr_entries = htonl(nr_result);
 	sha1write(f, &hdr, sizeof(hdr));
 	offset = sizeof(hdr);
 	for (i = 0; i < nr_objects; i++)
@@ -341,9 +354,10 @@ static void write_pack_file(void)
 static void write_index_file(void)
 {
 	int i;
-	struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx");
+	struct sha1file *f = sha1create("%s-%s.%s", base_name,
+					sha1_to_hex(object_list_sha1), "idx");
 	struct object_entry **list = sorted_by_sha;
-	struct object_entry **last = list + nr_objects;
+	struct object_entry **last = list + nr_result;
 	unsigned int array[256];
 
 	/*
@@ -368,7 +382,7 @@ static void write_index_file(void)
 	 * Write the actual SHA1 entries..
 	 */
 	list = sorted_by_sha;
-	for (i = 0; i < nr_objects; i++) {
+	for (i = 0; i < nr_result; i++) {
 		struct object_entry *entry = *list++;
 		unsigned int offset = htonl(entry->offset);
 		sha1write(f, &offset, 4);
@@ -378,27 +392,87 @@ static void write_index_file(void)
 	sha1close(f, NULL, 1);
 }
 
-static int add_object_entry(unsigned char *sha1, unsigned int hash)
+static int locate_object_entry_hash(const unsigned char *sha1)
+{
+	int i;
+	unsigned int ui;
+	memcpy(&ui, sha1, sizeof(unsigned int));
+	i = ui % object_ix_hashsz;
+	while (0 < object_ix[i]) {
+		if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20))
+			return i;
+		if (++i == object_ix_hashsz)
+			i = 0;
+	}
+	return -1 - i;
+}
+
+static struct object_entry *locate_object_entry(const unsigned char *sha1)
+{
+	int i;
+
+	if (!object_ix_hashsz)
+		return NULL;
+
+	i = locate_object_entry_hash(sha1);
+	if (0 <= i)
+		return &objects[object_ix[i]-1];
+	return NULL;
+}
+
+static void rehash_objects(void)
 {
+	int i;
+	struct object_entry *oe;
+
+	object_ix_hashsz = nr_objects * 3;
+	if (object_ix_hashsz < 1024)
+		object_ix_hashsz = 1024;
+	object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz);
+	object_ix = memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
+	for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
+		int ix = locate_object_entry_hash(oe->sha1);
+		if (0 <= ix)
+			continue;
+		ix = -1 - ix;
+		object_ix[ix] = i + 1;
+	}
+}
+
+static int add_object_entry(const unsigned char *sha1, const char *name, int exclude)
+{
+	unsigned int hash = 0;
 	unsigned int idx = nr_objects;
 	struct object_entry *entry;
 	struct packed_git *p;
 	unsigned int found_offset = 0;
 	struct packed_git *found_pack = NULL;
-
-	for (p = packed_git; p; p = p->next) {
-		struct pack_entry e;
-		if (find_pack_entry_one(sha1, &e, p)) {
-			if (incremental)
-				return 0;
-			if (local && !p->pack_local)
-				return 0;
-			if (!found_pack) {
-				found_offset = e.offset;
-				found_pack = e.p;
+	int ix;
+
+	if (!exclude) {
+		for (p = packed_git; p; p = p->next) {
+			struct pack_entry e;
+			if (find_pack_entry_one(sha1, &e, p)) {
+				if (incremental)
+					return 0;
+				if (local && !p->pack_local)
+					return 0;
+				if (!found_pack) {
+					found_offset = e.offset;
+					found_pack = e.p;
+				}
 			}
 		}
 	}
+	if ((entry = locate_object_entry(sha1)) != NULL)
+		goto already_added;
+
+	while (*name) {
+		unsigned char c = *name++;
+		if (isspace(c))
+			continue;
+		hash = hash * 11 + c;
+	}
 
 	if (idx >= nr_alloc) {
 		unsigned int needed = (idx + 1024) * 3 / 2;
@@ -406,45 +480,79 @@ static int add_object_entry(unsigned char *sha1, unsigned int hash)
 		nr_alloc = needed;
 	}
 	entry = objects + idx;
+	nr_objects = idx + 1;
 	memset(entry, 0, sizeof(*entry));
 	memcpy(entry->sha1, sha1, 20);
 	entry->hash = hash;
-	if (found_pack) {
-		entry->in_pack = found_pack;
-		entry->in_pack_offset = found_offset;
+
+	if (object_ix_hashsz * 3 <= nr_objects * 4)
+		rehash_objects();
+	else {
+		ix = locate_object_entry_hash(entry->sha1);
+		if (0 <= ix)
+			die("internal error in object hashing.");
+		object_ix[-1 - ix] = idx + 1;
+	}
+
+ already_added:
+	if (exclude)
+		entry->preferred_base = 1;
+	else {
+		if (found_pack) {
+			entry->in_pack = found_pack;
+			entry->in_pack_offset = found_offset;
+		}
 	}
-	nr_objects = idx+1;
 	return 1;
 }
 
-static int locate_object_entry_hash(unsigned char *sha1)
+static void add_pbase_tree(struct tree_desc *tree)
 {
-	int i;
-	unsigned int ui;
-	memcpy(&ui, sha1, sizeof(unsigned int));
-	i = ui % object_ix_hashsz;
-	while (0 < object_ix[i]) {
-		if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20))
-			return i;
-		if (++i == object_ix_hashsz)
-			i = 0;
+	while (tree->size) {
+		const unsigned char *sha1;
+		const char *name;
+		unsigned mode;
+		unsigned long size;
+		char type[20];
+
+		sha1 = tree_entry_extract(tree, &name, &mode);
+		update_tree_entry(tree);
+		if (!has_sha1_file(sha1))
+			continue;
+		if (sha1_object_info(sha1, type, &size))
+			continue;
+		add_object_entry(sha1, name, 1);
+		if (!strcmp(type, "tree")) {
+			struct tree_desc sub;
+			void *elem;
+			elem = read_sha1_file(sha1, type, &sub.size);
+			sub.buf = elem;
+			if (sub.buf) {
+				add_pbase_tree(&sub);
+				free(elem);
+			}
+		}
 	}
-	return -1 - i;
 }
 
-static struct object_entry *locate_object_entry(unsigned char *sha1)
+static void add_preferred_base(unsigned char *sha1)
 {
-	int i = locate_object_entry_hash(sha1);
-	if (0 <= i)
-		return &objects[object_ix[i]-1];
-	return NULL;
+	struct tree_desc tree;
+	void *elem;
+	elem = read_object_with_reference(sha1, "tree", &tree.size, NULL);
+	tree.buf = elem;
+	if (!tree.buf)
+		return;
+	add_object_entry(sha1, "", 1);
+	add_pbase_tree(&tree);
+	free(elem);
 }
 
 static void check_object(struct object_entry *entry)
 {
 	char type[20];
 
-	if (entry->in_pack) {
+	if (entry->in_pack && !entry->preferred_base) {
 		unsigned char base[20];
 		unsigned long size;
 		struct object_entry *base_entry;
@@ -463,7 +571,8 @@ static void check_object(struct object_entry *entry)
 		 */
 		if (!no_reuse_delta &&
 		    entry->in_pack_type == OBJ_DELTA &&
-		    (base_entry = locate_object_entry(base))) {
+		    (base_entry = locate_object_entry(base)) &&
+		    (!base_entry->preferred_base)) {
 
 			/* Depth value does not matter - find_deltas()
 			 * will never consider reused delta as the
@@ -501,25 +610,6 @@ static void check_object(struct object_entry *entry)
 		    sha1_to_hex(entry->sha1), type);
 }
 
-static void hash_objects(void)
-{
-	int i;
-	struct object_entry *oe;
-
-	object_ix_hashsz = nr_objects * 2;
-	object_ix = xcalloc(sizeof(int), object_ix_hashsz);
-	for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
-		int ix = locate_object_entry_hash(oe->sha1);
-		if (0 <= ix) {
-			error("the same object '%s' added twice",
-			      sha1_to_hex(oe->sha1));
-			continue;
-		}
-		ix = -1 - ix;
-		object_ix[ix] = i + 1;
-	}
-}
-
 static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
 {
 	struct object_entry *child = me->delta_child;
@@ -538,7 +628,6 @@ static void get_object_details(void)
 	int i;
 	struct object_entry *entry;
 
-	hash_objects();
 	prepare_pack_ix();
 	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
 		check_object(entry);
@@ -576,6 +665,24 @@ static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
 	return memcmp(a->sha1, b->sha1, 20);
 }
 
+static struct object_entry **create_final_object_list()
+{
+	struct object_entry **list;
+	int i, j;
+
+	for (i = nr_result = 0; i < nr_objects; i++)
+		if (!objects[i].preferred_base)
+			nr_result++;
+	list = xmalloc(nr_result * sizeof(struct object_entry *));
+	for (i = j = 0; i < nr_objects; i++) {
+		if (!objects[i].preferred_base)
+			list[j++] = objects + i;
+	}
+	current_sort = sha1_sort;
+	qsort(list, nr_result, sizeof(struct object_entry *), sort_comparator);
+	return list;
+}
+
 static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
 {
 	if (a->type < b->type)
@@ -586,6 +693,10 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr
 		return -1;
 	if (a->hash > b->hash)
 		return 1;
+	if (a->preferred_base < b->preferred_base)
+		return -1;
+	if (a->preferred_base > b->preferred_base)
+		return 1;
 	if (a->size < b->size)
 		return -1;
 	if (a->size > b->size)
@@ -610,6 +721,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 {
 	struct object_entry *cur_entry = cur->entry;
 	struct object_entry *old_entry = old->entry;
+	int old_preferred = (old_entry->preferred_base ||
+			     old_entry->based_on_preferred);
 	unsigned long size, oldsize, delta_size, sizediff;
 	long max_size;
 	void *delta_buf;
@@ -618,9 +731,15 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 	if (cur_entry->type != old_entry->type)
 		return -1;
 
-	/* If the current object is at edge, take the depth the objects
-	 * that depend on the current object into account -- otherwise
-	 * they would become too deep.
+	/* We do not compute delta to *create* objects we are not
+	 * going to pack.
+	 */
+	if (cur_entry->preferred_base)
+		return -1;
+
+	/* If the current object is at pack edge, take the depth the
+	 * objects that depend on the current object into account --
+	 * otherwise they would become too deep.
 	 */
 	if (cur_entry->delta_child) {
 		if (max_depth <= cur_entry->delta_limit)
@@ -646,8 +765,27 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 	 * delete).
 	 */
 	max_size = size / 2 - 20;
-	if (cur_entry->delta)
-		max_size = cur_entry->delta_size-1;
+	if (cur_entry->delta) {
+		if (cur_entry->based_on_preferred) {
+			if (old_preferred)
+				max_size = cur_entry->delta_size-1;
+			else
+				/* trying with non-preferred one when we
+				 * already have a delta based on preferred
+				 * one is pointless.
+				 */
+				return 0;
+		}
+		else if (!old_preferred)
+			max_size = cur_entry->delta_size-1;
+		else
+			/* otherwise...  even if delta with a
+			 * preferred one produces a bigger result than
+			 * what we currently have, which is based on a
+			 * non-preferred one, it is OK.
+			 */
+			;
+	}
 	if (sizediff >= max_size)
 		return -1;
 	delta_buf = diff_delta(old->data, oldsize,
@@ -657,6 +795,7 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 	cur_entry->delta = old_entry;
 	cur_entry->delta_size = delta_size;
 	cur_entry->depth = old_entry->depth + 1;
+	cur_entry->based_on_preferred = old_preferred;
 	free(delta_buf);
 	return 0;
 }
@@ -722,7 +861,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 static void prepare_pack(int window, int depth)
 {
 	if (progress)
-		fprintf(stderr, "Packing %d objects", nr_objects);
+		fprintf(stderr, "Packing %d objects", nr_result);
 	get_object_details();
 	if (progress)
 		fputc('.', stderr);
@@ -861,8 +1000,6 @@ int main(int argc, char **argv)
 		gettimeofday(&prev_tv, NULL);
 	}
 	while (fgets(line, sizeof(line), stdin) != NULL) {
-		unsigned int hash;
-		char *p;
 		unsigned char sha1[20];
 
 		if (progress && (eye_candy <= nr_objects)) {
@@ -881,31 +1018,32 @@ int main(int argc, char **argv)
 			}
 			eye_candy += eye_candy_incr;
 		}
+		if (line[0] == '-') {
+			if (get_sha1_hex(line+1, sha1))
+				die("expected edge sha1, got garbage:\n %s",
+				    line+1);
+			add_preferred_base(sha1);
+			continue;
+		}
 		if (get_sha1_hex(line, sha1))
 			die("expected sha1, got garbage:\n %s", line);
-		hash = 0;
-		p = line+40;
-		while (*p) {
-			unsigned char c = *p++;
-			if (isspace(c))
-				continue;
-			hash = hash * 11 + c;
-		}
-		add_object_entry(sha1, hash);
+		add_object_entry(sha1, line+40, 0);
 	}
 	if (progress)
 		fprintf(stderr, "Done counting %d objects.\n", nr_objects);
 	if (non_empty && !nr_objects)
 		return 0;
 
-	sorted_by_sha = create_sorted_list(sha1_sort);
+	sorted_by_sha = create_final_object_list();
 	SHA1_Init(&ctx);
 	list = sorted_by_sha;
-	for (i = 0; i < nr_objects; i++) {
+	for (i = 0; i < nr_result; i++) {
 		struct object_entry *entry = *list++;
 		SHA1_Update(&ctx, entry->sha1, 20);
 	}
 	SHA1_Final(object_list_sha1, &ctx);
+	if (progress && (nr_objects != nr_result))
+		fprintf(stderr, "Result has %d objects.\n", nr_result);
 
 	if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
 		;
@@ -918,6 +1056,6 @@ int main(int argc, char **argv)
 	}
 	if (progress)
 		fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n",
-			nr_objects, written, written_delta, reused, reused_delta);
+			nr_result, written, written_delta, reused, reused_delta);
 	return 0;
 }
-- 
cgit v0.10.2-6-g49f6


From 2245be3e7a5a2999ebf7d38e569c98994b0cda31 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 19 Feb 2006 15:03:49 -0800
Subject: send-pack --thin: use "thin pack" delta transfer.

The new flag loosens the usual "self containedness" requirement
of packfiles, and sends deltified representation of objects when
we know the other side has the base objects needed to unpack
them.  This would help reduce the transfer size.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/send-pack.c b/send-pack.c
index 990be3f..ad22da5 100644
--- a/send-pack.c
+++ b/send-pack.c
@@ -12,6 +12,7 @@ static const char *exec = "git-receive-pack";
 static int verbose = 0;
 static int send_all = 0;
 static int force_update = 0;
+static int use_thin_pack = 0;
 
 static int is_zero_sha1(const unsigned char *sha1)
 {
@@ -41,7 +42,10 @@ static void exec_rev_list(struct ref *refs)
 	int i = 0;
 
 	args[i++] = "rev-list";	/* 0 */
-	args[i++] = "--objects";	/* 1 */
+	if (use_thin_pack)	/* 1 */
+		args[i++] = "--objects-edge";
+	else
+		args[i++] = "--objects";
 	while (refs) {
 		char *buf = malloc(100);
 		if (i > 900)
@@ -361,6 +365,10 @@ int main(int argc, char **argv)
 				verbose = 1;
 				continue;
 			}
+			if (!strcmp(arg, "--thin")) {
+				use_thin_pack = 1;
+				continue;
+			}
 			usage(send_pack_usage);
 		}
 		if (!dest) {
-- 
cgit v0.10.2-6-g49f6


From a79a27636098be2b9652f59bd447ac074f741e26 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 20 Feb 2006 00:09:41 -0800
Subject: Add git-push --thin.

Maybe we would want to make this default before it graduates to
the master branch, but in the meantime to help testing things,
this allows you to say "git push --thin destination".

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-push.sh b/git-push.sh
index 706db99..73dcf06 100755
--- a/git-push.sh
+++ b/git-push.sh
@@ -8,6 +8,7 @@ USAGE='[--all] [--tags] [--force] <repository> [<refspec>...]'
 has_all=
 has_force=
 has_exec=
+has_thin=
 remote=
 do_tags=
 
@@ -22,6 +23,8 @@ do
 		has_force=--force ;;
 	--exec=*)
 		has_exec="$1" ;;
+	--thin)
+		has_thin="$1" ;;
 	-*)
                 usage ;;
         *)
@@ -72,6 +75,7 @@ set x "$remote" "$@"; shift
 test "$has_all" && set x "$has_all" "$@" && shift
 test "$has_force" && set x "$has_force" "$@" && shift
 test "$has_exec" && set x "$has_exec" "$@" && shift
+test "$has_thin" && set x "$has_thin" "$@" && shift
 
 case "$remote" in
 http://* | https://*)
-- 
cgit v0.10.2-6-g49f6


From b19696c2e7c3e753777189100b2ac09c9e04080b Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 20 Feb 2006 00:38:39 -0800
Subject: Use thin pack transfer in "git fetch".

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/fetch-pack.c b/fetch-pack.c
index aa6f42a..09738fe 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -8,7 +8,7 @@ static int keep_pack;
 static int quiet;
 static int verbose;
 static const char fetch_pack_usage[] =
-"git-fetch-pack [-q] [-v] [-k] [--exec=upload-pack] [host:]directory <refs>...";
+"git-fetch-pack [-q] [-v] [-k] [--thin] [--exec=upload-pack] [host:]directory <refs>...";
 static const char *exec = "git-upload-pack";
 
 #define COMPLETE	(1U << 0)
@@ -18,7 +18,7 @@ static const char *exec = "git-upload-pack";
 #define POPPED		(1U << 4)
 
 static struct commit_list *rev_list = NULL;
-static int non_common_revs = 0, multi_ack = 0;
+static int non_common_revs = 0, multi_ack = 0, use_thin_pack = 0;
 
 static void rev_list_push(struct commit *commit, int mark)
 {
@@ -156,8 +156,9 @@ static int find_common(int fd[2], unsigned char *result_sha1,
 			continue;
 		}
 
-		packet_write(fd[1], "want %s%s\n", sha1_to_hex(remote),
-			multi_ack ? " multi_ack" : "");
+		packet_write(fd[1], "want %s%s%s\n", sha1_to_hex(remote),
+			     (multi_ack ? " multi_ack" : ""),
+			     (use_thin_pack ? " thin-pack" : ""));
 		fetching++;
 	}
 	packet_flush(fd[1]);
@@ -421,6 +422,10 @@ int main(int argc, char **argv)
 				keep_pack = 1;
 				continue;
 			}
+			if (!strcmp("--thin", arg)) {
+				use_thin_pack = 1;
+				continue;
+			}
 			if (!strcmp("-v", arg)) {
 				verbose = 1;
 				continue;
@@ -434,6 +439,8 @@ int main(int argc, char **argv)
 	}
 	if (!dest)
 		usage(fetch_pack_usage);
+	if (keep_pack)
+		use_thin_pack = 0;
 	pid = git_connect(fd, dest, exec);
 	if (pid < 0)
 		return 1;
diff --git a/git-fetch.sh b/git-fetch.sh
index b4325d9..23d965f 100755
--- a/git-fetch.sh
+++ b/git-fetch.sh
@@ -320,7 +320,7 @@ fetch_main () {
     ( : subshell because we muck with IFS
       IFS=" 	$LF"
       (
-	  git-fetch-pack $exec $keep "$remote" $rref || echo failed "$remote"
+	  git-fetch-pack $exec $keep --thin "$remote" $rref || echo failed "$remote"
       ) |
       while read sha1 remote_name
       do
diff --git a/upload-pack.c b/upload-pack.c
index d198055..3cdf428 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -14,6 +14,7 @@ static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=n
 #define MAX_HAS 256
 #define MAX_NEEDS 256
 static int nr_has = 0, nr_needs = 0, multi_ack = 0, nr_our_refs = 0;
+static int use_thin_pack = 0;
 static unsigned char has_sha1[MAX_HAS][20];
 static unsigned char needs_sha1[MAX_NEEDS][20];
 static unsigned int timeout = 0;
@@ -49,8 +50,10 @@ static void create_pack_file(void)
 		char *buf;
 		char **p;
 
-		if (create_full_pack)
+		if (create_full_pack) {
 			args = 10;
+			use_thin_pack = 0; /* no point doing it */
+		}
 		else
 			args = nr_has + nr_needs + 5;
 		argv = xmalloc(args * sizeof(char *));
@@ -62,7 +65,7 @@ static void create_pack_file(void)
 		close(fd[0]);
 		close(fd[1]);
 		*p++ = "rev-list";
-		*p++ = "--objects";
+		*p++ = use_thin_pack ? "--objects-edge" : "--objects";
 		if (create_full_pack || MAX_NEEDS <= nr_needs)
 			*p++ = "--all";
 		else {
@@ -192,6 +195,8 @@ static int receive_needs(void)
 			    "expected to get sha, not '%s'", line);
 		if (strstr(line+45, "multi_ack"))
 			multi_ack = 1;
+		if (strstr(line+45, "thin-pack"))
+			use_thin_pack = 1;
 
 		/* We have sent all our refs already, and the other end
 		 * should have chosen out of them; otherwise they are
@@ -213,7 +218,7 @@ static int receive_needs(void)
 
 static int send_ref(const char *refname, const unsigned char *sha1)
 {
-	static char *capabilities = "multi_ack";
+	static char *capabilities = "multi_ack thin-pack";
 	struct object *o = parse_object(sha1);
 
 	if (capabilities)
-- 
cgit v0.10.2-6-g49f6


From c65e898754ef68a5520b2791890dda51753d00c6 Mon Sep 17 00:00:00 2001
From: Ryan Anderson <ryan@michonline.com>
Date: Mon, 20 Feb 2006 05:46:09 -0500
Subject: Add git-annotate, a tool for assigning blame.

Signed-off-by: Ryan Anderson <ryan@michonline.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Makefile b/Makefile
index 317be3c..86ffcf4 100644
--- a/Makefile
+++ b/Makefile
@@ -119,6 +119,7 @@ SCRIPT_SH = \
 SCRIPT_PERL = \
 	git-archimport.perl git-cvsimport.perl git-relink.perl \
 	git-shortlog.perl git-fmt-merge-msg.perl git-rerere.perl \
+	git-annotate.perl \
 	git-svnimport.perl git-mv.perl git-cvsexportcommit.perl
 
 SCRIPT_PYTHON = \
diff --git a/git-annotate.perl b/git-annotate.perl
new file mode 100755
index 0000000..8f98431
--- /dev/null
+++ b/git-annotate.perl
@@ -0,0 +1,321 @@
+#!/usr/bin/perl
+# Copyright 2006, Ryan Anderson <ryan@michonline.com>
+#
+# GPL v2 (See COPYING)
+#
+# This file is licensed under the GPL v2, or a later version
+# at the discretion of Linus Torvalds.
+
+use warnings;
+use strict;
+
+my $filename = shift @ARGV;
+
+
+my @stack = (
+	{
+		'rev' => "HEAD",
+		'filename' => $filename,
+	},
+);
+
+our (@lineoffsets, @pendinglineoffsets);
+our @filelines = ();
+open(F,"<",$filename)
+	or die "Failed to open filename: $!";
+
+while(<F>) {
+	chomp;
+	push @filelines, $_;
+}
+close(F);
+our $leftover_lines = @filelines;
+our %revs;
+our @revqueue;
+our $head;
+
+my $revsprocessed = 0;
+while (my $bound = pop @stack) {
+	my @revisions = git_rev_list($bound->{'rev'}, $bound->{'filename'});
+	foreach my $revinst (@revisions) {
+		my ($rev, @parents) = @$revinst;
+		$head ||= $rev;
+
+		$revs{$rev}{'filename'} = $bound->{'filename'};
+		if (scalar @parents > 0) {
+			$revs{$rev}{'parents'} = \@parents;
+			next;
+		}
+
+		my $newbound = find_parent_renames($rev, $bound->{'filename'});
+		if ( exists $newbound->{'filename'} && $newbound->{'filename'} ne $bound->{'filename'}) {
+			push @stack, $newbound;
+			$revs{$rev}{'parents'} = [$newbound->{'rev'}];
+		}
+	}
+}
+push @revqueue, $head;
+init_claim($head);
+$revs{$head}{'lineoffsets'} = {};
+handle_rev();
+
+
+my $i = 0;
+foreach my $l (@filelines) {
+	my ($output, $rev, $committer, $date);
+	if (ref $l eq 'ARRAY') {
+		($output, $rev, $committer, $date) = @$l;
+		if (length($rev) > 8) {
+			$rev = substr($rev,0,8);
+		}
+	} else {
+		$output = $l;
+		($rev, $committer, $date) = ('unknown', 'unknown', 'unknown');
+	}
+
+	printf("(%8s %10s %10s %d)%s\n", $rev, $committer, $date, $i++, $output);
+}
+
+sub init_claim {
+	my ($rev) = @_;
+	my %revinfo = git_commit_info($rev);
+	for (my $i = 0; $i < @filelines; $i++) {
+		$filelines[$i] = [ $filelines[$i], '', '', '', 1];
+			# line,
+			# rev,
+			# author,
+			# date,
+			# 1 <-- belongs to the original file.
+	}
+	$revs{$rev}{'lines'} = \@filelines;
+}
+
+
+sub handle_rev {
+	my $i = 0;
+	while (my $rev = shift @revqueue) {
+
+		my %revinfo = git_commit_info($rev);
+
+		foreach my $p (@{$revs{$rev}{'parents'}}) {
+
+			git_diff_parse($p, $rev, %revinfo);
+			push @revqueue, $p;
+		}
+
+
+		if (scalar @{$revs{$rev}{parents}} == 0) {
+			# We must be at the initial rev here, so claim everything that is left.
+			for (my $i = 0; $i < @{$revs{$rev}{lines}}; $i++) {
+				if (ref ${$revs{$rev}{lines}}[$i] eq '' || ${$revs{$rev}{lines}}[$i][1] eq '') {
+					claim_line($i, $rev, $revs{$rev}{lines}, %revinfo);
+				}
+			}
+		}
+	}
+}
+
+
+sub git_rev_list {
+	my ($rev, $file) = @_;
+
+	open(P,"-|","git-rev-list","--parents","--remove-empty",$rev,"--",$file)
+		or die "Failed to exec git-rev-list: $!";
+
+	my @revs;
+	while(my $line = <P>) {
+		chomp $line;
+		my ($rev, @parents) = split /\s+/, $line;
+		push @revs, [ $rev, @parents ];
+	}
+	close(P);
+
+	printf("0 revs found for rev %s (%s)\n", $rev, $file) if (@revs == 0);
+	return @revs;
+}
+
+sub find_parent_renames {
+	my ($rev, $file) = @_;
+
+	open(P,"-|","git-diff-tree", "-M50", "-r","--name-status", "-z","$rev")
+		or die "Failed to exec git-diff: $!";
+
+	local $/ = "\0";
+	my %bound;
+	my $junk = <P>;
+	while (my $change = <P>) {
+		chomp $change;
+		my $filename = <P>;
+		chomp $filename;
+
+		if ($change =~ m/^[AMD]$/ ) {
+			next;
+		} elsif ($change =~ m/^R/ ) {
+			my $oldfilename = $filename;
+			$filename = <P>;
+			chomp $filename;
+			if ( $file eq $filename ) {
+				my $parent = git_find_parent($rev, $oldfilename);
+				@bound{'rev','filename'} = ($parent, $oldfilename);
+				last;
+			}
+		}
+	}
+	close(P);
+
+	return \%bound;
+}
+
+
+sub git_find_parent {
+	my ($rev, $filename) = @_;
+
+	open(REVPARENT,"-|","git-rev-list","--remove-empty", "--parents","--max-count=1","$rev","--",$filename)
+		or die "Failed to open git-rev-list to find a single parent: $!";
+
+	my $parentline = <REVPARENT>;
+	chomp $parentline;
+	my ($revfound,$parent) = split m/\s+/, $parentline;
+
+	close(REVPARENT);
+
+	return $parent;
+}
+
+
+# Get a diff between the current revision and a parent.
+# Record the commit information that results.
+sub git_diff_parse {
+	my ($parent, $rev, %revinfo) = @_;
+
+	my ($ri, $pi) = (0,0);
+	open(DIFF,"-|","git-diff-tree","-M","-p",$rev,$parent,"--",
+			$revs{$rev}{'filename'}, $revs{$parent}{'filename'})
+		or die "Failed to call git-diff for annotation: $!";
+
+	my $slines = $revs{$rev}{'lines'};
+	my @plines;
+
+	my $gotheader = 0;
+	my ($remstart, $remlength, $addstart, $addlength);
+	my ($hunk_start, $hunk_index, $hunk_adds);
+	while(<DIFF>) {
+		chomp;
+		if (m/^@@ -(\d+),(\d+) \+(\d+),(\d+)/) {
+			($remstart, $remlength, $addstart, $addlength) = ($1, $2, $3, $4);
+			# Adjust for 0-based arrays
+			$remstart--;
+			$addstart--;
+			# Reinit hunk tracking.
+			$hunk_start = $remstart;
+			$hunk_index = 0;
+			$gotheader = 1;
+
+			for (my $i = $ri; $i < $remstart; $i++) {
+				$plines[$pi++] = $slines->[$i];
+				$ri++;
+			}
+			next;
+		} elsif (!$gotheader) {
+			next;
+		}
+
+		if (m/^\+(.*)$/) {
+			my $line = $1;
+			$plines[$pi++] = [ $line, '', '', '', 0 ];
+			next;
+
+		} elsif (m/^-(.*)$/) {
+			my $line = $1;
+			if (get_line($slines, $ri) eq $line) {
+				# Found a match, claim
+				claim_line($ri, $rev, $slines, %revinfo);
+			} else {
+				die sprintf("Sync error: %d/%d\n|%s\n|%s\n%s => %s\n",
+						$ri, $hunk_start + $hunk_index,
+						$line,
+						get_line($slines, $ri),
+						$rev, $parent);
+			}
+			$ri++;
+
+		} else {
+			if (substr($_,1) ne get_line($slines,$ri) ) {
+				die sprintf("Line %d (%d) does not match:\n|%s\n|%s\n%s => %s\n",
+						$hunk_start + $hunk_index, $ri,
+						substr($_,1),
+						get_line($slines,$ri),
+						$rev, $parent);
+			}
+			$plines[$pi++] = $slines->[$ri++];
+		}
+		$hunk_index++;
+	}
+	close(DIFF);
+	for (my $i = $ri; $i < @{$slines} ; $i++) {
+		push @plines, $slines->[$ri++];
+	}
+
+	$revs{$parent}{lines} = \@plines;
+	return;
+}
+
+sub get_line {
+	my ($lines, $index) = @_;
+
+	return ref $lines->[$index] ne '' ? $lines->[$index][0] : $lines->[$index];
+}
+
+sub git_cat_file {
+	my ($parent, $filename) = @_;
+	return () unless defined $parent && defined $filename;
+	my $blobline = `git-ls-tree $parent $filename`;
+	my ($mode, $type, $blob, $tfilename) = split(/\s+/, $blobline, 4);
+
+	open(C,"-|","git-cat-file", "blob", $blob)
+		or die "Failed to git-cat-file blob $blob (rev $parent, file $filename): " . $!;
+
+	my @lines;
+	while(<C>) {
+		chomp;
+		push @lines, $_;
+	}
+	close(C);
+
+	return @lines;
+}
+
+
+sub claim_line {
+	my ($floffset, $rev, $lines, %revinfo) = @_;
+	my $oline = get_line($lines, $floffset);
+	@{$lines->[$floffset]} = ( $oline, $rev,
+		$revinfo{'author'}, $revinfo{'author_date'} );
+	#printf("Claiming line %d with rev %s: '%s'\n",
+	#		$floffset, $rev, $oline) if 1;
+}
+
+sub git_commit_info {
+	my ($rev) = @_;
+	open(COMMIT, "-|","git-cat-file", "commit", $rev)
+		or die "Failed to call git-cat-file: $!";
+
+	my %info;
+	while(<COMMIT>) {
+		chomp;
+		last if (length $_ == 0);
+
+		if (m/^author (.*) <(.*)> (.*)$/) {
+			$info{'author'} = $1;
+			$info{'author_email'} = $2;
+			$info{'author_date'} = $3;
+		} elsif (m/^committer (.*) <(.*)> (.*)$/) {
+			$info{'committer'} = $1;
+			$info{'committer_email'} = $2;
+			$info{'committer_date'} = $3;
+		}
+	}
+	close(COMMIT);
+
+	return %info;
+}
-- 
cgit v0.10.2-6-g49f6


From 2a86ec46da99e4cd5e6e394d378ba119d8f5069d Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 20 Feb 2006 14:02:25 -0800
Subject: fmt-merge-msg: avoid open "-|" list form for Perl 5.6

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-fmt-merge-msg.perl b/git-fmt-merge-msg.perl
index c34ddc5..a77e94e 100755
--- a/git-fmt-merge-msg.perl
+++ b/git-fmt-merge-msg.perl
@@ -28,11 +28,12 @@ sub andjoin {
 }
 
 sub repoconfig {
-	my $fh;
 	my $val;
 	eval {
-		open $fh, '-|', 'git-repo-config', '--get', 'merge.summary'
-		    or die "$!";
+		my $pid = open(my $fh, '-|');
+		if (!$pid) {
+			exec('git-repo-config', '--get', 'merge.summary');
+		}
 		($val) = <$fh>;
 		close $fh;
 	};
@@ -41,25 +42,32 @@ sub repoconfig {
 
 sub current_branch {
 	my $fh;
-	open $fh, '-|', 'git-symbolic-ref', 'HEAD' or die "$!";
+	my $pid = open($fh, '-|');
+	die "$!" unless defined $pid;
+	if (!$pid) {
+	    exec('git-symbolic-ref', 'HEAD') or die "$!";
+	}
 	my ($bra) = <$fh>;
 	chomp($bra);
+	close $fh or die "$!";
 	$bra =~ s|^refs/heads/||;
 	if ($bra ne 'master') {
 		$bra = " into $bra";
 	} else {
 		$bra = "";
 	}
-
 	return $bra;
 }
 
 sub shortlog {
 	my ($tip, $limit) = @_;
 	my ($fh, @result);
-	open $fh, '-|', ('git-log', "--max-count=$limit", '--topo-order',
-			 '--pretty=oneline', $tip, '^HEAD')
-	    or die "$!";
+	my $pid = open($fh, '-|');
+	die "$!" unless defined $pid;
+	if (!$pid) {
+	    exec('git-log', "--max-count=$limit", '--topo-order',
+		 '--pretty=oneline', $tip, '^HEAD') or die "$!";
+	}
 	while (<$fh>) {
 		s/^[0-9a-f]{40}\s+//;
 		push @result, $_;
-- 
cgit v0.10.2-6-g49f6


From fedd273b758e6aa844555e4e50ebfce59255e015 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 20 Feb 2006 14:02:47 -0800
Subject: rerere: avoid open "-|" list form for Perl 5.6

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-rerere.perl b/git-rerere.perl
index df11951..d3664ff 100755
--- a/git-rerere.perl
+++ b/git-rerere.perl
@@ -131,7 +131,11 @@ sub record_preimage {
 sub find_conflict {
 	my $in;
 	local $/ = "\0";
-	open $in, '-|', qw(git ls-files -z -u) or die "$!: ls-files";
+	my $pid = open($in, '-|');
+	die "$!" unless defined $pid;
+	if (!$pid) {
+		exec(qw(git ls-files -z -u)) or die "$!: ls-files";
+	}
 	my %path = ();
 	my @path = ();
 	while (<$in>) {
-- 
cgit v0.10.2-6-g49f6


From e415907d6c746616f399cf2a4679372dd0cbce83 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 20 Feb 2006 14:23:51 -0800
Subject: send-email: avoid open "-|" list form for Perl 5.6

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-send-email.perl b/git-send-email.perl
index 13b85dd..b0d095b 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -59,24 +59,29 @@ my $rc = GetOptions("from=s" => \$from,
 
 # Now, let's fill any that aren't set in with defaults:
 
-open(GITVAR,"-|","git-var","-l")
-	or die "Failed to open pipe from git-var: $!";
-
-my ($author,$committer);
-while(<GITVAR>) {
-	chomp;
-	my ($var,$data) = split /=/,$_,2;
-	my @fields = split /\s+/, $data;
-
-	my $ident = join(" ", @fields[0...(@fields-3)]);
+sub gitvar {
+    my ($var) = @_;
+    my $fh;
+    my $pid = open($fh, '-|');
+    die "$!" unless defined $pid;
+    if (!$pid) {
+	exec('git-var', $var) or die "$!";
+    }
+    my ($val) = <$fh>;
+    close $fh or die "$!";
+    chomp($val);
+    return $val;
+}
 
-	if ($var eq 'GIT_AUTHOR_IDENT') {
-		$author = $ident;
-	} elsif ($var eq 'GIT_COMMITTER_IDENT') {
-		$committer = $ident;
-	}
+sub gitvar_ident {
+    my ($name) = @_;
+    my $val = gitvar($name);
+    my @field = split(/\s+/, $val);
+    return join(' ', @field[0...(@field-3)]);
 }
-close(GITVAR);
+
+my ($author) = gitvar_ident('GIT_AUTHOR_IDENT');
+my ($committer) = gitvar_ident('GIT_COMMITTER_IDENT');
 
 my $prompting = 0;
 if (!defined $from) {
-- 
cgit v0.10.2-6-g49f6


From 7ae0dc015dc38ce937bb2c044ff8b4875678e8af Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 20 Feb 2006 14:14:15 -0800
Subject: svnimport: avoid open "-|" list form for Perl 5.6

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-svnimport.perl b/git-svnimport.perl
index c536d70..ee2940f 100755
--- a/git-svnimport.perl
+++ b/git-svnimport.perl
@@ -10,7 +10,6 @@
 # The head revision is on branch "origin" by default.
 # You can change that with the '-o' option.
 
-require 5.008; # for shell-safe open("-|",LIST)
 use strict;
 use warnings;
 use Getopt::Std;
@@ -322,8 +321,12 @@ sub get_file($$$) {
 		return undef unless defined $name;
 	}
 
-	open my $F, '-|', "git-hash-object", "-w", $name
+	my $pid = open(my $F, '-|');
+	die $! unless defined $pid;
+	if (!$pid) {
+	    exec("git-hash-object", "-w", $name)
 		or die "Cannot create object: $!\n";
+	}
 	my $sha = <$F>;
 	chomp $sha;
 	close $F;
@@ -398,7 +401,12 @@ sub copy_path($$$$$$$$) {
 			$srcpath =~ s#/*$#/#;
 	}
 	
-	open my $f,"-|","git-ls-tree","-r","-z",$gitrev,$srcpath;
+	my $pid = open my $f,'-|';
+	die $! unless defined $pid;
+	if (!$pid) {
+		exec("git-ls-tree","-r","-z",$gitrev,$srcpath)
+			or die $!;
+	}
 	local $/ = "\0";
 	while(<$f>) {
 		chomp;
@@ -554,7 +562,11 @@ sub commit {
 				@o1 = @old;
 				@old = ();
 			}
-			open my $F, "-|", "git-ls-files", "-z", @o1 or die $!;
+			my $pid = open my $F, "-|";
+			die "$!" unless defined $pid;
+			if (!$pid) {
+				exec("git-ls-files", "-z", @o1) or die $!;
+			}
 			@o1 = ();
 			local $/ = "\0";
 			while(<$F>) {
-- 
cgit v0.10.2-6-g49f6


From dd27478f092295069b148a4cfcf92fea9a4aab03 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 20 Feb 2006 14:17:28 -0800
Subject: cvsimport: avoid open "-|" list form for Perl 5.6

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-cvsimport.perl b/git-cvsimport.perl
index 24f9834..b46469a 100755
--- a/git-cvsimport.perl
+++ b/git-cvsimport.perl
@@ -846,8 +846,12 @@ while(<CVS>) {
 			print "Drop $fn\n" if $opt_v;
 		} else {
 			print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
-			open my $F, '-|', "git-hash-object -w $tmpname"
+			my $pid = open(my $F, '-|');
+			die $! unless defined $pid;
+			if (!$pid) {
+			    exec("git-hash-object", "-w", $tmpname)
 				or die "Cannot create object: $!\n";
+			}
 			my $sha = <$F>;
 			chomp $sha;
 			close $F;
-- 
cgit v0.10.2-6-g49f6


From e15f545155bb4b2cad4475a25cad8fb576d37dee Mon Sep 17 00:00:00 2001
From: Paul Jakma <paul@clubi.ie>
Date: Mon, 20 Feb 2006 23:36:28 +0000
Subject: Makefile tweaks: Solaris 9+ dont need iconv / move up uname variables

- Solaris 9 and up do not need -liconv, so NEEDS_LIBICONV should be set
   only for S8.
- Move the declaration of the uname variables to early in the Makefile
   so they can be referenced by prefix and gitexecdir variables.
- gitexecdir defaults to being the same as bindir, so it might as well
   reference that variable.

[jc: corrupt patch, sneakily tried to remove inclusion of GIT-VERSION-FILE
 I do not know why I am applying this...]

Signed-off-by: Paul Jakma <paul@quagga.net>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Makefile b/Makefile
index 821f637..fbfea56 100644
--- a/Makefile
+++ b/Makefile
@@ -77,6 +77,12 @@ GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE
 	@$(SHELL_PATH) ./GIT-VERSION-GEN
 -include GIT-VERSION-FILE
 
+uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
+uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not')
+uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
+uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
+
 # CFLAGS and LDFLAGS are for the users to override from the command line.
 
 CFLAGS = -g -O2 -Wall
@@ -87,7 +93,7 @@ STRIP ?= strip
 
 prefix = $(HOME)
 bindir = $(prefix)/bin
-gitexecdir = $(prefix)/bin
+gitexecdir = $(bindir)
 template_dir = $(prefix)/share/git-core/templates/
 GIT_PYTHON_DIR = $(prefix)/share/git-core/python
 # DESTDIR=
@@ -217,10 +223,6 @@ shellquote = '$(call shq,$(1))'
 # We choose to avoid "if .. else if .. else .. endif endif"
 # because maintaining the nesting to match is a pain.  If
 # we had "elif" things would have been much nicer...
-uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
-uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
-uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not')
-uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
 
 ifeq ($(uname_S),Darwin)
 	NEEDS_SSL_WITH_CRYPTO = YesPlease
@@ -235,10 +237,10 @@ endif
 ifeq ($(uname_S),SunOS)
 	NEEDS_SOCKET = YesPlease
 	NEEDS_NSL = YesPlease
-	NEEDS_LIBICONV = YesPlease
 	SHELL_PATH = /bin/bash
 	NO_STRCASESTR = YesPlease
 	ifeq ($(uname_R),5.8)
+		NEEDS_LIBICONV = YesPlease
 		NO_UNSETENV = YesPlease
 		NO_SETENV = YesPlease
 	endif
-- 
cgit v0.10.2-6-g49f6


From b992933853ccffac85f7e40310167ef7b8f0432e Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Tue, 21 Feb 2006 13:08:21 +0100
Subject: Fix "gmake -j"

In my attempt to port git to IRIX, I broke it. Sorry.

Signed-off-by: Johannes E. Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Makefile b/Makefile
index d5a95c4..c434e0d 100644
--- a/Makefile
+++ b/Makefile
@@ -515,15 +515,15 @@ git-ssh-upload$X: rsh.o
 git-ssh-pull$X: rsh.o fetch.o
 git-ssh-push$X: rsh.o
 
-git-http-fetch$X: fetch.o http.o http-fetch.o
+git-http-fetch$X: fetch.o http.o http-fetch.o $(LIB_FILE)
 	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 		$(LIBS) $(CURL_LIBCURL)
 
-git-http-push$X: http.o http-push.o
+git-http-push$X: http.o http-push.o $(LIB_FILE)
 	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 		$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
 
-git-rev-list$X: rev-list.o
+git-rev-list$X: rev-list.o $(LIB_FILE)
 	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 		$(LIBS) $(OPENSSL_LIBSSL)
 
-- 
cgit v0.10.2-6-g49f6


From 60ace8790f3cfba4fdc16e71e23c4f9c44ce9b44 Mon Sep 17 00:00:00 2001
From: Carl Worth <cworth@cworth.org>
Date: Tue, 21 Feb 2006 15:33:49 -0800
Subject: git-add: Add support for --, documentation, and test.

This adds support to git-add to allow the common -- to separate
command-line options and file names. It adds documentation and a new
git-add test case as well.

[jc: this should apply to 1.2.X maintenance series, so I reworked
 git-ls-files --error-unmatch test. ]

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Documentation/git-add.txt b/Documentation/git-add.txt
index 89e4614..7e29383 100644
--- a/Documentation/git-add.txt
+++ b/Documentation/git-add.txt
@@ -7,7 +7,7 @@ git-add - Add files to the index file.
 
 SYNOPSIS
 --------
-'git-add' [-n] [-v] <file>...
+'git-add' [-n] [-v] [--] <file>...
 
 DESCRIPTION
 -----------
@@ -26,6 +26,11 @@ OPTIONS
 -v::
         Be verbose.
 
+--::
+	This option can be used to separate command-line options from
+	the list of files (useful when filenames might be mistaken
+	for command-line options).
+
 
 DISCUSSION
 ----------
diff --git a/git-add.sh b/git-add.sh
index f719b4b..611f152 100755
--- a/git-add.sh
+++ b/git-add.sh
@@ -14,6 +14,10 @@ while : ; do
     -v)
 	verbose=--verbose
 	;;
+    --)
+	shift
+	break
+	;;
     -*)
 	usage
 	;;
diff --git a/t/t3700-add.sh b/t/t3700-add.sh
new file mode 100755
index 0000000..6cd05c3
--- /dev/null
+++ b/t/t3700-add.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+#
+# Copyright (c) 2006 Carl D. Worth
+#
+
+test_description='Test of git-add, including the -- option.'
+
+. ./test-lib.sh
+
+test_expect_success \
+    'Test of git-add' \
+    'touch foo && git-add foo'
+
+test_expect_success \
+    'Post-check that foo is in the index' \
+    'git-ls-files foo | grep foo'
+
+test_expect_success \
+    'Test that "git-add -- -q" works' \
+    'touch -- -q && git-add -- -q'
+
+test_done
-- 
cgit v0.10.2-6-g49f6


From 69a60af5d0a84c4a7af907eba87f42dd6fb3c6e1 Mon Sep 17 00:00:00 2001
From: Carl Worth <cworth@cworth.org>
Date: Tue, 21 Feb 2006 17:10:12 -0800
Subject: git-rebase: Clarify usage statement and copy it into the actual
 documentation.

I found a paper thin man page for git-rebase, but was quite happy to
see something much more useful in the usage statement of the script
when I went there to find out how this thing worked. Here it is
cleaned up slightly and expanded a bit into the actual documentation.

Signed-off-by: Carl Worth <cworth@cworth.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt
index 16c158f..f037d12 100644
--- a/Documentation/git-rebase.txt
+++ b/Documentation/git-rebase.txt
@@ -7,14 +7,54 @@ git-rebase - Rebase local commits to new upstream head.
 
 SYNOPSIS
 --------
-'git-rebase' <upstream> [<head>]
+'git-rebase' [--onto <newbase>] <upstream> [<branch>]
 
 DESCRIPTION
 -----------
-Rebases local commits to the new head of the upstream tree.
+git-rebase applies to <upstream> (or optionally to <newbase>) commits
+from <branch> that do not appear in <upstream>. When <branch> is not
+specified it defaults to the current branch (HEAD).
+
+When git-rebase is complete, <branch> will be updated to point to the
+newly created line of commit objects, so the previous line will not be
+accessible unless there are other references to it already.
+
+Assume the following history exists and the current branch is "topic":
+
+          A---B---C topic
+         /
+    D---E---F---G master
+
+From this point, the result of the following commands:
+
+    git-rebase master
+    git-rebase master topic
+
+would be:
+
+                  A'--B'--C' topic
+                 /
+    D---E---F---G master
+
+While, starting from the same point, the result of the following
+commands:
+
+    git-rebase --onto master~1 master
+    git-rebase --onto master~1 master topic
+
+would be:
+
+              A'--B'--C' topic
+             /
+    D---E---F---G master
 
 OPTIONS
 -------
+<newbase>::
+	Starting point at which to create the new commits. If the
+	--onto option is not specified, the starting point is
+	<upstream>.
+
 <upstream>::
 	Upstream branch to compare against.
 
diff --git a/git-rebase.sh b/git-rebase.sh
index 21c3d83..211bf68 100755
--- a/git-rebase.sh
+++ b/git-rebase.sh
@@ -4,24 +4,28 @@
 #
 
 USAGE='[--onto <newbase>] <upstream> [<branch>]'
-LONG_USAGE='If <branch> is specified, switch to that branch first.  Then,
-extract commits in the current branch that are not in <upstream>,
-and reconstruct the current on top of <upstream>, discarding the original
-development history.  If --onto <newbase> is specified, the history is
-reconstructed on top of <newbase>, instead of <upstream>.  For example,
-while on "topic" branch:
+LONG_USAGE='git-rebase applies to <upstream> (or optionally to <newbase>) commits
+from <branch> that do not appear in <upstream>. When <branch> is not
+specified it defaults to the current branch (HEAD).
+
+When git-rebase is complete, <branch> will be updated to point to the
+newly created line of commit objects, so the previous line will not be
+accessible unless there are other references to it already.
+
+Assuming the following history:
 
           A---B---C topic
          /
     D---E---F---G master
 
-	$ '"$0"' --onto master~1 master topic
+The result of the following command:
 
-would rewrite the history to look like this:
+    git-rebase --onto master~1 master topic
 
+  would be:
 
-	      A'\''--B'\''--C'\'' topic
-	     /
+              A'\''--B'\''--C'\'' topic
+             /
     D---E---F---G master
 '
 
-- 
cgit v0.10.2-6-g49f6


From fab5de7936f0cc086836a38d2de4374c3df223b4 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Tue, 21 Feb 2006 18:13:32 -0800
Subject: format-patch: pretty-print timestamp correctly.

Perl is not C and does not truncate the division result.  Arghh!

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-format-patch.sh b/git-format-patch.sh
index e54c9e4..eb75de4 100755
--- a/git-format-patch.sh
+++ b/git-format-patch.sh
@@ -189,7 +189,7 @@ my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
 sub show_date {
     my ($time, $tz) = @_;
     my $minutes = abs($tz);
-    $minutes = ($minutes / 100) * 60 + ($minutes % 100);
+    $minutes = int($minutes / 100) * 60 + ($minutes % 100);
     if ($tz < 0) {
         $minutes = -$minutes;
     }
-- 
cgit v0.10.2-6-g49f6


From 5508a616631fb41531b638f744bd92c701727014 Mon Sep 17 00:00:00 2001
From: Carl Worth <cworth@cworth.org>
Date: Fri, 17 Feb 2006 13:33:28 -0800
Subject: New test to verify that when git-clone fails it cleans up the new
 directory.

Signed-off-by: Carl Worth <cworth@cworth.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/t/t5600-clone-fail-cleanup.sh b/t/t5600-clone-fail-cleanup.sh
new file mode 100755
index 0000000..0c6a363
--- /dev/null
+++ b/t/t5600-clone-fail-cleanup.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+#
+# Copyright (C) 2006 Carl D. Worth <cworth@cworth.org>
+#
+
+test_description='test git-clone to cleanup after failure
+
+This test covers the fact that if git-clone fails, it should remove
+the directory it created, to avoid the user having to manually
+remove the directory before attempting a clone again.'
+
+. ./test-lib.sh
+
+test_expect_failure \
+    'clone of non-existent source should fail' \
+    'git-clone foo bar'
+
+test_expect_failure \
+    'failed clone should not leave a directory' \
+    'cd bar'
+
+# Need a repo to clone
+test_create_repo foo
+
+# clone doesn't like it if there is no HEAD. Is that a bug?
+(cd foo && touch file && git add file && git commit -m 'add file' >/dev/null 2>&1)
+
+test_expect_success \
+    'clone should work now that source exists' \
+    'git-clone foo bar'
+
+test_expect_success \
+    'successfull clone must leave the directory' \
+    'cd bar'
+
+test_done
-- 
cgit v0.10.2-6-g49f6


From d0080b3cdab3a309385ad3fb3b4d2f6a08212985 Mon Sep 17 00:00:00 2001
From: Jason Riedy <ejr@EECS.Berkeley.EDU>
Date: Tue, 21 Feb 2006 12:56:14 -0800
Subject: Fix typo in git-rebase.sh.

s/upsteram/upstream in git-rebase.sh.

Signed-off-by: Jason Riedy <ejr@cs.berkeley.edu>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-rebase.sh b/git-rebase.sh
index 211bf68..5956f06 100755
--- a/git-rebase.sh
+++ b/git-rebase.sh
@@ -75,7 +75,7 @@ esac
 # The upstream head must be given.  Make sure it is valid.
 upstream_name="$1"
 upstream=`git rev-parse --verify "${upstream_name}^0"` ||
-    die "invalid upsteram $upstream_name"
+    die "invalid upstream $upstream_name"
 
 # If a hook exists, give it a chance to interrupt
 if test -x "$GIT_DIR/hooks/pre-rebase"
-- 
cgit v0.10.2-6-g49f6


From c8af25ca0157146de59b992c1f961f1af8465995 Mon Sep 17 00:00:00 2001
From: Carl Worth <cworth@cworth.org>
Date: Tue, 21 Feb 2006 12:48:30 -0800
Subject: git-ls-files: Fix, document, and add test for --error-unmatch option.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index fe53412..28dc533 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -14,6 +14,7 @@ SYNOPSIS
 		[-x <pattern>|--exclude=<pattern>]
 		[-X <file>|--exclude-from=<file>]
 		[--exclude-per-directory=<file>] 
+		[--error-unmatch]
 		[--full-name] [--] [<file>]\*
 
 DESCRIPTION
@@ -72,6 +73,10 @@ OPTIONS
 	read additional exclude patterns that apply only to the
 	directory and its subdirectories in <file>.
 
+--error-unmatch::
+	If any <file> does not appear in the index, treat this as an
+	error (return 1).
+
 -t::
 	Identify the file status with the following tags (followed by
 	a space) at the start of each line:
diff --git a/ls-files.c b/ls-files.c
index df93cf2..27059e2 100644
--- a/ls-files.c
+++ b/ls-files.c
@@ -758,6 +758,7 @@ int main(int argc, const char **argv)
 				continue;
 			error("pathspec '%s' did not match any.",
 			      pathspec[num] + prefix_offset);
+			errors++;
 		}
 		return errors ? 1 : 0;
 	}
diff --git a/t/t3020-ls-files-error-unmatch.sh b/t/t3020-ls-files-error-unmatch.sh
new file mode 100755
index 0000000..d55559e
--- /dev/null
+++ b/t/t3020-ls-files-error-unmatch.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# Copyright (c) 2006 Carl D. Worth
+#
+
+test_description='git-ls-files test for --error-unmatch option
+
+This test runs git-ls-files --error-unmatch to ensure it correctly
+returns an error when a non-existent path is provided on the command
+line.
+'
+. ./test-lib.sh
+
+touch foo bar
+git-update-index --add foo bar
+git-commit -m "add foo bar"
+
+test_expect_failure \
+    'git-ls-files --error-unmatch should fail with unmatched path.' \
+    'git-ls-files --error-unmatch foo bar-does-not-match'
+
+test_expect_success \
+    'git-ls-files --error-unmatch should succeed with matched paths.' \
+    'git-ls-files --error-unmatch foo bar'
+
+test_done
+1
-- 
cgit v0.10.2-6-g49f6


From 5301eee92ceb1c349ea8090a4d8aa3aa70c4abed Mon Sep 17 00:00:00 2001
From: "aneesh.kumar@gmail.com" <aneesh.kumar@gmail.com>
Date: Tue, 21 Feb 2006 16:00:04 +0530
Subject: gitview: Read tag and branch information using git ls-remote

This fixes the bug reported below.

Junio C Hamano <junkio@cox.net> writes:

>
> It does not work in my repository, since you do not seem to
> handle branch and tag names with slashes in them.  All of my
> topic branches live in directories with two-letter names
> (e.g. ak/gitview).

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/contrib/gitview/gitview b/contrib/gitview/gitview
index 5862fcc..b75b39e 100755
--- a/contrib/gitview/gitview
+++ b/contrib/gitview/gitview
@@ -56,20 +56,6 @@ def show_date(epoch, tz):
 
 	return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(secs))
 
-def get_sha1_from_tags(line):
-	fp = os.popen("git cat-file -t " + line)
-	entry = string.strip(fp.readline())
-	fp.close()
-	if (entry == "commit"):
-		return line
-	elif (entry == "tag"):
-		fp = os.popen("git cat-file tag "+ line)
-		entry = string.strip(fp.readline())
-		fp.close()
-		obj = re.split(" ", entry)
-		if (obj[0] == "object"):
-			return obj[1]
-	return None
 
 class CellRendererGraph(gtk.GenericCellRenderer):
 	"""Cell renderer for directed graph.
@@ -465,32 +451,24 @@ class GitView:
 		respective sha1 details """
 
 		self.bt_sha1 = { }
+		ls_remote = re.compile('^(.{40})\trefs/([^^]+)(?:\\^(..))?$');
 		git_dir = os.getenv("GIT_DIR")
 		if (git_dir == None):
 			git_dir = ".git"
 
-		#FIXME the path seperator
-		ref_files = os.listdir(git_dir + "/refs/tags")
-		for file in ref_files:
-			fp = open(git_dir + "/refs/tags/"+file)
-			sha1 = get_sha1_from_tags(string.strip(fp.readline()))
-			try:
-				self.bt_sha1[sha1].append(file)
-			except KeyError:
-				self.bt_sha1[sha1] = [file]
-			fp.close()
-
-
-		#FIXME the path seperator
-		ref_files = os.listdir(git_dir + "/refs/heads")
-		for file in ref_files:
-			fp = open(git_dir + "/refs/heads/" + file)
-			sha1 = get_sha1_from_tags(string.strip(fp.readline()))
-			try:
-				self.bt_sha1[sha1].append(file)
-			except KeyError:
-				self.bt_sha1[sha1] = [file]
-			fp.close()
+		fp = os.popen('git ls-remote ' + git_dir)
+		while 1:
+			line = string.strip(fp.readline())
+			if line == '':
+				break
+			m = ls_remote.match(line)
+			if not m:
+				continue
+			(sha1, name) = (m.group(1), m.group(2))
+			if not self.bt_sha1.has_key(sha1):
+				self.bt_sha1[sha1] = []
+			self.bt_sha1[sha1].append(name)
+		fp.close()
 
 
 	def construct(self):
-- 
cgit v0.10.2-6-g49f6


From d800795613a710fb18353af53730e75185861f41 Mon Sep 17 00:00:00 2001
From: "aneesh.kumar@gmail.com" <aneesh.kumar@gmail.com>
Date: Tue, 21 Feb 2006 16:00:10 +0530
Subject: gitview: Use monospace font to draw the branch and tag name

This patch addresses the following:
Use a monospace font to draw branch and tag names, and
set the font size to 13.
Make the graph column resizable. This helps to accommodate large tag names.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/contrib/gitview/gitview b/contrib/gitview/gitview
index b75b39e..5c338c0 100755
--- a/contrib/gitview/gitview
+++ b/contrib/gitview/gitview
@@ -160,9 +160,9 @@ class CellRendererGraph(gtk.GenericCellRenderer):
 		names_len = 0
 		if (len(names) != 0):
 			for item in names:
-				names_len += len(item)/3
+				names_len += len(item)
 
-		width = box_size * (cols + 1 + names_len )
+		width = box_size * (cols + 1 ) + names_len 
 		height = box_size
 
 		# FIXME I have no idea how to use cell_area properly
@@ -244,6 +244,8 @@ class CellRendererGraph(gtk.GenericCellRenderer):
 			for item in names:
 				name = name + item + " "
 
+			ctx.select_font_face("Monospace")
+			ctx.set_font_size(13)
 			ctx.text_path(name)
 
 		self.set_colour(ctx, colour, 0.0, 0.5)
@@ -515,8 +517,8 @@ class GitView:
 
 		cell = CellRendererGraph()
 		column = gtk.TreeViewColumn()
-		column.set_resizable(False)
-		column.pack_start(cell, expand=False)
+		column.set_resizable(True)
+		column.pack_start(cell, expand=True)
 		column.add_attribute(cell, "node", 1)
 		column.add_attribute(cell, "in-lines", 2)
 		column.add_attribute(cell, "out-lines", 3)
-- 
cgit v0.10.2-6-g49f6


From aa064743fa69e2806d5e0af1fab103baa6fa57cd Mon Sep 17 00:00:00 2001
From: Carl Worth <cworth@cworth.org>
Date: Tue, 21 Feb 2006 20:28:50 -0800
Subject: git-push: Update documentation to describe the no-refspec behavior.

It turns out that the git-push documentation didn't describe what it
would do when not given a refspec, (not on the command line, nor in a
remotes file). This is fairly important for the user who is trying to
understand operations such as:

	git clone git://something/some/where
	# hack, hack, hack
	git push origin

I tracked the mystery behavior down to git-send-pack and lifted the
relevant portion of its documentation up to git-push, (namely that all
refs existing both locally and remotely are updated).

Signed-off-by: Carl Worth <cworth@cworth.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Documentation/git-push.txt b/Documentation/git-push.txt
index 5b89110..6f4a48a 100644
--- a/Documentation/git-push.txt
+++ b/Documentation/git-push.txt
@@ -43,6 +43,12 @@ to fast forward the remote ref that matches <dst>.  If
 the optional plus `+` is used, the remote ref is updated
 even if it does not result in a fast forward update.
 +
+Note: If no explicit refspec is found, (that is neither
+on the command line nor in any Push line of the
+corresponding remotes file---see below), then all the
+refs that exist both on the local side and on the remote
+side are updated.
++
 Some short-cut notations are also supported.
 +
 * `tag <tag>` means the same as `refs/tags/<tag>:refs/tags/<tag>`.
-- 
cgit v0.10.2-6-g49f6


From 50319850343dfe534939ee6b38507d5a8fc44b50 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 22 Feb 2006 01:27:02 -0800
Subject: rev-list.c: fix non-grammatical comments.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/rev-list.c b/rev-list.c
index f2d1105..ee5f15a 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -255,8 +255,8 @@ static void show_commit_list(struct commit_list *list)
 		die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name);
 	}
 	while (objects) {
-		/* An object with name "foo\n0000000000000000000000000000000000000000"
-		 * can be used confuse downstream git-pack-objects very badly.
+		/* An object with name "foo\n0000000..." can be used to
+		 * confuse downstream git-pack-objects very badly.
 		 */
 		const char *ep = strchr(objects->name, '\n');
 		if (ep) {
-- 
cgit v0.10.2-6-g49f6


From 797656e58ddbd82ac461a5142ed726db3a4d0ac0 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Tue, 21 Feb 2006 18:59:37 -0800
Subject: send-pack: do not give up when remote has insanely large number of
 refs.

Stephen C. Tweedie noticed that we give up running rev-list when
we see too many refs on the remote side.  Limit the number of
negative references we give to rev-list and continue.

Not sending any negative references to rev-list is very bad --
we may be pushing a ref that is new to the other end.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/send-pack.c b/send-pack.c
index 990be3f..b58bbab 100644
--- a/send-pack.c
+++ b/send-pack.c
@@ -37,26 +37,44 @@ static void exec_pack_objects(void)
 
 static void exec_rev_list(struct ref *refs)
 {
+	struct ref *ref;
 	static char *args[1000];
-	int i = 0;
+	int i = 0, j;
 
 	args[i++] = "rev-list";	/* 0 */
 	args[i++] = "--objects";	/* 1 */
-	while (refs) {
-		char *buf = malloc(100);
-		if (i > 900)
+
+	/* First send the ones we care about most */
+	for (ref = refs; ref; ref = ref->next) {
+		if (900 < i)
 			die("git-rev-list environment overflow");
-		if (!is_zero_sha1(refs->old_sha1) &&
-		    has_sha1_file(refs->old_sha1)) {
+		if (!is_zero_sha1(ref->new_sha1)) {
+			char *buf = malloc(100);
 			args[i++] = buf;
-			snprintf(buf, 50, "^%s", sha1_to_hex(refs->old_sha1));
+			snprintf(buf, 50, "%s", sha1_to_hex(ref->new_sha1));
 			buf += 50;
+			if (!is_zero_sha1(ref->old_sha1) &&
+			    has_sha1_file(ref->old_sha1)) {
+				args[i++] = buf;
+				snprintf(buf, 50, "^%s",
+					 sha1_to_hex(ref->old_sha1));
+			}
 		}
-		if (!is_zero_sha1(refs->new_sha1)) {
+	}
+
+	/* Then a handful of the remainder
+	 * NEEDSWORK: we would be better off if used the newer ones first.
+	 */
+	for (ref = refs, j = i + 16;
+	     i < 900 && i < j && ref;
+	     ref = ref->next) {
+		if (is_zero_sha1(ref->new_sha1) &&
+		    !is_zero_sha1(ref->old_sha1) &&
+		    has_sha1_file(ref->old_sha1)) {
+			char *buf = malloc(42);
 			args[i++] = buf;
-			snprintf(buf, 50, "%s", sha1_to_hex(refs->new_sha1));
+			snprintf(buf, 42, "^%s", sha1_to_hex(ref->old_sha1));
 		}
-		refs = refs->next;
 	}
 	args[i] = NULL;
 	execv_git_cmd(args);
-- 
cgit v0.10.2-6-g49f6


From 4788d11a0d2ff872d25840768b2266e936a0b1fc Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Mon, 20 Feb 2006 16:20:10 +0100
Subject: Use Ryan's git-annotate instead of jsannotate

Since Ryan's git-annotate is much faster, and has support for renames,
it is likely it goes into the mainstream git soon. Adapt it a little to
work with gitcvs, and actually use it.

Signed-off-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-annotate.perl b/git-annotate.perl
index 8f98431..3800c46 100755
--- a/git-annotate.perl
+++ b/git-annotate.perl
@@ -8,9 +8,25 @@
 
 use warnings;
 use strict;
+use Getopt::Std;
+use POSIX qw(strftime gmtime);
 
-my $filename = shift @ARGV;
+sub usage() {	# print the option summary to STDERR, then exit(1)
+	print STDERR "Usage: $0 [-h] [-l] [-r] [-S revs-file] file
+
+	-l		show long rev
+	-r		follow renames
+	-S revs-file	use revs from revs-file instead of calling git-rev-list
+";
+	# The string above is double-quoted so that $0 is interpolated.
+	exit(1);
+}
 
+our ($opt_h, $opt_l, $opt_r, $opt_S);
+getopts("hlrS:") or usage();
+$opt_h && usage();
+
+my $filename = shift @ARGV;
 
 my @stack = (
 	{
@@ -41,12 +57,19 @@ while (my $bound = pop @stack) {
 		my ($rev, @parents) = @$revinst;
 		$head ||= $rev;
 
+		if (!defined($rev)) {
+			$rev = "";
+		}
 		$revs{$rev}{'filename'} = $bound->{'filename'};
 		if (scalar @parents > 0) {
 			$revs{$rev}{'parents'} = \@parents;
 			next;
 		}
 
+		if (!$opt_r) {
+			next;
+		}
+
 		my $newbound = find_parent_renames($rev, $bound->{'filename'});
 		if ( exists $newbound->{'filename'} && $newbound->{'filename'} ne $bound->{'filename'}) {
 			push @stack, $newbound;
@@ -65,7 +88,7 @@ foreach my $l (@filelines) {
 	my ($output, $rev, $committer, $date);
 	if (ref $l eq 'ARRAY') {
 		($output, $rev, $committer, $date) = @$l;
-		if (length($rev) > 8) {
+		if (!$opt_l && length($rev) > 8) {
 			$rev = substr($rev,0,8);
 		}
 	} else {
@@ -73,7 +96,8 @@ foreach my $l (@filelines) {
 		($rev, $committer, $date) = ('unknown', 'unknown', 'unknown');
 	}
 
-	printf("(%8s %10s %10s %d)%s\n", $rev, $committer, $date, $i++, $output);
+	printf("%s\t(%10s\t%10s\t%d)%s\n", $rev, $committer,
+		format_date($date), $i++, $output);
 }
 
 sub init_claim {
@@ -119,8 +143,12 @@ sub handle_rev {
 sub git_rev_list {
 	my ($rev, $file) = @_;
 
-	open(P,"-|","git-rev-list","--parents","--remove-empty",$rev,"--",$file)
-		or die "Failed to exec git-rev-list: $!";
+	if ($opt_S) {
+		open(P, '<' . $opt_S);
+	} else {
+		open(P,"-|","git-rev-list","--parents","--remove-empty",$rev,"--",$file)
+			or die "Failed to exec git-rev-list: $!";
+	}
 
 	my @revs;
 	while(my $line = <P>) {
@@ -319,3 +347,10 @@ sub git_commit_info {
 
 	return %info;
 }
+
+sub format_date {	# "epoch +hhmm" -> "YYYY-MM-DD HH:MM:SS +hhmm", wall-clock time in that zone
+	my ($timestamp, $timezone) = split(' ', $_[0]);
+	my $offset = (int(abs($timezone) / 100) * 3600 + (abs($timezone) % 100) * 60) * ($timezone < 0 ? -1 : 1);	# +hhmm -> seconds
+	return strftime("%Y-%m-%d %H:%M:%S " . $timezone, gmtime($timestamp + $offset));
+}
+
-- 
cgit v0.10.2-6-g49f6


From 3fda8c4cc74f884280f7472673b79a687b43f46c Mon Sep 17 00:00:00 2001
From: Martin Langhoff <martin@catalyst.net.nz>
Date: Wed, 22 Feb 2006 22:50:15 +1300
Subject: Introducing git-cvsserver -- a CVS emulator for git.

git-cvsserver is highly functional. However, not all methods are implemented,
and for those methods that are implemented, not all switches are implemented.
All the common read operations are implemented, and add/remove/commit are
supported.

Testing has been done using both the CLI CVS client, and the Eclipse CVS
plugin. Most functionality works fine with both of these clients.

Currently git-cvsserver only works over SSH connections, see the
Documentation for more details on how to configure your client. It
does not support pserver for anonymous access but it should not be
hard to implement. Anonymous access will need tighter input validation.

In our very informal tests, it seems to be significantly faster than a real
CVS server.

This utility depends on a version of git-cvsannotate that supports -S and on
DBD::SQLite.

Licensed under GPLv2. Copyright The Open University UK.

Authors: Martyn Smith <martyn@catalyst.net.nz>
         Martin Langhoff <martin@catalyst.net.nz>

Signed-off-by: Martin Langhoff <martin@catalyst.net.nz>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Documentation/git-cvsserver.txt b/Documentation/git-cvsserver.txt
new file mode 100644
index 0000000..88f07ff
--- /dev/null
+++ b/Documentation/git-cvsserver.txt
@@ -0,0 +1,89 @@
+git-cvsserver(1)
+================
+
+NAME
+----
+git-cvsserver - A CVS server emulator for git
+
+
+SYNOPSIS
+--------
+[verse]
+export CVS_SERVER=git-cvsserver
+'cvs' -d :ext:user@server/path/repo.git co <HEAD_name>
+
+
+DESCRIPTION
+-----------
+
+This application is a CVS emulation layer for git.
+
+It is highly functional. However, not all methods are implemented,
+and for those methods that are implemented,
+not all switches are implemented.
+
+Testing has been done using both the CLI CVS client, and the Eclipse CVS
+plugin. Most functionality works fine with both of these clients.
+
+LIMITATIONS
+-----------
+Currently gitcvs only works over ssh connections.
+
+
+INSTALLATION
+------------
+1. Put server.pl somewhere useful on the same machine that is hosting your git repos
+
+2. For each repo that you want accessible from CVS you need to edit config in
+   the repo and add the following section.
+
+   [gitcvs]
+        enabled=1
+        logfile=/path/to/logfile
+
+   n.b. you need to ensure each user that is going to invoke server.pl has
+   write access to the log file.
+
+3. On each client machine you need to set the following variables.
+     CVSROOT should be set as per normal, but the directory should point at the
+             appropriate git repo.
+     CVS_SERVER should be set to the server.pl script that has been put on the
+                remote machine.
+
+4. Clients should now be able to check out modules (where modules are the names
+   of branches in git).
+     $ cvs co -d mylocaldir master
+
+Operations supported
+--------------------
+
+All the operations required for normal use are supported, including
+checkout, diff, status, update, log, add, remove, commit.
+Legacy monitoring operations are not supported (edit, watch and related).
+Exports and tagging (tags and branches) are not supported at this stage.
+
+The server will set the -k mode to binary when relevant. In proper GIT
+tradition, the contents of the files are always respected.
+No keyword expansion or newline munging is supported.
+
+Dependencies
+------------
+
+git-cvsserver depends on DBD::SQLite.
+
+Copyright and Authors
+---------------------
+
+This program is copyright The Open University UK - 2006.
+
+Authors: Martyn Smith    <martyn@catalyst.net.nz>
+         Martin Langhoff <martin@catalyst.net.nz>
+         with ideas and patches from participants of the git-list <git@vger.kernel.org>.
+
+Documentation
+--------------
+Documentation by Martyn Smith <martyn@catalyst.net.nz>, Martin Langhoff <martin@catalyst.net.nz>, and Matthias Urlichs <smurf@smurf.noris.de>.
+
+GIT
+---
+Part of the gitlink:git[7] suite
diff --git a/Makefile b/Makefile
index 0c04882..2d21d4d 100644
--- a/Makefile
+++ b/Makefile
@@ -130,6 +130,7 @@ SCRIPT_SH = \
 SCRIPT_PERL = \
 	git-archimport.perl git-cvsimport.perl git-relink.perl \
 	git-shortlog.perl git-fmt-merge-msg.perl git-rerere.perl \
+	git-cvsserver.perl \
 	git-svnimport.perl git-mv.perl git-cvsexportcommit.perl
 
 SCRIPT_PYTHON = \
diff --git a/git-cvsserver.perl b/git-cvsserver.perl
new file mode 100755
index 0000000..d20d1a8
--- /dev/null
+++ b/git-cvsserver.perl
@@ -0,0 +1,2449 @@
+#!/usr/bin/perl
+
+####
+#### This application is a CVS emulation layer for git.
+#### It is intended for clients to connect over SSH.
+#### See the documentation for more details.
+####
+#### Copyright The Open University UK - 2006.
+####
+#### Authors: Martyn Smith    <martyn@catalyst.net.nz>
+####          Martin Langhoff <martin@catalyst.net.nz>
+####
+####
+#### Released under the GNU Public License, version 2.
+####
+####
+
+use strict;
+use warnings;
+
+use Fcntl;
+use File::Temp qw/tempdir tempfile/;
+use File::Basename;
+
+my $log = GITCVS::log->new();
+my $cfg;
+
+my $DATE_LIST = {    # three-letter English month abbreviation => two-digit month number
+    Jan => "01",
+    Feb => "02",
+    Mar => "03",
+    Apr => "04",
+    May => "05",
+    Jun => "06",
+    Jul => "07",
+    Aug => "08",
+    Sep => "09",
+    Oct => "10",
+    Nov => "11",
+    Dec => "12",
+};
+
+# Enable autoflush for STDOUT (otherwise the whole thing falls apart)
+$| = 1;
+
+#### Dispatch table: protocol request name => handler sub ####
+# Requests mapped to req_CATCHALL are only logged as unhandled.
+my $methods = {
+    'Root'            => \&req_Root,
+    'Valid-responses' => \&req_Validresponses,
+    'valid-requests'  => \&req_validrequests,
+    'Directory'       => \&req_Directory,
+    'Entry'           => \&req_Entry,
+    'Modified'        => \&req_Modified,
+    'Unchanged'       => \&req_Unchanged,
+    'Argument'        => \&req_Argument,
+    'Argumentx'       => \&req_Argument,
+    'expand-modules'  => \&req_expandmodules,
+    'add'             => \&req_add,
+    'remove'          => \&req_remove,
+    'co'              => \&req_co,
+    'update'          => \&req_update,
+    'ci'              => \&req_ci,
+    'diff'            => \&req_diff,
+    'log'             => \&req_log,
+    'tag'             => \&req_CATCHALL,
+    'status'          => \&req_status,
+    'admin'           => \&req_CATCHALL,
+    'history'         => \&req_CATCHALL,
+    'watchers'        => \&req_CATCHALL,
+    'editors'         => \&req_CATCHALL,
+    'annotate'        => \&req_annotate,
+    'Global_option'   => \&req_Globaloption,
+    #'annotate'        => \&req_CATCHALL,
+};
+
+##############################################
+
+
+# $state holds all the bits of information the client sends us that could
+# potentially be useful when it comes to actually _doing_ something.
+my $state = {};
+$log->info("--------------- STARTING -----------------");
+
+my $TEMP_DIR = tempdir( CLEANUP => 1 );
+$log->debug("Temporary directory is '$TEMP_DIR'");
+
+# Keep going until the client closes the connection
+while (<STDIN>)
+{
+    chomp;
+
+    # A request line is a name ([\w-]+), optionally followed by whitespace and data.
+    if ( /^([\w-]+)(?:\s+(.*))?$/ and defined($methods->{$1}) )
+    {
+        # use the $methods hash to call the appropriate sub for this command
+        #$log->info("Method : $1");
+        &{$methods->{$1}}($1,$2);
+    } else {
+        # log fatal because we don't understand this function. If this happens
+        # we're fairly screwed because we don't know if the client is expecting
+        # a response. If it is, the client will hang, we'll hang, and the whole
+        # thing will be custard.
+        $log->fatal("Don't understand command $_\n");
+        die("Unknown command $_");
+    }
+}
+
+$log->debug("Processing time : user=" . (times)[0] . " system=" . (times)[1]);
+$log->info("--------------- FINISH -----------------");
+
+# Magic catchall method.
+#    This is the method that will handle all commands we haven't yet
+#    implemented. It only writes a warning to the log; nothing is sent back
+#    to the client. $data is undef for requests sent without an argument.
+sub req_CATCHALL
+{
+    my ( $cmd, $data ) = @_;
+    $log->warn("Unhandled command : req_$cmd : " . ( defined $data ? $data : '' ));
+}
+
+
+# Root pathname \n
+#     Response expected: no. Tell the server which CVSROOT to use. Note that
+#     pathname is a local directory and not a fully qualified CVSROOT variable.
+#     pathname must already exist; if creating a new root, use the init
+#     request, not Root. pathname does not include the hostname of the server,
+#     how to access the server, etc.; by the time the CVS protocol is in use,
+#     connection, authentication, etc., are already taken care of. The Root
+#     request must be sent only once, and it must be sent before any requests
+#     other than Valid-responses, valid-requests, UseUnchanged, Set or init.
+sub req_Root
+{
+    my ( $cmd, $data ) = @_;
+    $log->debug("req_Root : $data");
+    # Remember the requested root and export it as GIT_DIR before running git-var.
+    $state->{CVSROOT} = $data;
+
+    $ENV{GIT_DIR} = $state->{CVSROOT} . "/";
+    # Load the configuration: a "section.key=value" line becomes $cfg->{section}{key}.
+    foreach my $line ( `git-var -l` )
+    {
+        next unless ( $line =~ /^(.*?)\.(.*?)=(.*)$/ );
+        $cfg->{$1}{$2} = $3;
+    }
+    # Send an error response unless gitcvs.enabled is 1, true or yes.
+    unless ( defined ( $cfg->{gitcvs}{enabled} ) and $cfg->{gitcvs}{enabled} =~ /^\s*(1|true|yes)\s*$/i )
+    {
+        print "E GITCVS emulation needs to be enabled on this repo\n";
+        print "E the repo config file needs a [gitcvs] section added, and the parameter 'enabled' set to 1\n";
+        print "E \n";
+        print "error 1 GITCVS emulation disabled\n";
+    }
+    # NOTE(review): execution continues after the error response above - confirm this is intended.
+    if ( defined ( $cfg->{gitcvs}{logfile} ) )
+    {
+        $log->setfile($cfg->{gitcvs}{logfile});
+    } else {
+        $log->nofile();
+    }
+}
+
+# Global_option option \n
+#     Response expected: no. Transmit one of the global options `-q', `-Q',
+#     `-l', `-t', `-r', or `-n'. option must be one of those strings, no
+#     variations (such as combining of options) are allowed. For graceful
+#     handling of valid-requests, it is probably better to make new global
+#     options separate requests, rather than trying to add them to this
+#     request.
+sub req_Globaloption
+{
+    my ( $cmd, $data ) = @_;
+    $log->debug("req_Globaloption : $data");
+    # The option is only logged here; it is not stored in $state.
+    # TODO : is this data useful ???
+}
+
+# Valid-responses request-list \n
+#     Response expected: no. Tell the server what responses the client will
+#     accept. request-list is a space separated list of tokens.
+sub req_Validresponses
+{
+    my ( $cmd, $data ) = @_;
+    $log->debug("req_Validresponses : $data");
+    # The list is only logged for now; it is not recorded in $state.
+    # TODO : re-enable this, currently it's not particularly useful
+    #$state->{validresponses} = [ split /\s+/, $data ];
+}
+
+# valid-requests \n
+#     Response expected: yes. Ask the server to send back a Valid-requests
+#     response.
+sub req_validrequests
+{
+    my ( $cmd, $data ) = @_;
+
+    $log->debug("req_validrequests");
+    # Advertise every request name in the dispatch table: log it first, then send it.
+    $log->debug("SEND : Valid-requests " . join(" ",keys %$methods));
+    $log->debug("SEND : ok");
+
+    print "Valid-requests " . join(" ",keys %$methods) . "\n";
+    print "ok\n";
+}
+
+# Directory local-directory \n
+#     Additional data: repository \n. Response expected: no. Tell the server
+#     what directory to use. The repository should be a directory name from a
+#     previous server response. Note that this both gives a default for Entry
+#     and Modified and also for ci and the other commands; normal usage is to
+#     send Directory for each directory in which there will be an Entry or
+#     Modified, and then a final Directory for the original directory, then the
+#     command. The local-directory is relative to the top level at which the
+#     command is occurring (i.e. the last Directory which is sent before the
+#     command); to indicate that top level, `.' should be sent for
+#     local-directory.
+sub req_Directory
+{
+    my ( $cmd, $data ) = @_;
+
+    my $repository = <STDIN>;
+    chomp $repository;
+
+    # Strip the literal CVSROOT prefix (\Q..\E: the path is not a regex), then split off the module.
+    $state->{localdir} = $data;
+    $state->{repository} = $repository;
+    $state->{directory} = $repository;
+    $state->{directory} =~ s/^\Q$state->{CVSROOT}\E\///;
+    $state->{module} = $1 if ($state->{directory} =~ s/^(.*?)(\/|$)//);
+    $state->{directory} .= "/" if ( $state->{directory} =~ /\S/ );
+
+    $log->debug("req_Directory : localdir=$data repository=$repository directory=$state->{directory} module=$state->{module}");
+}
+
+# Entry entry-line \n
+#     Response expected: no. Tell the server what version of a file is on the
+#     local machine. The name in entry-line is a name relative to the directory
+#     most recently specified with Directory. If the user is operating on only
+#     some files in a directory, Entry requests for only those files need be
+#     included. If an Entry request is sent without Modified, Is-modified, or
+#     Unchanged, it means the file is lost (does not exist in the working
+#     directory). If both Entry and one of Modified, Is-modified, or Unchanged
+#     are sent for the same file, Entry must be sent first. For a given file,
+#     one can send Modified, Is-modified, or Unchanged, but not more than one
+#     of these three.
+sub req_Entry
+{
+    my ( $cmd, $data ) = @_;
+
+    $log->debug("req_Entry : $data");
+    # entry-line is split on '/'; element 0 is not used here, element 1 is the file name.
+    my @data = split(/\//, $data);
+    # Key the record by the current directory (set by Directory) plus the file name.
+    $state->{entries}{$state->{directory}.$data[1]} = {
+        revision    => $data[2],
+        conflict    => $data[3],
+        options     => $data[4],
+        tag_or_date => $data[5],
+    };
+}
+
+# add \n
+#     Response expected: yes. Add a file or directory. This uses any previous
+#     Argument, Directory, Entry, or Modified requests, if they have been sent.
+#     The last Directory sent specifies the working directory at the time of
+#     the operation. To add a directory, send the directory to be added using
+#     Directory and Argument requests.
+sub req_add
+{
+    my ( $cmd, $data ) = @_;
+
+    argsplit("add");
+
+    my $addcount = 0;
+
+    foreach my $filename ( @{$state->{args}} )
+    {
+        $filename = filecleanup($filename);
+        # A file can only be added if the client sent its contents (Modified).
+        unless ( defined ( $state->{entries}{$filename}{modified_filename} ) )
+        {
+            print "E cvs add: nothing known about `$filename'\n";
+            next;
+        }
+        # TODO : check we're not squashing an already existing file
+        if ( defined ( $state->{entries}{$filename}{revision} ) )
+        {
+            print "E cvs add: `$filename' has already been entered\n";
+            next;
+        }
+
+        # Acknowledge with a Checked-in response whose entry line carries revision 0.
+        my ( $filepart, $dirpart ) = filenamesplit($filename);
+
+        print "E cvs add: scheduling file `$filename' for addition\n";
+
+        print "Checked-in $dirpart\n";
+        print "$filename\n";
+        print "/$filepart/0///\n";
+
+        $addcount++;
+    }
+    # Pick the singular or plural hint depending on how many files were scheduled.
+    if ( $addcount == 1 )
+    {
+        print "E cvs add: use `cvs commit' to add this file permanently\n";
+    }
+    elsif ( $addcount > 1 )
+    {
+        print "E cvs add: use `cvs commit' to add these files permanently\n";
+    }
+
+    print "ok\n";
+}
+
+# remove \n
+#     Response expected: yes. Remove a file. This uses any previous Argument,
+#     Directory, Entry, or Modified requests, if they have been sent. The last
+#     Directory sent specifies the working directory at the time of the
+#     operation. Note that this request does not actually do anything to the
+#     repository; the only effect of a successful remove request is to supply
+#     the client with a new entries line containing `-' to indicate a removed
+#     file. In fact, the client probably could perform this operation without
+#     contacting the server, although using remove may cause the server to
+#     perform a few more checks. The client sends a subsequent ci request to
+#     actually record the removal in the repository.
+sub req_remove
+{
+    my ( $cmd, $data ) = @_;
+
+    argsplit("remove");
+
+    # Grab a handle to the SQLite db and do any necessary updates
+    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
+    $updater->update();
+
+    #$log->debug("add state : " . Dumper($state));
+
+    my $rmcount = 0;
+
+    foreach my $filename ( @{$state->{args}} )
+    {
+        $filename = filecleanup($filename);
+        # The client must not have sent Unchanged or Modified for a file being removed.
+        if ( defined ( $state->{entries}{$filename}{unchanged} ) or defined ( $state->{entries}{$filename}{modified_filename} ) )
+        {
+            print "E cvs remove: file `$filename' still in working directory\n";
+            next;
+        }
+        # $wrev is presumably the client's working revision - see revparse to confirm.
+        my $meta = $updater->getmeta($filename);
+        my $wrev = revparse($filename);
+
+        unless ( defined ( $wrev ) )
+        {
+            print "E cvs remove: nothing known about `$filename'\n";
+            next;
+        }
+
+        if ( defined($wrev) and $wrev < 0 )
+        {
+            print "E cvs remove: file `$filename' already scheduled for removal\n";
+            next;
+        }
+        # Refuse when $wrev differs from the revision recorded in the updater's metadata.
+        unless ( $wrev == $meta->{revision} )
+        {
+            # TODO : not sure if the format of this message is quite correct.
+            print "E cvs remove: Up to date check failed for `$filename'\n";
+            next;
+        }
+
+        # All checks passed: send a Checked-in response whose entry line has revision -1.$wrev.
+        my ( $filepart, $dirpart ) = filenamesplit($filename);
+
+        print "E cvs remove: scheduling `$filename' for removal\n";
+
+        print "Checked-in $dirpart\n";
+        print "$filename\n";
+        print "/$filepart/-1.$wrev///\n";
+
+        $rmcount++;
+    }
+    # Pick the singular or plural hint depending on how many files were scheduled.
+    if ( $rmcount == 1 )
+    {
+        print "E cvs remove: use `cvs commit' to remove this file permanently\n";
+    }
+    elsif ( $rmcount > 1 )
+    {
+        print "E cvs remove: use `cvs commit' to remove these files permanently\n";
+    }
+
+    print "ok\n";
+}
+
+# Modified filename \n
+#     Response expected: no. Additional data: mode, \n, file transmission. Send
+#     the server a copy of one locally modified file. filename is a file within
+#     the most recent directory sent with Directory; it must not contain `/'.
+#     If the user is operating on only some files in a directory, only those
+#     files need to be included. This can also be sent without Entry, if there
+#     is no entry for the file.
+# Implementation: reads the mode line and the size line that follow the request
+# on STDIN, spools that many bytes of file data into a temporary file, and
+# records the temporary file's name, the owner permission letters and the
+# git-hash-object output for it under $state->{entries}, keyed by
+# $state->{directory} concatenated with the filename in $data.
+sub req_Modified
+{
+    my ( $cmd, $data ) = @_;
+
+    my $mode = <STDIN>;
+    chomp $mode;
+    my $size = <STDIN>;
+    chomp $size;
+
+    # Grab config information
+    my $blocksize = 8192;
+    my $bytesleft = $size;
+    my $tmp;
+
+    # Get a filehandle/name to write it to
+    my ( $fh, $filename ) = tempfile( DIR => $TEMP_DIR );
+
+    # Loop over file data writing out to temporary file.  The count is
+    # reduced by what read() reports it delivered, not by what was asked for,
+    # so a premature end of input is noticed instead of being counted as data.
+    while ( $bytesleft > 0 )
+    {
+        my $want = ( $bytesleft < $blocksize ) ? $bytesleft : $blocksize;
+        my $got = read(STDIN, $tmp, $want);
+        unless ( $got )
+        {
+            # undef is a read error, 0 is end of input; either way the
+            # client did not deliver the number of bytes it announced.
+            $log->warn("req_Modified : short read for '$data', $bytesleft bytes missing");
+            last;
+        }
+        print $fh $tmp;
+        $bytesleft -= $got;
+    }
+
+    close $fh;
+
+    # Ensure we have something sensible for the file mode: keep only the
+    # letters of the "u=" part, defaulting to "rw"
+    if ( $mode =~ /u=(\w+)/ )
+    {
+        $mode = $1;
+    } else {
+        $mode = "rw";
+    }
+
+    # Save the file data in $state
+    my $entrykey = $state->{directory} . $data;
+    $state->{entries}{$entrykey}{modified_filename} = $filename;
+    $state->{entries}{$entrykey}{modified_mode} = $mode;
+    $state->{entries}{$entrykey}{modified_hash} = `git-hash-object $filename`;
+    # keep only the first whitespace-delimited token of the command output
+    $state->{entries}{$entrykey}{modified_hash} =~ s/\s.*$//s;
+
+    #$log->debug("req_Modified : file=$data mode=$mode size=$size");
+}
+
+# Unchanged filename \n
+#     Response expected: no. Tell the server that filename has not been
+#     modified in the checked out directory. The filename is a file within the
+#     most recent directory sent with Directory; it must not contain `/'.
+# Implementation: flags the entry for this file as unchanged.  The entry key is
+# the current $state->{directory} value concatenated with the filename.
+sub req_Unchanged
+{
+    my ( $cmd, $data ) = @_;
+
+    my $entrykey = $state->{directory} . $data;
+    $state->{entries}{$entrykey}{unchanged} = 1;
+
+    #$log->debug("req_Unchanged : $data");
+}
+
+# Argument text \n
+#     Response expected: no. Save argument for use in a subsequent command.
+#     Arguments accumulate until an argument-using command is given, at which
+#     point they are forgotten.
+# Argumentx text \n
+#     Response expected: no. Append \n followed by text to the current argument
+#     being saved.
+# Implementation: arguments accumulate in the array @{$state->{arguments}}.
+#   $cmd  - the request name; assumed to be "Argument" or "Argumentx" as
+#           passed by the dispatcher (TODO confirm against the request table)
+#   $data - the argument text
+sub req_Argument
+{
+    my ( $cmd, $data ) = @_;
+
+    $log->debug("$cmd : $data");
+
+    # Argumentx continues the argument currently being saved: append a newline
+    # followed by the text.  If nothing has been saved yet there is nothing to
+    # continue, so the text starts a new argument instead.
+    if ( $cmd eq "Argumentx"
+         and ref($state->{arguments}) eq "ARRAY"
+         and scalar ( @{$state->{arguments}} ) > 0 )
+    {
+        $state->{arguments}[-1] .= "\n" . $data;
+    }
+    else
+    {
+        push @{$state->{arguments}}, $data;
+    }
+}
+
+# expand-modules \n
+#     Response expected: yes. Expand the modules which are specified in the
+#     arguments. Returns the data in Module-expansion responses. Note that the
+#     server can assume that this is checkout or export, not rtag or rdiff; the
+#     latter do not access the working directory and thus have no need to
+#     expand modules on the client side. Expand may not be the best word for
+#     what this request does. It does not necessarily tell you all the files
+#     contained in a module, for example. Basically it is a way of telling you
+#     which working directories the server needs to know about in order to
+#     handle a checkout of the specified modules. For example, suppose that the
+#     server has a module defined by
+#   aliasmodule -a 1dir
+#     That is, one can check out aliasmodule and it will take 1dir in the
+#     repository and check it out to 1dir in the working directory. Now suppose
+#     the client already has this module checked out and is planning on using
+#     the co request to update it. Without using expand-modules, the client
+#     would have two bad choices: it could either send information about all
+#     working directories under the current directory, which could be
+#     unnecessarily slow, or it could be ignorant of the fact that aliasmodule
+#     stands for 1dir, and neglect to send information for 1dir, which would
+#     lead to incorrect operation. With expand-modules, the client would first
+#     ask for the module to be expanded, and the server would answer with a
+#     Module-expansion response naming 1dir, followed by ok.
+# Implementation: no real expansion is performed; every saved argument is sent
+# back unchanged in its own Module-expansion response.
+sub req_expandmodules
+{
+    my ( $cmd, $data ) = @_;
+
+    argsplit();
+
+    $log->debug("req_expandmodules : " . ( defined($data) ? $data : "[NULL]" ) );
+
+    # No arguments were saved: acknowledge and stop (state is left untouched)
+    my $have_arguments = ( ref($state->{arguments}) eq "ARRAY" );
+    if ( not $have_arguments )
+    {
+        print "ok\n";
+        return;
+    }
+
+    for my $name ( @{$state->{arguments}} )
+    {
+        $log->debug("SEND : Module-expansion $name");
+        print "Module-expansion $name\n";
+    }
+
+    print "ok\n";
+    statecleanup();
+}
+
+# co \n
+#     Response expected: yes. Get files from the repository. This uses any
+#     previous Argument, Directory, Entry, or Modified requests, if they have
+#     been sent. Arguments to this command are module names; the client cannot
+#     know what directories they correspond to except by (1) just sending the
+#     co request, and then seeing what directory names the server sends back in
+#     its responses, and (2) the expand-modules request.
+# Implementation: sends every non-deleted file of the module head to the client
+# as a Created response, each preceded by a Mod-time line and "MT" progress
+# lines, then answers ok and calls statecleanup().
+sub req_co
+{
+    my ( $cmd, $data ) = @_;
+
+    argsplit("co");
+
+    # The first argument names the module; -d, when given, overrides the
+    # directory the client is told to check out into
+    my $module = $state->{args}[0];
+    my $checkout_path = exists ( $state->{opt}{d} ) ? $state->{opt}{d} : $module;
+
+    $log->debug("req_co : " . ( defined($data) ? $data : "[NULL]" ) );
+
+    $log->info("Checking out module '$module' ($state->{CVSROOT}) to '$checkout_path'");
+
+    $ENV{GIT_DIR} = $state->{CVSROOT} . "/";
+
+    # Bring the revision database up to date for this module
+    my $updater = GITCVS::updater->new($state->{CVSROOT}, $module, $log);
+    $updater->update();
+
+    # instruct the client that we're checking out to $checkout_path
+    print "E cvs server: updating $checkout_path\n";
+
+    for my $entry ( @{$updater->gethead} )
+    {
+        # Deleted files are not part of a checkout
+        next if ( $entry->{filehash} eq "deleted" );
+
+        # Split the stored path, in place, into file part and directory part
+        ( $entry->{name}, $entry->{dir} ) = filenamesplit($entry->{name});
+
+        # Name shown in the progress line: the directory part is left out
+        # when it is undefined or "./"
+        my $has_subdir = ( defined ( $entry->{dir} ) and $entry->{dir} ne "./" );
+        my $shown_name = $has_subdir
+            ? "$checkout_path/$entry->{dir}$entry->{name}"
+            : "$checkout_path/$entry->{name}";
+
+        # Directory component used in the Created response and repository path
+        my $dir_suffix = defined ( $entry->{dir} ) ? $entry->{dir} . "/" : "";
+
+        # modification time of this file
+        print "Mod-time $entry->{modified}\n";
+
+        # progress information for the client
+        print "MT +updated\n";
+        print "MT text U\n";
+        print "MT fname $shown_name\n";
+        print "MT newline\n";
+        print "MT -updated\n";
+
+        # tell the client a file is coming and where it goes
+        print "Created $checkout_path/$dir_suffix\n";
+
+        print "$state->{CVSROOT}/$module/$dir_suffix$entry->{name}\n";
+
+        # the "entries" line, then the permissions line
+        print "/$entry->{name}/1.$entry->{revision}///\n";
+        print "u=$entry->{mode},g=$entry->{mode},o=$entry->{mode}\n";
+
+        # finally the file contents
+        transmitfile($entry->{filehash});
+    }
+
+    print "ok\n";
+
+    statecleanup();
+}
+
+# update \n
+#     Response expected: yes. Actually do a cvs update command. This uses any
+#     previous Argument, Directory, Entry, or Modified requests, if they have
+#     been sent. The last Directory sent specifies the working directory at the
+#     time of the operation. The -I option is not used--files which the client
+#     can decide whether to ignore are not mentioned and the client sends the
+#     Questionable request for others.
+# Implementation: for every requested file (or every file found by
+# argsfromdir() when none was named) compare the client's working revision and
+# modification state with the target revision, then send one of:
+#   - a Removed response, when the target revision is marked "deleted";
+#   - the plain target contents (Update-existing / Created), when the client
+#     sent no modified copy or its copy matches the revision it started from;
+#   - the result of a three-way merge run in a temporary directory otherwise.
+# Options read: -C (discard local modifications), -r 1.N (target revision).
+sub req_update
+{
+    my ( $cmd, $data ) = @_;
+
+    $log->debug("req_update : " . ( defined($data) ? $data : "[NULL]" ));
+
+    argsplit("update");
+
+    # Grab a handle to the SQLite db and do any necessary updates
+    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
+
+    $updater->update();
+
+    # if no files were specified, we need to work out what files we should be providing status on ...
+    argsfromdir($updater) if ( scalar ( @{$state->{args}} ) == 0 );
+
+    #$log->debug("update state : " . Dumper($state));
+
+    # foreach file specified on the commandline ...
+    foreach my $filename ( @{$state->{args}} )
+    {
+        $filename = filecleanup($filename);
+
+        # if we have a -C we should pretend we never saw modified stuff
+        if ( exists ( $state->{opt}{C} ) )
+        {
+            delete $state->{entries}{$filename}{modified_hash};
+            delete $state->{entries}{$filename}{modified_filename};
+            $state->{entries}{$filename}{unchanged} = 1;
+        }
+
+        # Target revision: the one named with -r 1.N, otherwise whatever
+        # getmeta() returns when no revision is passed
+        my $meta;
+        if ( defined($state->{opt}{r}) and $state->{opt}{r} =~ /^1\.(\d+)/ )
+        {
+            $meta = $updater->getmeta($filename, $1);
+        } else {
+            $meta = $updater->getmeta($filename);
+        }
+
+        # Nothing known about this file in the database
+        next unless ( $meta->{revision} );
+
+        my $oldmeta = $meta;
+
+        my $wrev = revparse($filename);
+
+        # If the working copy is an old revision, lets get that version too for comparison.
+        if ( defined($wrev) and $wrev != $meta->{revision} )
+        {
+            $oldmeta = $updater->getmeta($filename, $wrev);
+        }
+
+        #$log->debug("Target revision is $meta->{revision}, current working revision is $wrev");
+
+        # Files are up to date if the working copy and repo copy have the same revision, and the working copy is unmodified _and_ the user hasn't specified -C
+        next if ( defined ( $wrev ) and defined($meta->{revision}) and $wrev == $meta->{revision} and $state->{entries}{$filename}{unchanged} and not exists ( $state->{opt}{C} ) );
+
+        if ( $meta->{filehash} eq "deleted" )
+        {
+            my ( $filepart, $dirpart ) = filenamesplit($filename);
+
+            $log->info("Removing '$filename' from working copy (no longer in the repo)");
+
+            print "E cvs update: `$filename' is no longer in the repository\n";
+            print "Removed $dirpart\n";
+            print "$filepart\n";
+        }
+        elsif ( not defined ( $state->{entries}{$filename}{modified_hash} ) or $state->{entries}{$filename}{modified_hash} eq $oldmeta->{filehash} )
+        {
+            $log->info("Updating '$filename'");
+            # normal update, just send the new revision (either U=Update, or A=Add, or R=Remove)
+            print "MT +updated\n";
+            print "MT text U\n";
+            print "MT fname $filename\n";
+            print "MT newline\n";
+            print "MT -updated\n";
+
+            my ( $filepart, $dirpart ) = filenamesplit($filename);
+            # Strip the current directory prefix.  \Q..\E makes the prefix
+            # match literally even if it contains regex metacharacters.
+            $dirpart =~ s/^\Q$state->{directory}\E//;
+
+            if ( defined ( $wrev ) )
+            {
+                # instruct client we're sending a file to put in this path as a replacement
+                print "Update-existing $dirpart\n";
+                $log->debug("Updating existing file 'Update-existing $dirpart'");
+            } else {
+                # instruct client we're sending a file to put in this path as a new file
+                print "Created $dirpart\n";
+                $log->debug("Creating new file 'Created $dirpart'");
+            }
+            print $state->{CVSROOT} . "/$state->{module}/$filename\n";
+
+            # this is an "entries" line
+            $log->debug("/$filepart/1.$meta->{revision}///");
+            print "/$filepart/1.$meta->{revision}///\n";
+
+            # permissions
+            $log->debug("SEND : u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}");
+            print "u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}\n";
+
+            # transmit file
+            transmitfile($meta->{filehash});
+        } else {
+            my ( $filepart, $dirpart ) = filenamesplit($meta->{name});
+
+            my $dir = tempdir( DIR => $TEMP_DIR, CLEANUP => 1 ) . "/";
+
+            # Lay out the three inputs of the merge in the temporary directory:
+            # <file>.mine (the client's copy), <file>.<old rev>, <file>.<new rev>
+            chdir $dir;
+            my $file_local = $filepart . ".mine";
+            symlink($state->{entries}{$filename}{modified_filename}, $file_local)
+                or $log->warn("Unable to link '$file_local' to the uploaded copy : $!");
+            my $file_old = $filepart . "." . $oldmeta->{revision};
+            transmitfile($oldmeta->{filehash}, $file_old);
+            my $file_new = $filepart . "." . $meta->{revision};
+            transmitfile($meta->{filehash}, $file_new);
+
+            # we need to merge with the local changes ( M=successful merge, C=conflict merge )
+            $log->info("Merging $file_local, $file_old, $file_new");
+
+            $log->debug("Temporary directory for merge is $dir");
+
+            # exit status of merge: 0 is handled as a clean merge, 1 as a
+            # merge with conflicts, anything else as a failure
+            my $return = system("merge", $file_local, $file_old, $file_new);
+            $return >>= 8;
+
+            if ( $return == 0 )
+            {
+                $log->info("Merged successfully");
+                print "M M $filename\n";
+                $log->debug("Update-existing $dirpart");
+                print "Update-existing $dirpart\n";
+                $log->debug($state->{CVSROOT} . "/$state->{module}/$filename");
+                print $state->{CVSROOT} . "/$state->{module}/$filename\n";
+                $log->debug("/$filepart/1.$meta->{revision}///");
+                print "/$filepart/1.$meta->{revision}///\n";
+            }
+            elsif ( $return == 1 )
+            {
+                $log->info("Merged with conflicts");
+                print "M C $filename\n";
+                print "Update-existing $dirpart\n";
+                print $state->{CVSROOT} . "/$state->{module}/$filename\n";
+                print "/$filepart/1.$meta->{revision}/+//\n";
+            }
+            else
+            {
+                $log->warn("Merge failed");
+                # leave the temporary merge directory before moving on
+                chdir "/";
+                next;
+            }
+
+            # permissions
+            $log->debug("SEND : u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}");
+            print "u=$meta->{mode},g=$meta->{mode},o=$meta->{mode}\n";
+
+            # transmit file, format is single integer on a line by itself (file
+            # size) followed by the file contents.  The merged file is read
+            # directly; its name comes from the repository/client and must not
+            # be handed to a shell.
+            # TODO : we should copy files in blocks
+            my $merged = "";
+            if ( open my $merged_fh, "<", $file_local )
+            {
+                local $/ = undef;
+                $merged = <$merged_fh>;
+                $merged = "" unless ( defined ( $merged ) );
+                close $merged_fh;
+            } else {
+                $log->warn("Unable to read merged file '$file_local' : $!");
+            }
+            $log->debug("File size : " . length($merged));
+            print length($merged) . "\n";
+            print $merged;
+
+            chdir "/";
+        }
+
+    }
+
+    print "ok\n";
+}
+
+# Handler for the "ci" request: commit the files named in the arguments.
+#
+# Outline:
+#   1. refuse to run if <CVSROOT>/index exists, then take an exclusive lock
+#      file next to the head being committed to;
+#   2. build a temporary index from the head with git-read-tree and work in a
+#      temporary directory;
+#   3. for each file with changes, run an up-to-date check, move the uploaded
+#      copy into place (or delete the file) and update the index;
+#   4. write a tree and a commit, point the head at the new commit, refresh the
+#      revision database and send a Checked-in / Remove-entry response per file.
+# The lock is released on every path that leaves the sub after it was taken.
+sub req_ci
+{
+    my ( $cmd, $data ) = @_;
+
+    argsplit("ci");
+
+    #$log->debug("State : " . Dumper($state));
+
+    $log->info("req_ci : " . ( defined($data) ? $data : "[NULL]" ));
+
+    if ( -e $state->{CVSROOT} . "/index" )
+    {
+        print "error 1 Index already exists in git repo\n";
+        exit;
+    }
+
+    # O_EXCL|O_CREAT makes creation fail if the lock file is already there
+    my $lockfile = "$state->{CVSROOT}/refs/heads/$state->{module}.lock";
+    unless ( sysopen(LOCKFILE,$lockfile,O_EXCL|O_CREAT|O_WRONLY) )
+    {
+        print "error 1 Lock file '$lockfile' already exists, please try again\n";
+        exit;
+    }
+
+    # Every way out of this sub from here on goes through this, otherwise a
+    # left-over lock file makes all later commits to this head fail.
+    my $release_lock = sub
+    {
+        close LOCKFILE;
+        unlink($lockfile);
+        chdir "/";
+    };
+
+    # Grab a handle to the SQLite db and do any necessary updates
+    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
+    $updater->update();
+
+    my $tmpdir = tempdir ( DIR => $TEMP_DIR );
+    my ( undef, $file_index ) = tempfile ( DIR => $TEMP_DIR, OPEN => 0 );
+    $log->info("Lock successful, basing commit on '$tmpdir', index file is '$file_index'");
+
+    $ENV{GIT_DIR} = $state->{CVSROOT} . "/";
+    $ENV{GIT_INDEX_FILE} = $file_index;
+
+    chdir $tmpdir;
+
+    # populate the temporary index based
+    system("git-read-tree", $state->{module});
+    unless ($? == 0)
+    {
+        # build the message first: releasing the lock may overwrite $!
+        my $failure = "Error running git-read-tree $state->{module} $file_index $!";
+        $release_lock->();
+        die $failure;
+    }
+    $log->info("Created index '$file_index' with for head $state->{module} - exit status $?");
+
+
+    my @committedfiles = ();
+
+    # foreach file specified on the commandline ...
+    foreach my $filename ( @{$state->{args}} )
+    {
+        $filename = filecleanup($filename);
+
+        # skip files the client reported as unchanged and sent no copy of
+        next unless ( exists $state->{entries}{$filename}{modified_filename} or not $state->{entries}{$filename}{unchanged} );
+
+        my $meta = $updater->getmeta($filename);
+
+        my $wrev = revparse($filename);
+
+        my ( $filepart, $dirpart ) = filenamesplit($filename);
+
+        # do a checkout of the file if it part of this tree
+        if ($wrev) {
+            system('git-checkout-index', '-f', '-u', $filename);
+            unless ($? == 0) {
+                my $failure = "Error running git-checkout-index -f -u $filename : $!";
+                $release_lock->();
+                die $failure;
+            }
+        }
+
+        # A negative working revision means "scheduled for removal"; a file
+        # that does not exist after the checkout above is treated as new
+        my $addflag = 0;
+        my $rmflag = 0;
+        $rmflag = 1 if ( defined($wrev) and $wrev < 0 );
+        $addflag = 1 unless ( -e $filename );
+
+        # Do up to date checking
+        unless ( $addflag or $wrev == $meta->{revision} or ( $rmflag and -$wrev == $meta->{revision} ) )
+        {
+            # fail everything if an up to date check fails
+            print "error 1 Up to date check failed for $filename\n";
+            $release_lock->();
+            exit;
+        }
+
+        push @committedfiles, $filename;
+        $log->info("Committing $filename");
+
+        system("mkdir","-p",$dirpart) unless ( -d $dirpart );
+
+        unless ( $rmflag )
+        {
+            $log->debug("rename $state->{entries}{$filename}{modified_filename} $filename");
+            rename $state->{entries}{$filename}{modified_filename},$filename;
+
+            # Calculate modes to remove
+            my $invmode = "";
+            foreach ( qw (r w x) ) { $invmode .= $_ unless ( $state->{entries}{$filename}{modified_mode} =~ /$_/ ); }
+
+            $log->debug("chmod u+" . $state->{entries}{$filename}{modified_mode} . "-" . $invmode . " $filename");
+            system("chmod","u+" .  $state->{entries}{$filename}{modified_mode} . "-" . $invmode, $filename);
+        }
+
+        if ( $rmflag )
+        {
+            $log->info("Removing file '$filename'");
+            unlink($filename);
+            system("git-update-index", "--remove", $filename);
+        }
+        elsif ( $addflag )
+        {
+            $log->info("Adding file '$filename'");
+            system("git-update-index", "--add", $filename);
+        } else {
+            $log->info("Updating file '$filename'");
+            system("git-update-index", $filename);
+        }
+    }
+
+    unless ( scalar(@committedfiles) > 0 )
+    {
+        print "E No files to commit\n";
+        print "ok\n";
+        $release_lock->();
+        return;
+    }
+
+    my $treehash = `git-write-tree`;
+    chomp $treehash;
+
+    # Read the current head directly rather than through `cat` in a shell:
+    # the module name is part of the path and must not be shell-interpreted.
+    my $headfile = "$ENV{GIT_DIR}refs/heads/$state->{module}";
+    my $parenthash = "";
+    if ( open my $head_fh, "<", $headfile )
+    {
+        local $/ = undef;
+        $parenthash = <$head_fh>;
+        $parenthash = "" unless ( defined ( $parenthash ) );
+        close $head_fh;
+    } else {
+        # an empty parent makes the commit hash check below fail
+        $log->warn("Unable to read '$headfile' : $!");
+    }
+    chomp $parenthash;
+
+    $log->debug("Treehash : $treehash, Parenthash : $parenthash");
+
+    # write our commit message out if we have one ...
+    my ( $msg_fh, $msg_filename ) = tempfile( DIR => $TEMP_DIR );
+    print $msg_fh $state->{opt}{m} if ( defined ( $state->{opt}{m} ) );
+    print $msg_fh "\n\nvia git-CVS emulator\n";
+    close $msg_fh;
+
+    my $commithash = `git-commit-tree $treehash -p $parenthash < $msg_filename`;
+    $log->info("Commit hash : $commithash");
+
+    unless ( $commithash =~ /[a-zA-Z0-9]{40}/ )
+    {
+        $log->warn("Commit failed (Invalid commit hash)");
+        print "error 1 Commit failed (unknown reason)\n";
+        $release_lock->();
+        exit;
+    }
+
+    # Point the head at the new commit; without this the commit object exists
+    # but is not reachable, so a failure here must not be reported as success.
+    unless ( open FILE, ">", $headfile )
+    {
+        $log->warn("Unable to update head '$headfile' : $!");
+        print "error 1 Commit failed (unable to update head)\n";
+        $release_lock->();
+        exit;
+    }
+    print FILE $commithash;
+    close FILE;
+
+    $updater->update();
+
+    # foreach file specified on the commandline ...
+    foreach my $filename ( @committedfiles )
+    {
+        $filename = filecleanup($filename);
+
+        my $meta = $updater->getmeta($filename);
+
+        my ( $filepart, $dirpart ) = filenamesplit($filename);
+
+        $log->debug("Checked-in $dirpart : $filename");
+
+        if ( $meta->{filehash} eq "deleted" )
+        {
+            print "Remove-entry $dirpart\n";
+            print "$filename\n";
+        } else {
+            print "Checked-in $dirpart\n";
+            print "$filename\n";
+            print "/$filepart/1.$meta->{revision}///\n";
+        }
+    }
+
+    $release_lock->();
+
+    print "ok\n";
+}
+
+# Handler for the "status" request: for each requested file (or every file
+# found by argsfromdir() when none was named) work out a status string and
+# print a status report as "M" lines, then answer ok.
+#
+# The status is chosen by a cascade of "$status ||= ..." statements: the first
+# rule whose condition holds wins, and "Unknown" is the fallback.  The rules
+# compare the client's working revision ($wrev) with the revision returned by
+# getmeta(), and look at the unchanged / modified_* / conflict fields recorded
+# under $state->{entries}.
+sub req_status
+{
+    my ( $cmd, $data ) = @_;
+
+    argsplit("status");
+
+    $log->info("req_status : " . ( defined($data) ? $data : "[NULL]" ));
+    #$log->debug("status state : " . Dumper($state));
+
+    # Grab a handle to the SQLite db and do any necessary updates
+    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
+    $updater->update();
+
+    # if no files were specified, we need to work out what files we should be providing status on ...
+    argsfromdir($updater) if ( scalar ( @{$state->{args}} ) == 0 );
+
+    # foreach file specified on the commandline ...
+    foreach my $filename ( @{$state->{args}} )
+    {
+        $filename = filecleanup($filename);
+
+        # $meta describes the revision getmeta() returns when none is asked
+        # for; $oldmeta the revision the working copy is based on (same
+        # object unless the working copy is at a different revision)
+        my $meta = $updater->getmeta($filename);
+        my $oldmeta = $meta;
+
+        my $wrev = revparse($filename);
+
+        # If the working copy is an old revision, lets get that version too for comparison.
+        if ( defined($wrev) and $wrev != $meta->{revision} )
+        {
+            $oldmeta = $updater->getmeta($filename, $wrev);
+        }
+
+        # TODO : All possible statuses aren't yet implemented
+        my $status;
+        # Files are up to date if the working copy and repo copy have the same revision, and the working copy is unmodified
+        # (either flagged unchanged without a conflict field starting "+=",
+        # or uploaded with a hash equal to the repository copy's)
+        $status = "Up-to-date" if ( defined ( $wrev ) and defined($meta->{revision}) and $wrev == $meta->{revision}
+                                    and
+                                    ( ( $state->{entries}{$filename}{unchanged} and ( not defined ( $state->{entries}{$filename}{conflict} ) or $state->{entries}{$filename}{conflict} !~ /^\+=/ ) )
+                                      or ( defined($state->{entries}{$filename}{modified_hash}) and $state->{entries}{$filename}{modified_hash} eq $meta->{filehash} ) )
+                                   );
+
+        # Need checkout if the working copy has an older revision than the repo copy, and the working copy is unmodified
+        $status ||= "Needs Checkout" if ( defined ( $wrev ) and defined ( $meta->{revision} ) and $meta->{revision} > $wrev
+                                          and
+                                          ( $state->{entries}{$filename}{unchanged}
+                                            or ( defined($state->{entries}{$filename}{modified_hash}) and $state->{entries}{$filename}{modified_hash} eq $oldmeta->{filehash} ) )
+                                        );
+
+        # Need checkout if it exists in the repo but doesn't have a working copy
+        $status ||= "Needs Checkout" if ( not defined ( $wrev ) and defined ( $meta->{revision} ) );
+
+        # Locally modified if working copy and repo copy have the same revision but there are local changes
+        $status ||= "Locally Modified" if ( defined ( $wrev ) and defined($meta->{revision}) and $wrev == $meta->{revision} and $state->{entries}{$filename}{modified_filename} );
+
+        # Needs Merge if working copy revision is less than repo copy and there are local changes
+        $status ||= "Needs Merge" if ( defined ( $wrev ) and defined ( $meta->{revision} ) and $meta->{revision} > $wrev and $state->{entries}{$filename}{modified_filename} );
+
+        # Locally Added: the client has an entry but the database has no revision.
+        # Locally Removed: the working revision is the negated repository revision.
+        # Unresolved Conflict: the entry's conflict field starts with "+=".
+        $status ||= "Locally Added" if ( defined ( $state->{entries}{$filename}{revision} ) and not defined ( $meta->{revision} ) );
+        $status ||= "Locally Removed" if ( defined ( $wrev ) and defined ( $meta->{revision} ) and -$wrev == $meta->{revision} );
+        $status ||= "Unresolved Conflict" if ( defined ( $state->{entries}{$filename}{conflict} ) and $state->{entries}{$filename}{conflict} =~ /^\+=/ );
+        # placeholder: the condition is constant false, so this status is never assigned
+        $status ||= "File had conflicts on merge" if ( 0 );
+
+        $status ||= "Unknown";
+
+        # The report itself; the sticky fields are always printed as "(none)"
+        print "M ===================================================================\n";
+        print "M File: $filename\tStatus: $status\n";
+        if ( defined($state->{entries}{$filename}{revision}) )
+        {
+            print "M Working revision:\t" . $state->{entries}{$filename}{revision} . "\n";
+        } else {
+            print "M Working revision:\tNo entry for $filename\n";
+        }
+        if ( defined($meta->{revision}) )
+        {
+            print "M Repository revision:\t1." . $meta->{revision} . "\t$state->{repository}/$filename,v\n";
+            print "M Sticky Tag:\t\t(none)\n";
+            print "M Sticky Date:\t\t(none)\n";
+            print "M Sticky Options:\t\t(none)\n";
+        } else {
+            print "M Repository revision:\tNo revision control file\n";
+        }
+        print "M\n";
+    }
+
+    print "ok\n";
+}
+
+# Handler for the "diff" request: for each requested file (or every file found
+# by argsfromdir() when none was named) print a diff as "M" lines, then ok.
+#
+# What is compared:
+#   - no -r         : the working revision against the copy the client uploaded
+#   - one -r        : that revision against the uploaded copy, or against the
+#                     working revision when no copy was uploaded
+#   - two -r        : the two revisions against each other
+# A leading "1." is stripped from -r values.  Unified output is produced only
+# when -u was given; other options are echoed in the "M diff" line but not
+# passed on to diff.
+sub req_diff
+{
+    my ( $cmd, $data ) = @_;
+
+    argsplit("diff");
+
+    $log->debug("req_diff : " . ( defined($data) ? $data : "[NULL]" ));
+    #$log->debug("status state : " . Dumper($state));
+
+    # -r may have been given once (scalar) or twice (array reference)
+    my ($revision1, $revision2);
+    if ( defined ( $state->{opt}{r} ) and ref $state->{opt}{r} eq "ARRAY" )
+    {
+        $revision1 = $state->{opt}{r}[0];
+        $revision2 = $state->{opt}{r}[1];
+    } else {
+        $revision1 = $state->{opt}{r};
+    }
+
+    $revision1 =~ s/^1\.// if ( defined ( $revision1 ) );
+    $revision2 =~ s/^1\.// if ( defined ( $revision2 ) );
+
+    $log->debug("Diffing revisions " . ( defined($revision1) ? $revision1 : "[NULL]" ) . " and " . ( defined($revision2) ? $revision2 : "[NULL]" ) );
+
+    # Grab a handle to the SQLite db and do any necessary updates
+    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
+    $updater->update();
+
+    # if no files were specified, we need to work out what files we should be providing status on ...
+    argsfromdir($updater) if ( scalar ( @{$state->{args}} ) == 0 );
+
+    # foreach file specified on the commandline ...
+    foreach my $filename ( @{$state->{args}} )
+    {
+        $filename = filecleanup($filename);
+
+        # $file1/$file2 are the paths handed to diff, $meta1/$meta2 the
+        # database records of the revisions they hold ($meta2 stays undefined
+        # when the right-hand side is the client's uploaded copy)
+        my ( $file1, $file2, $meta1, $meta2 );
+
+        my $wrev = revparse($filename);
+
+        # We need _something_ to diff against
+        next unless ( defined ( $wrev ) );
+
+        # if we have a -r switch, use it
+        if ( defined ( $revision1 ) )
+        {
+            ( undef, $file1 ) = tempfile( DIR => $TEMP_DIR, OPEN => 0 );
+            $meta1 = $updater->getmeta($filename, $revision1);
+            unless ( defined ( $meta1 ) and $meta1->{filehash} ne "deleted" )
+            {
+                print "E File $filename at revision 1.$revision1 doesn't exist\n";
+                next;
+            }
+            transmitfile($meta1->{filehash}, $file1);
+        }
+        # otherwise we just use the working copy revision
+        else
+        {
+            ( undef, $file1 ) = tempfile( DIR => $TEMP_DIR, OPEN => 0 );
+            $meta1 = $updater->getmeta($filename, $wrev);
+            transmitfile($meta1->{filehash}, $file1);
+        }
+
+        # if we have a second -r switch, use it too
+        if ( defined ( $revision2 ) )
+        {
+            ( undef, $file2 ) = tempfile( DIR => $TEMP_DIR, OPEN => 0 );
+            $meta2 = $updater->getmeta($filename, $revision2);
+
+            unless ( defined ( $meta2 ) and $meta2->{filehash} ne "deleted" )
+            {
+                print "E File $filename at revision 1.$revision2 doesn't exist\n";
+                next;
+            }
+
+            transmitfile($meta2->{filehash}, $file2);
+        }
+        # otherwise we just use the working copy
+        else
+        {
+            $file2 = $state->{entries}{$filename}{modified_filename};
+        }
+
+        # if we have been given -r, and we don't have a $file2 yet, lets get one
+        if ( defined ( $revision1 ) and not defined ( $file2 ) )
+        {
+            ( undef, $file2 ) = tempfile( DIR => $TEMP_DIR, OPEN => 0 );
+            $meta2 = $updater->getmeta($filename, $wrev);
+            transmitfile($meta2->{filehash}, $file2);
+        }
+
+        # We need to have retrieved something useful
+        next unless ( defined ( $meta1 ) );
+
+        # Files to date if the working copy and repo copy have the same revision, and the working copy is unmodified
+        next if ( not defined ( $meta2 ) and $wrev == $meta1->{revision}
+                  and
+                   ( ( $state->{entries}{$filename}{unchanged} and ( not defined ( $state->{entries}{$filename}{conflict} ) or $state->{entries}{$filename}{conflict} !~ /^\+=/ ) )
+                     or ( defined($state->{entries}{$filename}{modified_hash}) and $state->{entries}{$filename}{modified_hash} eq $meta1->{filehash} ) )
+                  );
+
+        # Apparently we only show diffs for locally modified files
+        next unless ( defined($meta2) or defined ( $state->{entries}{$filename}{modified_filename} ) );
+
+        print "M Index: $filename\n";
+        print "M ===================================================================\n";
+        print "M RCS file: $state->{CVSROOT}/$state->{module}/$filename,v\n";
+        print "M retrieving revision 1.$meta1->{revision}\n" if ( defined ( $meta1 ) );
+        print "M retrieving revision 1.$meta2->{revision}\n" if ( defined ( $meta2 ) );
+        # echo the options the client passed, as cvs does in its "diff" line
+        print "M diff ";
+        foreach my $opt ( keys %{$state->{opt}} )
+        {
+            if ( ref $state->{opt}{$opt} eq "ARRAY" )
+            {
+                foreach my $value ( @{$state->{opt}{$opt}} )
+                {
+                    print "-$opt $value ";
+                }
+            } else {
+                print "-$opt ";
+                print "$state->{opt}{$opt} " if ( defined ( $state->{opt}{$opt} ) );
+            }
+        }
+        print "$filename\n";
+
+        # Right-hand revision, if any; looked up only when $meta2 is set so
+        # that the test does not turn an undefined $meta2 into an empty hash
+        my $rev2 = defined ( $meta2 ) ? $meta2->{revision} : undef;
+
+        $log->info("Diffing $filename -r $meta1->{revision} -r " . ( $rev2 or "workingcopy" ));
+
+        # Build the diff command as an argument list and run it without a
+        # shell: $filename comes from the client and ends up in the -L labels,
+        # so it must never be interpreted by a shell.
+        my @diffcmd = ( "diff" );
+        if ( exists $state->{opt}{u} )
+        {
+            push @diffcmd, "-u",
+                "-L", "$filename revision 1.$meta1->{revision}",
+                "-L", "$filename " . ( defined($rev2) ? "revision 1.$rev2" : "working copy" );
+        }
+        push @diffcmd, $file1, $file2;
+
+        # Relay diff's standard output to the client line by line.  diff's
+        # exit status is not checked (non-zero also means "files differ").
+        if ( open my $diffout, "-|", @diffcmd )
+        {
+            while ( <$diffout> )
+            {
+                print "M $_";
+            }
+            close $diffout;
+        } else {
+            $log->warn("Unable to run diff for '$filename' : $!");
+        }
+    }
+
+    print "ok\n";
+}
+
+sub req_log
+{
+    # Answers a CVS "log" request: prints an "M"-prefixed history listing for
+    # each requested file, followed by "ok".
+    my ( $cmd, $data ) = @_;
+
+    argsplit("log");
+
+    my $shown = defined($data) ? $data : "[NULL]";
+    $log->debug("req_log : " . $shown);
+
+    # An "-r" option of the form [min]:[max] or [min]::[max] restricts the
+    # revisions listed; "::" makes the lower bound exclusive.
+    my ( $minrev, $maxrev );
+    if ( defined ( $state->{opt}{r} ) and $state->{opt}{r} =~ /([\d.]+)?(::?)([\d.]+)?/ )
+    {
+        ( $minrev, my $separator, $maxrev ) = ( $1, $2, $3 );
+        # revisions are compared without their leading "1."
+        defined($_) and s/^1\.// foreach ( $minrev, $maxrev );
+        $minrev++ if ( defined($minrev) and $separator eq "::" );
+    }
+
+    # Grab a handle to the SQLite db and do any necessary updates
+    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
+    $updater->update();
+
+    # with no files named, argsfromdir() fills in $state->{args}
+    argsfromdir($updater) unless ( scalar ( @{$state->{args}} ) );
+
+    # one log section per requested file
+    foreach my $filename ( @{$state->{args}} )
+    {
+        $filename = filecleanup($filename);
+
+        my $headmeta = $updater->getmeta($filename);
+        my $revisions = $updater->getlog($filename);
+        my $totalrevisions = scalar(@$revisions);
+
+        # revisions below the minimum are dropped from the tail of the list,
+        # revisions above the maximum from its front
+        if ( defined ( $minrev ) )
+        {
+            $log->debug("Removing revisions less than $minrev");
+            pop @$revisions while ( @$revisions and $revisions->[-1]{revision} < $minrev );
+        }
+        if ( defined ( $maxrev ) )
+        {
+            $log->debug("Removing revisions greater than $maxrev");
+            shift @$revisions while ( @$revisions and $revisions->[0]{revision} > $maxrev );
+        }
+
+        next unless ( @$revisions );
+
+        print "M \n",
+              "M RCS file: $state->{CVSROOT}/$state->{module}/$filename,v\n",
+              "M Working file: $filename\n",
+              "M head: 1.$headmeta->{revision}\n",
+              "M branch:\n",
+              "M locks: strict\n",
+              "M access list:\n",
+              "M symbolic names:\n",
+              "M keyword substitution: kv\n",
+              "M total revisions: $totalrevisions;\tselected revisions: " . scalar(@$revisions) . "\n",
+              "M description:\n";
+
+        foreach my $entry ( @$revisions )
+        {
+            print "M ----------------------------\n";
+            print "M revision 1.$entry->{revision}\n";
+            # rewrite "<day> <month> <year> <time>" as "<year>/<mm>/<dd> <time>" when the month is known
+            if ( $entry->{modified} =~ /(\d+)\s+(\w+)\s+(\d+)\s+(\S+)/ and defined($DATE_LIST->{$2}) )
+            {
+                $entry->{modified} = sprintf('%04d/%02d/%02d %s', $3, $DATE_LIST->{$2}, $1, $4 );
+            }
+            # keep only the first word of the author, cut to 8 characters
+            for ( $entry->{author} ) { s/\s+.*//; s/^(.{8}).*/$1/; }
+            my $rcsstate = ( $entry->{filehash} eq "deleted" ) ? "dead" : "Exp";
+            print "M date: $entry->{modified};  author: $entry->{author};  state: " . $rcsstate . ";  lines: +2 -3\n";
+            ( my $message = $updater->commitmessage($entry->{commithash}) ) =~ s/^/M /mg;
+            print $message . "\n";
+        }
+        print "M =============================================================================\n";
+    }
+
+    print "ok\n";
+}
+
+sub req_annotate
+{
+    # Handles a CVS "annotate" request: each requested file is checked out into
+    # a scratch directory, run through git-annotate, and reported as "M" lines.
+    my ( $cmd, $data ) = @_;
+    argsplit("annotate");
+
+    $log->info("req_annotate : " . ( defined($data) ? $data : "[NULL]" ));
+
+    # Grab a handle to the SQLite db and do any necessary updates
+    my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
+    $updater->update();
+
+    # if no files were specified, we need to work out what files we should be providing annotate on ...
+    argsfromdir($updater) if ( scalar ( @{$state->{args}} ) == 0 );
+
+    # we'll need a temporary checkout dir
+    my $tmpdir = tempdir ( DIR => $TEMP_DIR );
+    my ( undef, $file_index ) = tempfile ( DIR => $TEMP_DIR, OPEN => 0 );
+    $log->info("Temp checkoutdir creation successful, basing annotate session work on '$tmpdir', index file is '$file_index'");
+
+    $ENV{GIT_DIR} = $state->{CVSROOT} . "/";
+    $ENV{GIT_INDEX_FILE} = $file_index;
+    # git-checkout-index -f below writes relative to the cwd, so a failed chdir must be fatal
+    chdir $tmpdir or die "Cannot chdir to '$tmpdir' : $!";
+
+    # foreach file specified on the commandline ...
+    foreach my $filename ( @{$state->{args}} )
+    {
+        $filename = filecleanup($filename);
+
+        my $meta = $updater->getmeta($filename);
+
+        next unless ( $meta->{revision} );
+
+        # get all the commits that this file was in
+        # in dense format -- aka skip dead revisions
+        my $revisions   = $updater->gethistorydense($filename);
+        my $lastseenin  = $revisions->[0][2];
+
+        # populate the temporary index based on the latest commit were we saw
+        # the file -- but do it cheaply without checking out any files
+        # TODO: if we got a revision from the client, use that instead
+        # to look up the commithash in sqlite (still good to default to
+        # the current head as we do now)
+        system("git-read-tree", $lastseenin);
+        unless ($? == 0)
+        {
+            die "Error running git-read-tree $lastseenin $file_index $!";
+        }
+        $log->info("Created index '$file_index' with commit $lastseenin - exit status $?");
+
+        # do a checkout of the file
+        system('git-checkout-index', '-f', '-u', $filename);
+        unless ($? == 0) {
+            die "Error running git-checkout-index -f -u $filename : $!";
+        }
+
+        $log->info("Annotate $filename");
+
+        # Prepare a file with the commits from the linearized
+        # history that annotate should know about. This prevents
+        # git-annotate telling us about commits we are hiding
+        # from the client.
+
+        open(ANNOTATEHINTS, ">$tmpdir/.annotate_hints") or die "Error opening > $tmpdir/.annotate_hints $!";
+        for (my $i=0; $i < @$revisions; $i++)
+        {
+            print ANNOTATEHINTS $revisions->[$i][2];
+            if ($i+1 < @$revisions) { # have we got a parent?
+                print ANNOTATEHINTS ' ' . $revisions->[$i+1][2];
+            }
+            print ANNOTATEHINTS "\n";
+        }
+
+        print ANNOTATEHINTS "\n";
+        close ANNOTATEHINTS;
+
+        my $annotatecmd = 'git-annotate';
+        open(ANNOTATE, "-|", $annotatecmd, '-l', '-S', "$tmpdir/.annotate_hints", $filename)
+            or die "Error invoking $annotatecmd -l -S $tmpdir/.annotate_hints $filename : $!";
+        my $metadata = {};
+        print "E Annotations for $filename\n";
+        print "E ***************\n";
+        while ( <ANNOTATE> )
+        {
+            # expected line shape: <40-character commit id> TAB (<ignored>) <source text>
+            if (m/^([a-zA-Z0-9]{40})\t\([^\)]*\)(.*)$/i)
+            {
+                my $commithash = $1;
+                my $data = $2;
+                unless ( defined ( $metadata->{$commithash} ) )
+                {
+                    $metadata->{$commithash} = $updater->getmeta($filename, $commithash);
+                    $metadata->{$commithash}{author} =~ s/\s+.*//;
+                    $metadata->{$commithash}{author} =~ s/^(.{8}).*/$1/;
+                    $metadata->{$commithash}{modified} = sprintf("%02d-%s-%02d", $1, $2, $3) if ( $metadata->{$commithash}{modified} =~ /^(\d+)\s(\w+)\s\d\d(\d\d)/ );
+                }
+                printf("M 1.%-5d      (%-8s %10s): %s\n",
+                    $metadata->{$commithash}{revision},
+                    $metadata->{$commithash}{author},
+                    $metadata->{$commithash}{modified},
+                    $data
+                );
+            } else {
+                $log->warn("Error in annotate output! LINE: $_");
+                print "E Annotate error \n";
+            }
+        }
+        close ANNOTATE;
+    }
+
+    # done; get out of the tempdir
+    chdir "/";
+
+    print "ok\n";
+
+}
+
+# This method splits $state->{arguments}. Called with a command name, it files
+# switches under $state->{opt} and all other arguments under $state->{args};
+# called without one, arguments before a "--" go to $state->{args}, those after it to $state->{files}.
+sub argsplit
+{
+    return unless( defined($state->{arguments}) and ref $state->{arguments} eq "ARRAY" );
+
+    my $type = shift;
+
+    $state->{args} = [];
+    $state->{files} = [];
+    $state->{opt} = {};
+
+    if ( defined($type) )
+    {
+        # per-command switch table: 0 = plain flag, 1 = switch takes a value,
+        # 2 = switch takes a value and may be repeated (values collected in a list)
+        my $opt = {};
+        $opt = { A => 0, N => 0, P => 0, R => 0, c => 0, f => 0, l => 0, n => 0, p => 0, s => 0, r => 1, D => 1, d => 1, k => 1, j => 1, } if ( $type eq "co" );
+        $opt = { v => 0, l => 0, R => 0 } if ( $type eq "status" );
+        $opt = { A => 0, P => 0, C => 0, d => 0, f => 0, l => 0, R => 0, p => 0, k => 1, r => 1, D => 1, j => 1, I => 1, W => 1 } if ( $type eq "update" );
+        $opt = { l => 0, R => 0, k => 1, D => 1, r => 2 } if ( $type eq "diff" );
+        $opt = { c => 0, R => 0, l => 0, f => 0, F => 1, m => 1, r => 1 } if ( $type eq "ci" );
+        $opt = { k => 1, m => 1 } if ( $type eq "add" );
+        $opt = { f => 0, l => 0, R => 0 } if ( $type eq "remove" );
+        $opt = { l => 0, b => 0, h => 0, R => 0, t => 0, N => 0, S => 0, r => 1, d => 1, s => 1, w => 1 } if ( $type eq "log" );
+        while ( scalar ( @{$state->{arguments}} ) > 0 )
+        {
+            my $arg = shift @{$state->{arguments}};
+
+            next if ( $arg eq "--" );
+            next unless ( $arg =~ /\S/ );
+
+            # if the argument looks like a switch
+            if ( $arg =~ /^-(\w)(.*)/ )
+            {
+                # if it's a switch that takes an argument
+                if ( $opt->{$1} )
+                {
+                    # a repeatable switch seen again: keep every value in one flat list
+                    if ( $opt->{$1} > 1 and exists ( $state->{opt}{$1} ) )
+                    {
+                        $state->{opt}{$1} = [ $state->{opt}{$1} ] unless ( ref $state->{opt}{$1} eq "ARRAY" );
+                        if ( length($2) > 0 )
+                        {
+                            push @{$state->{opt}{$1}},$2;
+                        } else {
+                            push @{$state->{opt}{$1}}, shift @{$state->{arguments}};
+                        }
+                    } else {
+                        # if there's extra data in the arg, use that as the argument for the switch
+                        if ( length($2) > 0 )
+                        {
+                            $state->{opt}{$1} = $2;
+                        } else {
+                            $state->{opt}{$1} = shift @{$state->{arguments}};
+                        }
+                    }
+                } else {
+                    $state->{opt}{$1} = undef;
+                }
+            }
+            else
+            {
+                push @{$state->{args}}, $arg;
+            }
+        }
+    }
+    else
+    {
+        my $mode = 0;
+
+        foreach my $value ( @{$state->{arguments}} )
+        {
+            if ( $value eq "--" )
+            {
+                $mode++;
+                next;
+            }
+            push @{$state->{args}}, $value if ( $mode == 0 );
+            push @{$state->{files}}, $value if ( $mode == 1 );
+        }
+    }
+}
+
+# This method fills $state->{args} with the head file names that start with $state->{directory} (prefix removed)
+sub argsfromdir
+{
+    my $updater = shift;
+    $state->{args} = [];
+    foreach my $file ( @{$updater->gethead} )
+    {
+        # deleted files are listed only when $state->{entries} has an entry for them
+        next if ( $file->{filehash} eq "deleted" and not defined ( $state->{entries}{$file->{name}} ) );
+        # \Q..\E: the directory is a literal prefix, not a pattern; note the prefix is cut from the gethead record itself
+        next unless ( $file->{name} =~ s/^\Q$state->{directory}\E// );
+        push @{$state->{args}}, $file->{name};
+    }
+}
+
+# Resets the argument-related parts of $state once a command that used them has run
+sub statecleanup
+{
+    # the three list-valued slots get a fresh empty list each ...
+    $state->{$_} = [] foreach ( qw( files args arguments ) );
+    # ... and the entries map a fresh empty hash
+    $state->{entries} = {};
+}
+
+sub revparse
+{
+    # Maps the revision stored in $state->{entries} for a file ("1.N" or "-1.N")
+    # to N or -N; yields undef when none is stored or it has another form.
+    my ( $filename ) = @_;
+    my $revision = $state->{entries}{$filename}{revision};
+    return undef unless ( defined ( $revision ) );
+
+    if ( $revision =~ /^(-?)1\.(\d+)/ ) { return $1 eq "-" ? -$2 : $2; }
+    return undef;
+}
+
+# This method takes a file hash and does a CVS "file transfer" which transmits the
+# size of the file, and then the file contents.
+# If a second argument $targetfile is given, the contents are instead written out to
+# that file. A hash of "deleted" only logs a warning; any other failure dies.
+sub transmitfile
+{
+    my $filehash = shift;
+    my $targetfile = shift;
+
+    if ( defined ( $filehash ) and $filehash eq "deleted" )
+    {
+        $log->warn("filehash is 'deleted'");
+        return;
+    }
+
+    die "Need filehash" unless ( defined ( $filehash ) and $filehash =~ /^[a-zA-Z0-9]{40}$/ );
+    # the hash was validated above, so it is safe to interpolate into the commands below
+    my $type = `git-cat-file -t $filehash`;
+    chomp $type;
+
+    die ( "Invalid type '$type' (expected 'blob')" ) unless ( defined ( $type ) and $type eq "blob" );
+
+    my $size = `git-cat-file -s $filehash`;
+    chomp $size;
+
+    $log->debug("transmitfile($filehash) size=$size, type=$type");
+
+    if ( open my $fh, '-|', "git-cat-file", "blob", $filehash )
+    {
+        if ( defined ( $targetfile ) )
+        {
+            open my $out, ">", $targetfile or die("Couldn't open '$targetfile' for writing : $!");
+            print {$out} $_ while ( <$fh> );
+            close $out or die("Couldn't finish writing '$targetfile' : $!");
+        } else {
+            print "$size\n";
+            print while ( <$fh> );
+        }
+        close $fh or die ("Couldn't close filehandle for transmitfile()");
+    } else {
+        die("Couldn't execute git-cat-file");
+    }
+}
+
+# Splits a path at its last "/" and returns ( $filepart, $dirpart ): the file
+# name first, then the directory with a trailing "/" ("./" when the path has
+# no directory component)
+sub filenamesplit
+{
+    my ( $filename ) = @_;
+    if ( $filename =~ /(.*)\/(.*)/ )
+    {
+        my ( $dirpart, $filepart ) = ( "$1/", $2 );
+        return ( $filepart, $dirpart );
+    }
+    return ( $filename, "./" );
+}
+
+sub filecleanup
+{
+    # Normalises a file name: a leading "./" is dropped and $state->{directory}
+    # is put in front. undef comes back for an undefined name or, after an
+    # "E" message has been printed, for an absolute one.
+    my ( $filename ) = @_;
+    return undef if ( not defined ( $filename ) );
+
+    unless ( $filename =~ /^\// )
+    {
+        $filename =~ s/^\.\///g;
+        return $state->{directory} . $filename;
+    }
+    print "E absolute filenames '$filename' not supported by server\n";
+    return undef;
+}
+
+package GITCVS::log;
+
+####
+#### Copyright The Open University UK - 2006.
+####
+#### Authors: Martyn Smith    <martyn@catalyst.net.nz>
+####          Martin Langhoff <martin@catalyst.net.nz>
+####
+####
+
+use strict;
+use warnings;
+
+=head1 NAME
+
+GITCVS::log
+
+=head1 DESCRIPTION
+
+This module provides very crude logging with a similar interface to
+Log::Log4perl
+
+=head1 METHODS
+
+=cut
+
+=head2 new
+
+Creates a new log object, optionally you can specify a filename here to
+indicate the file to log to. If no log file is specified, you can specify one
+later with method setfile, or indicate you no longer want logging with method
+nofile.
+
+Until one of these methods is called, all log calls will buffer messages ready
+to write out.
+
+=cut
+sub new
+{
+    my $class = shift;
+    my $filename = shift;
+
+    my $self = {};
+
+    bless $self, $class;
+
+    if ( defined ( $filename ) )
+    {
+        open $self->{fh}, ">>", $filename or die("Couldn't open '$filename' for writing : $!");
+    }
+
+    return $self;
+}
+
+=head2 setfile
+
+This method takes a filename, and attempts to open that file (in append mode)
+as the log file. All buffered data is then written out to the log file; further
+logging goes straight to it. With no log file open, the buffer is left as it is.
+
+=cut
+sub setfile
+{
+    my $self = shift;
+    my $filename = shift;
+
+    if ( defined ( $filename ) )
+    {
+        open $self->{fh}, ">>", $filename or die("Couldn't open '$filename' for writing : $!");
+    }
+    # without an open log file there is nowhere to flush to, so keep buffering
+    return unless ( $self->_logopen and defined ( $self->{buffer} ) and ref $self->{buffer} eq "ARRAY" );
+
+    while ( defined ( my $line = shift @{$self->{buffer}} ) )
+    {
+        print {$self->{fh}} $line;
+    }
+}
+
+=head2 nofile
+
+This method indicates no logging is going to be used. It discards any entries in
+the internal buffer, and sets a flag to ensure no further data is put there.
+
+=cut
+sub nofile
+{
+    my $self = shift;
+
+    $self->{nolog} = 1;
+
+    return unless ( defined ( $self->{buffer} ) and ref $self->{buffer} eq "ARRAY" );
+
+    $self->{buffer} = [];
+}
+
+=head2 _logopen
+
+Internal method. Returns true if the log file is open, false otherwise.
+
+=cut
+sub _logopen
+{
+    my $self = shift;
+
+    return 1 if ( defined ( $self->{fh} ) and ref $self->{fh} eq "GLOB" );
+    return 0;
+}
+
+=head2 debug info warn fatal
+
+These four methods are wrappers to _log. They provide the actual interface for
+logging data.
+
+=cut
+sub debug { my $self = shift; $self->_log("debug", @_); }
+sub info  { my $self = shift; $self->_log("info" , @_); }
+sub warn  { my $self = shift; $self->_log("warn" , @_); }
+sub fatal { my $self = shift; $self->_log("fatal", @_); }
+
+=head2 _log
+
+This is an internal method called by the logging functions. It generates a
+timestamp and pushes the logged line either to file, or internal buffer.
+
+=cut
+sub _log
+{
+    my $self = shift;
+    my $level = shift;
+
+    return if ( $self->{nolog} );
+
+    my @time = localtime;
+    my $timestring = sprintf("%4d-%02d-%02d %02d:%02d:%02d : %-5s",
+        $time[5] + 1900,
+        $time[4] + 1,
+        $time[3],
+        $time[2],
+        $time[1],
+        $time[0],
+        uc $level,
+    );
+
+    if ( $self->_logopen )
+    {
+        print {$self->{fh}} $timestring . " - " . join(" ",@_) . "\n";
+    } else {
+        push @{$self->{buffer}}, $timestring . " - " . join(" ",@_) . "\n";
+    }
+}
+
+=head2 DESTROY
+
+This method simply closes the file handle if one is open
+
+=cut
+sub DESTROY
+{
+    my $self = shift;
+
+    if ( $self->_logopen )
+    {
+        close $self->{fh};
+    }
+}
+
+package GITCVS::updater;
+
+####
+#### Copyright The Open University UK - 2006.
+####
+#### Authors: Martyn Smith    <martyn@catalyst.net.nz>
+####          Martin Langhoff <martin@catalyst.net.nz>
+####
+####
+
+use strict;
+use warnings;
+use DBI;
+
+=head1 METHODS
+
+=cut
+
+=head2 new
+
+=cut
+sub new
+{
+    my $class = shift;
+    my $config = shift;
+    my $module = shift;
+    my $log = shift;
+    # $config is the git repository directory; the module's SQLite file is kept inside it
+    die "Need to specify a git repository" unless ( defined($config) and -d $config );
+    die "Need to specify a module" unless ( defined($module) );
+
+    $class = ref($class) || $class;
+
+    my $self = {};
+
+    bless $self, $class;
+
+    $self->{dbdir} = $config . "/";
+    die "Database dir '$self->{dbdir}' isn't a directory" unless ( defined($self->{dbdir}) and -d $self->{dbdir} );
+
+    $self->{module} = $module;
+    $self->{file} = $self->{dbdir} . "/gitcvs.$module.sqlite";
+
+    $self->{git_path} = $config . "/";
+
+    $self->{log} = $log;
+
+    die "Git repo '$self->{git_path}' doesn't exist" unless ( -d $self->{git_path} );
+    # fail here with the DBI error rather than later on an undefined handle
+    $self->{dbh} = DBI->connect("dbi:SQLite:dbname=" . $self->{file},"","")
+        or die "Cannot connect to database '$self->{file}' : $DBI::errstr";
+    $self->{tables} = {};
+    foreach my $table ( $self->{dbh}->tables )
+    {
+        $table =~ s/^"//;
+        $table =~ s/"$//;
+        $self->{tables}{$table} = 1;
+    }
+
+    # Construct the revision table if required
+    unless ( $self->{tables}{revision} )
+    {
+        $self->{dbh}->do("
+            CREATE TABLE revision (
+                name       TEXT NOT NULL,
+                revision   INTEGER NOT NULL,
+                filehash   TEXT NOT NULL,
+                commithash TEXT NOT NULL,
+                author     TEXT NOT NULL,
+                modified   TEXT NOT NULL,
+                mode       TEXT NOT NULL
+            )
+        ");
+    }
+
+    # Construct the head table if required
+    unless ( $self->{tables}{head} )
+    {
+        $self->{dbh}->do("
+            CREATE TABLE head (
+                name       TEXT NOT NULL,
+                revision   INTEGER NOT NULL,
+                filehash   TEXT NOT NULL,
+                commithash TEXT NOT NULL,
+                author     TEXT NOT NULL,
+                modified   TEXT NOT NULL,
+                mode       TEXT NOT NULL
+            )
+        ");
+    }
+
+    # Construct the properties table if required
+    unless ( $self->{tables}{properties} )
+    {
+        $self->{dbh}->do("
+            CREATE TABLE properties (
+                key        TEXT NOT NULL PRIMARY KEY,
+                value      TEXT
+            )
+        ");
+    }
+
+    # Construct the commitmsgs table if required
+    unless ( $self->{tables}{commitmsgs} )
+    {
+        $self->{dbh}->do("
+            CREATE TABLE commitmsgs (
+                key        TEXT NOT NULL PRIMARY KEY,
+                value      TEXT
+            )
+        ");
+    }
+
+    return $self;
+}
+
+=head2 update
+
+=cut
+sub update
+{
+    my $self = shift;
+    # Brings the SQLite tables up to date with the module's branch: commits not yet recorded
+    # are walked parents-first and turned into per-file revision rows; head is then rewritten.
+    $ENV{GIT_DIR} = $self->{git_path};
+
+    # prepare database queries
+    my $db_insert_rev = $self->{dbh}->prepare_cached("INSERT INTO revision (name, revision, filehash, commithash, modified, author, mode) VALUES (?,?,?,?,?,?,?)",{},1);
+    my $db_insert_mergelog = $self->{dbh}->prepare_cached("INSERT INTO commitmsgs (key, value) VALUES (?,?)",{},1);
+    my $db_delete_head = $self->{dbh}->prepare_cached("DELETE FROM head",{},1);
+    my $db_insert_head = $self->{dbh}->prepare_cached("INSERT INTO head (name, revision, filehash, commithash, modified, author, mode) VALUES (?,?,?,?,?,?,?)",{},1);
+
+    my $commitinfo = `git-cat-file commit $self->{module} 2>&1`;
+    unless ( $commitinfo =~ /tree\s+[a-zA-Z0-9]{40}/ )
+    {
+        die("Invalid module '$self->{module}'");
+    }
+
+
+    # the commit stored under "last_commit" by an earlier run, if any
+    my $lastcommit = $self->_get_prop("last_commit");
+
+    # Start exclusive lock here...
+    $self->{dbh}->begin_work() or die "Cannot lock database for BEGIN";
+
+    # TODO: log processing is memory bound
+    # if we can parse into a 2nd file that is in reverse order
+    # we can probably do something really efficient
+    my @git_log_params = ('--parents', '--topo-order');
+
+    if (defined $lastcommit) {
+        push @git_log_params, "$lastcommit..$self->{module}";
+    } else {
+        push @git_log_params, $self->{module};
+    }
+    open(GITLOG, '-|', 'git-log', @git_log_params) or die "Cannot call git-log: $!";
+
+    my @commits;
+
+    my %commit = ();
+
+    while ( <GITLOG> )
+    {
+        chomp;
+        if (m/^commit\s+(.*)$/) {
+            # on ^commit lines put the just seen commit in the stack
+            # and prime things for the next one
+            if (keys %commit) {
+                my %copy = %commit;
+                unshift @commits, \%copy;
+                %commit = ();
+            }
+            my @parents = split(m/\s+/, $1);
+            $commit{hash} = shift @parents;
+            $commit{parents} = \@parents;
+        } elsif (m/^(\w+?):\s+(.*)$/ && !exists($commit{message})) {
+            # on rfc822-like lines seen before we see any message,
+            # lowercase the entry and put it in the hash as key-value
+            $commit{lc($1)} = $2;
+        } else {
+            # message lines - skip initial empty line
+            # and trim whitespace
+            if (!exists($commit{message}) && m/^\s*$/) {
+                # define it to mark the end of headers
+                $commit{message} = '';
+                next;
+            }
+            s/^\s+//; s/\s+$//; # trim ws
+            $commit{message} .= $_ . "\n";
+        }
+    }
+    close GITLOG;
+
+    unshift @commits, \%commit if ( keys %commit );
+
+    # Now all the commits are in the @commits bucket, in the reverse of
+    # git-log's output order (each one was unshift'ed). for each commit that
+    # needs processing, determine whether it's following the last head we've seen
+    # or if it's on its own branch, grab a file list, and add whatever's changed
+    # NOTE: $lastcommit refers to the last commit from previous run
+    #       $lastpicked is the last commit we picked in this run
+    my $lastpicked;
+    my $head = {};
+    if (defined $lastcommit) {
+        $lastpicked = $lastcommit;
+    }
+
+    my $committotal = scalar(@commits);
+    my $commitcount = 0;
+
+    # Load the head table into $head (for cached lookups during the update process)
+    foreach my $file ( @{$self->gethead()} )
+    {
+        $head->{$file->{name}} = $file;
+    }
+
+    foreach my $commit ( @commits )
+    {
+        $self->{log}->debug("GITCVS::updater - Processing commit $commit->{hash} (" . (++$commitcount) . " of $committotal)");
+        if (defined $lastpicked)
+        {
+            if (!in_array($lastpicked, @{$commit->{parents}}))
+            {
+                # skip, we'll see this delta
+                # as part of a merge later
+                # warn "skipping off-track  $commit->{hash}\n";
+                next;
+            } elsif (@{$commit->{parents}} > 1) {
+                # it is a merge commit, for each parent that is
+                # not $lastpicked, see if we can get a log
+                # from the merge-base to that parent to put it
+                # in the message as a merge summary.
+                my @parents = @{$commit->{parents}};
+                foreach my $parent (@parents) {
+                    # git-merge-base can potentially (but rarely) throw
+                    # several candidate merge bases. let's assume
+                    # that the first one is the best one.
+                    if ($parent eq $lastpicked) {
+                        next;
+                    }
+                    open(my $p, '-|', 'git-merge-base', $lastpicked, $parent)
+                        or die "Cannot call git-merge-base: $!";
+                    my @output = (<$p>);
+                    close $p;
+                    my $base = @output ? $output[0] : '';
+                    chomp $base;
+                    if ($base) {
+                        my @merged;
+                        # print "want to log between  $base $parent \n";
+                        open(GITLOG, '-|', 'git-log', "$base..$parent")
+                        or die "Cannot call git-log: $!";
+                        my $mergedhash;
+                        while (<GITLOG>) {
+                            chomp;
+                            if (!defined $mergedhash) {
+                                if (m/^commit\s+(.+)$/) {
+                                    $mergedhash = $1;
+                                } else {
+                                    next;
+                                }
+                            } else {
+                                # grab the first line that looks non-rfc822
+                                # aka has content after leading space
+                                if (m/^\s+(\S.*)$/) {
+                                    my $title = $1;
+                                    $title = substr($title,0,100); # truncate
+                                    unshift @merged, "$mergedhash $title";
+                                    undef $mergedhash;
+                                }
+                            }
+                        }
+                        close GITLOG;
+                        if (@merged) {
+                            $commit->{mergemsg} = $commit->{message};
+                            $commit->{mergemsg} .= "\nSummary of merged commits:\n\n";
+                            foreach my $summary (@merged) {
+                                $commit->{mergemsg} .= "\t$summary\n";
+                            }
+                            $commit->{mergemsg} .= "\n\n";
+                            # print "Message for $commit->{hash} \n$commit->{mergemsg}";
+                        }
+                    }
+                }
+            }
+        }
+
+        # convert the date to CVS-happy format
+        $commit->{date} = "$2 $1 $4 $3 $5" if ( $commit->{date} =~ /^\w+\s+(\w+)\s+(\d+)\s+(\d+:\d+:\d+)\s+(\d+)\s+([+-]\d+)$/ );
+
+        if ( defined ( $lastpicked ) )
+        {
+            open(FILELIST, '-|', 'git-diff-tree', '-r', $lastpicked, $commit->{hash}) or die("Cannot call git-diff-tree : $!");
+            while ( <FILELIST> )
+            {
+                unless ( /^:\d{6}\s+\d{3}(\d)\d{2}\s+[a-zA-Z0-9]{40}\s+([a-zA-Z0-9]{40})\s+(\w)\s+(.*)$/o )
+                {
+                    die("Couldn't process git-diff-tree line : $_");
+                }
+
+                # $log->debug("File mode=$1, hash=$2, change=$3, name=$4");
+
+                my $git_perms = "";
+                $git_perms .= "r" if ( $1 & 4 );
+                $git_perms .= "w" if ( $1 & 2 );
+                $git_perms .= "x" if ( $1 & 1 );
+                $git_perms = "rw" if ( $git_perms eq "" );
+
+                if ( $3 eq "D" )
+                {
+                    # NOTE(review): head gets "deleted" but the revision row below stores $2; the git-ls-tree branch stores "deleted" - confirm
+                    $head->{$4} = {
+                        name => $4,
+                        revision => $head->{$4}{revision} + 1,
+                        filehash => "deleted",
+                        commithash => $commit->{hash},
+                        modified => $commit->{date},
+                        author => $commit->{author},
+                        mode => $git_perms,
+                    };
+                    $db_insert_rev->execute($4, $head->{$4}{revision}, $2, $commit->{hash}, $commit->{date}, $commit->{author}, $git_perms);
+                }
+                elsif ( $3 eq "M" )
+                {
+                    #$log->debug("MODIFIED $4");
+                    $head->{$4} = {
+                        name => $4,
+                        revision => $head->{$4}{revision} + 1,
+                        filehash => $2,
+                        commithash => $commit->{hash},
+                        modified => $commit->{date},
+                        author => $commit->{author},
+                        mode => $git_perms,
+                    };
+                    $db_insert_rev->execute($4, $head->{$4}{revision}, $2, $commit->{hash}, $commit->{date}, $commit->{author}, $git_perms);
+                }
+                elsif ( $3 eq "A" )
+                {
+                    #$log->debug("ADDED    $4");
+                    $head->{$4} = {
+                        name => $4,
+                        revision => 1,
+                        filehash => $2,
+                        commithash => $commit->{hash},
+                        modified => $commit->{date},
+                        author => $commit->{author},
+                        mode => $git_perms,
+                    };
+                    $db_insert_rev->execute($4, $head->{$4}{revision}, $2, $commit->{hash}, $commit->{date}, $commit->{author}, $git_perms);
+                }
+                else
+                {
+                    $self->{log}->warn("UNKNOWN FILE CHANGE mode=$1, hash=$2, change=$3, name=$4");
+                    die("Unknown change type '$3' for '$4' in git-diff-tree output");
+                }
+            }
+            close FILELIST;
+        } else {
+            # this is used to detect files removed from the repo
+            my $seen_files = {};
+
+            open(FILELIST, '-|', 'git-ls-tree', '-r', $commit->{hash}) or die("Cannot call git-ls-tree : $!");
+            while ( <FILELIST> )
+            {
+                unless ( /^(\d+)\s+(\w+)\s+([a-zA-Z0-9]+)\s+(.*)$/o )
+                {
+                    die("Couldn't process git-ls-tree line : $_");
+                }
+
+                my ( $git_perms, $git_type, $git_hash, $git_filename ) = ( $1, $2, $3, $4 );
+
+                $seen_files->{$git_filename} = 1;
+
+                my ( $oldhash, $oldrevision, $oldmode ) = (
+                    $head->{$git_filename}{filehash},
+                    $head->{$git_filename}{revision},
+                    $head->{$git_filename}{mode}
+                );
+
+                if ( $git_perms =~ /^\d\d\d(\d)\d\d/o )
+                {
+                    $git_perms = "";
+                    $git_perms .= "r" if ( $1 & 4 );
+                    $git_perms .= "w" if ( $1 & 2 );
+                    $git_perms .= "x" if ( $1 & 1 );
+                } else {
+                    $git_perms = "rw";
+                }
+
+                # unless the file exists with the same hash, we need to update it ...
+                unless ( defined($oldhash) and $oldhash eq $git_hash and defined($oldmode) and $oldmode eq $git_perms )
+                {
+                    my $newrevision = ( $oldrevision or 0 ) + 1;
+
+                    $head->{$git_filename} = {
+                        name => $git_filename,
+                        revision => $newrevision,
+                        filehash => $git_hash,
+                        commithash => $commit->{hash},
+                        modified => $commit->{date},
+                        author => $commit->{author},
+                        mode => $git_perms,
+                    };
+
+
+                    $db_insert_rev->execute($git_filename, $newrevision, $git_hash, $commit->{hash}, $commit->{date}, $commit->{author}, $git_perms);
+                }
+            }
+            close FILELIST;
+
+            # Detect deleted files
+            foreach my $file ( keys %$head )
+            {
+                unless ( exists $seen_files->{$file} or $head->{$file}{filehash} eq "deleted" )
+                {
+                    $head->{$file}{revision}++;
+                    $head->{$file}{filehash} = "deleted";
+                    $head->{$file}{commithash} = $commit->{hash};
+                    $head->{$file}{modified} = $commit->{date};
+                    $head->{$file}{author} = $commit->{author};
+
+                    $db_insert_rev->execute($file, $head->{$file}{revision}, $head->{$file}{filehash}, $commit->{hash}, $commit->{date}, $commit->{author}, $head->{$file}{mode});
+                }
+            }
+            # END : "Detect deleted files"
+        }
+
+
+        if (exists $commit->{mergemsg})
+        {
+            $db_insert_mergelog->execute($commit->{hash}, $commit->{mergemsg});
+        }
+
+        $lastpicked = $commit->{hash};
+
+        $self->_set_prop("last_commit", $commit->{hash});
+    }
+
+    $db_delete_head->execute();
+    foreach my $file ( keys %$head )
+    {
+        $db_insert_head->execute(
+            $file,
+            $head->{$file}{revision},
+            $head->{$file}{filehash},
+            $head->{$file}{commithash},
+            $head->{$file}{modified},
+            $head->{$file}{author},
+            $head->{$file}{mode},
+        );
+    }
+    # invalidate the gethead cache
+    $self->{gethead_cache} = undef;
+
+
+    # Ending exclusive lock here
+    $self->{dbh}->commit() or die "Failed to commit changes to SQLite";
+}
+
+# Fetch ( filehash, revision, mode ) for $filename from the "head" table;
+# all three are undef when the query yields no row.
+sub _headrev
+{
+    my ( $self, $filename ) = @_;
+
+    my $sth = $self->{dbh}->prepare_cached("SELECT filehash, revision, mode FROM head WHERE name=?",{},1);
+    $sth->execute($filename);
+    my ( $filehash, $rev, $perms ) = $sth->fetchrow_array;
+    return ( $filehash, $rev, $perms );
+}
+
+# Fetch the value stored under $key in the "properties" table
+# (undef when the query yields no row).
+sub _get_prop
+{
+    my ( $self, $key ) = @_;
+
+    my $sth = $self->{dbh}->prepare_cached("SELECT value FROM properties WHERE key=?",{},1);
+    $sth->execute($key);
+    my ( $stored ) = $sth->fetchrow_array;
+    return $stored;
+}
+
+# Store $value under $key in the "properties" table: run an UPDATE first
+# and INSERT a new row only if the UPDATE reports no affected rows.
+# Returns $value.
+sub _set_prop
+{
+    my ( $self, $key, $value ) = @_;
+    my $dbh = $self->{dbh};
+
+    my $update = $dbh->prepare_cached("UPDATE properties SET value=? WHERE key=?",{},1);
+    $update->execute($value, $key);
+    if ( not $update->rows )
+    {
+        my $insert = $dbh->prepare_cached("INSERT INTO properties (key, value) VALUES (?,?)",{},1);
+        $insert->execute($key, $value);
+    }
+    return $value;
+}
+
+=head2 gethead
+
+=cut
+
+# Return an arrayref holding one hashref per row of the "head" table.
+# The arrayref is kept in $self->{gethead_cache} and handed back as-is
+# on later calls for as long as that slot stays defined.
+sub gethead
+{
+    my ( $self ) = @_;
+
+    my $cached = $self->{gethead_cache};
+    return $cached if defined $cached;
+
+    my $sth = $self->{dbh}->prepare_cached("SELECT name, filehash, mode, revision, modified, commithash, author FROM head",{},1);
+    $sth->execute();
+    my @rows;
+    while ( my $row = $sth->fetchrow_hashref )
+    {
+        push @rows, $row;
+    }
+    return $self->{gethead_cache} = \@rows;
+}
+
+=head2 getlog
+
+=cut
+
+# Return an arrayref of hashrefs, one per "revision" table row recorded
+# for $filename, ordered from the highest revision number down.
+sub getlog
+{
+    my ( $self, $filename ) = @_;
+
+    my $sth = $self->{dbh}->prepare_cached("SELECT name, filehash, author, mode, revision, modified, commithash FROM revision WHERE name=? ORDER BY revision DESC",{},1);
+    $sth->execute($filename);
+
+    my @entries;
+    while ( my $row = $sth->fetchrow_hashref )
+    {
+        push @entries, $row;
+    }
+    return \@entries;
+}
+
+=head2 getmeta
+
+This function takes a filename (with path) argument and returns a hashref of
+metadata for that file.
+
+=cut
+
+sub getmeta
+{
+    my ( $self, $filename, $revision ) = @_;
+
+    # Choose the lookup from the shape of $revision: all digits selects a
+    # revision number, 40 alphanumerics a commit hash, and anything else
+    # (including undef) falls back to the head table.
+    my ( $sql, @bind ) = ( "SELECT * FROM head WHERE name=?", $filename );
+    if ( defined($revision) )
+    {
+        if ( $revision =~ /^\d+$/ )
+        {
+            ( $sql, @bind ) = ( "SELECT * FROM revision WHERE name=? AND revision=?", $filename, $revision );
+        }
+        elsif ( $revision =~ /^[a-zA-Z0-9]{40}$/ )
+        {
+            ( $sql, @bind ) = ( "SELECT * FROM revision WHERE name=? AND commithash=?", $filename, $revision );
+        }
+    }
+    my $sth = $self->{dbh}->prepare_cached($sql,{},1);
+    $sth->execute(@bind);
+    return $sth->fetchrow_hashref;
+}
+
+=head2 commitmessage
+
+this function takes a commithash and returns the commit message for that commit
+
+=cut
+sub commitmessage
+{
+    my ( $self, $commithash ) = @_;
+
+    die("Need commithash") unless ( defined($commithash) and $commithash =~ /^[a-zA-Z0-9]{40}$/ );
+
+    # A message stored in the commitmsgs table takes precedence.
+    my $db_query = $self->{dbh}->prepare_cached("SELECT value FROM commitmsgs WHERE key=?",{},1);
+    $db_query->execute($commithash);
+    my ( $message ) = $db_query->fetchrow_array;
+
+    unless ( defined ( $message ) )
+    {
+        # Otherwise read the raw commit object and drop its leading lines
+        # up to (not including) the first line with no non-blank character.
+        # The @lines guard ends the loop cleanly, instead of matching
+        # against undef, when no such line is ever found.
+        my @lines = safe_pipe_capture("git-cat-file", "commit", $commithash);
+        shift @lines while ( @lines and $lines[0] =~ /\S/ );
+        $message = join("",@lines);
+    }
+    # A message ending in a newline gets one trailing space appended.
+    $message .= " " if ( $message =~ /\n$/ );
+    return $message;
+}
+
+=head2 gethistory
+
+This function takes a filename (with path) argument and returns an arrayofarrays
+containing revision,filehash,commithash ordered by revision descending
+
+=cut
+sub gethistory
+{
+    my ( $self, $filename ) = @_;
+
+    # Rows are [ revision, filehash, commithash ], highest revision first;
+    # the query does not filter on filehash.
+    my $sth = $self->{dbh}->prepare_cached("SELECT revision, filehash, commithash FROM revision WHERE name=? ORDER BY revision DESC",{},1);
+    $sth->execute($filename);
+
+    return $sth->fetchall_arrayref;
+}
+
+=head2 gethistorydense
+
+This function takes a filename (with path) argument and returns an arrayofarrays
+containing revision,filehash,commithash ordered by revision descending.
+
+This version of gethistory skips deleted entries -- so it is useful for annotate.
+The 'dense' part is a reference to a '--dense' option available for git-rev-list
+and other git tools that depend on it.
+
+=cut
+sub gethistorydense
+{
+    my ( $self, $filename ) = @_;
+
+    # Rows are [ revision, filehash, commithash ], highest revision first;
+    # rows whose filehash is 'deleted' are excluded by the query.
+    my $sth = $self->{dbh}->prepare_cached("SELECT revision, filehash, commithash FROM revision WHERE name=? AND filehash!='deleted' ORDER BY revision DESC",{},1);
+    $sth->execute($filename);
+
+    return $sth->fetchall_arrayref;
+}
+
+=head2 in_array()
+
+from Array::PAT - mimics the in_array() function
+found in PHP. Yuck but works for small arrays.
+
+=cut
+sub in_array
+{
+    my ($check, @array) = @_;
+
+    # String comparison (eq); return at the first hit instead of
+    # scanning the remainder of the list.
+    foreach my $test (@array) {
+        return 1 if $check eq $test;
+    }
+    return 0;
+}
+
+=head2 safe_pipe_capture
+
+an alternative to `command` that allows input to be passed as an array
+to work around shell problems with weird characters in arguments
+
+=cut
+sub safe_pipe_capture {
+    my @output;
+    # '-|' forks: pid in the parent, 0 in the child, undef if the fork failed
+    defined(my $pid = open(my $child, '-|')) or die "Cannot fork: $!";
+    if ($pid) {
+        @output = (<$child>);
+        close $child or die join(' ',@_).": $! $?";
+    } else {
+        exec(@_) or die "$! $?"; # exec() can fail if the executable can't be found
+    }
+    return wantarray ? @output : join('',@output);
+}
+
+
+1;
-- 
cgit v0.10.2-6-g49f6


From d27d5b3c5b97ca30dfc5c448dc8cdae914131051 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 22 Feb 2006 03:47:20 -0800
Subject: gitview: ls-remote invocation shellquote safety.

This will allow you to point GIT_DIR at directories with funny names.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/contrib/gitview/gitview b/contrib/gitview/gitview
index 5c338c0..4b52eb7 100755
--- a/contrib/gitview/gitview
+++ b/contrib/gitview/gitview
@@ -454,11 +454,7 @@ class GitView:
 
 		self.bt_sha1 = { }
 		ls_remote = re.compile('^(.{40})\trefs/([^^]+)(?:\\^(..))?$');
-		git_dir = os.getenv("GIT_DIR")
-		if (git_dir == None):
-			git_dir = ".git"
-
-		fp = os.popen('git ls-remote ' + git_dir)
+		fp = os.popen('git ls-remote "${GIT_DIR-.git}"')
 		while 1:
 			line = string.strip(fp.readline())
 			if line == '':
-- 
cgit v0.10.2-6-g49f6


From 26125f6b9be8dac31f56bf3da60dfa0df6dc0b59 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 22 Feb 2006 11:16:38 -0800
Subject: detect broken alternates.

The real problem that triggered an earlier fix was that an alternate
entry was pointing at a removed directory.  Complaining on
object/pack directory that cannot be opendir-ed produces noise
in an ancient repository that does not have object/pack
directory and has never been packed.

Detect the real user error and report it.  Also if opendir
failed for other reasons (e.g. no read permissions), report that
as well.

Spotted by Andrew Vasquez <andrew.vasquez@qlogic.com>.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/sha1_file.c b/sha1_file.c
index f08b1d6..c08da35 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -247,6 +247,7 @@ static void link_alt_odb_entries(const char *alt, const char *ep, int sep,
 		for ( ; cp < ep && *cp != sep; cp++)
 			;
 		if (last != cp) {
+			struct stat st;
 			struct alternate_object_database *alt;
 			/* 43 = 40-byte + 2 '/' + terminating NUL */
 			int pfxlen = cp - last;
@@ -269,9 +270,19 @@ static void link_alt_odb_entries(const char *alt, const char *ep, int sep,
 			}
 			else
 				memcpy(ent->base, last, pfxlen);
+
 			ent->name = ent->base + pfxlen + 1;
-			ent->base[pfxlen] = ent->base[pfxlen + 3] = '/';
-			ent->base[entlen-1] = 0;
+			ent->base[pfxlen + 3] = '/';
+			ent->base[pfxlen] = ent->base[entlen-1] = 0;
+
+			/* Detect cases where alternate disappeared */
+			if (stat(ent->base, &st) || !S_ISDIR(st.st_mode)) {
+				error("object directory %s does not exist; "
+				      "check .git/objects/info/alternates.",
+				      ent->base);
+				goto bad;
+			}
+			ent->base[pfxlen] = '/';
 
 			/* Prevent the common mistake of listing the same
 			 * thing twice, or object directory itself.
@@ -552,7 +563,9 @@ static void prepare_packed_git_one(char *objdir, int local)
 	len = strlen(path);
 	dir = opendir(path);
 	if (!dir) {
-		fprintf(stderr, "unable to open object pack directory: %s: %s\n", path, strerror(errno));
+		if (errno != ENOENT)
+			error("unable to open object pack directory: %s: %s\n",
+			      path, strerror(errno));
 		return;
 	}
 	path[len++] = '/';
-- 
cgit v0.10.2-6-g49f6


From 3f9ac8d259fb919e001671c5e403e5fceaabf0d8 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 15 Feb 2006 17:34:29 -0800
Subject: pack-objects: reuse data from existing packs.

When generating a new pack, notice if we have already needed
objects in existing packs.  If an object is stored deltified,
and its base object is also what we are going to pack, then
reuse the existing deltified representation unconditionally,
bypassing all the expensive find_deltas() and try_deltas()
calls.

Also, notice if what we are going to write out exactly matches
what is already in an existing pack (either deltified or just
compressed).  In such a case, we can just copy it instead of
going through the usual uncompressing & recompressing cycle.

Without this patch, in linux-2.6 repository with about 1500
loose objects and a single mega pack:

    $ git-rev-list --objects v2.6.16-rc3 >RL
    $ wc -l RL
    184141 RL
    $ time git-pack-objects p <RL
    Generating pack...
    Done counting 184141 objects.
    Packing 184141 objects....................
    a1fc7b3e537fcb9b3c46b7505df859f0a11e79d2

    real    12m4.323s
    user    11m2.560s
    sys     0m55.950s

With this patch, the same input:

    $ time ../git.junio/git-pack-objects q <RL
    Generating pack...
    Done counting 184141 objects.
    Packing 184141 objects.....................
    a1fc7b3e537fcb9b3c46b7505df859f0a11e79d2
    Total 184141, written 184141, reused 182441

    real    1m2.608s
    user    0m55.090s
    sys     0m1.830s

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index c5a5e61..70fb2af 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -9,15 +9,31 @@ static const char pack_usage[] = "git-pack-objects [-q] [--non-empty] [--local]
 
 struct object_entry {
 	unsigned char sha1[20];
-	unsigned long size;
-	unsigned long offset;
-	unsigned int depth;
-	unsigned int hash;
+	unsigned long size;	/* uncompressed size */
+	unsigned long offset;	/* offset into the final pack file (nonzero if already written) */
+	unsigned int depth;	/* delta depth */
+	unsigned int hash;	/* name hint hash */
 	enum object_type type;
-	unsigned long delta_size;
-	struct object_entry *delta;
+	unsigned long delta_size;	/* delta data size (uncompressed) */
+	struct object_entry *delta;	/* delta base object */
+	struct packed_git *in_pack; 	/* already in pack */
+	enum object_type in_pack_type;	/* could be delta */
+	unsigned int in_pack_offset;
 };
 
+/*
+ * Objects we are going to pack are collected in objects array (dynamically
+ * expanded).  nr_objects & nr_alloc controls this array.  They are stored
+ * in the order we see -- typically rev-list --objects order that gives us
+ * nice "minimum seek" order.
+ *
+ * sorted-by-sha and sorted-by-type are arrays of pointers that point at
+ * elements in the objects array.  The former is used to build the pack
+ * index (lists object names in the ascending order to help offset lookup),
+ * and the latter is used to group similar things together by try_delta()
+ * heuristics.
+ */
+
 static unsigned char object_list_sha1[20];
 static int non_empty = 0;
 static int local = 0;
@@ -29,6 +45,135 @@ static const char *base_name;
 static unsigned char pack_file_sha1[20];
 static int progress = 1;
 
+/*
+ * The object names in objects array are hashed with this hashtable,
+ * to help looking up the entry by object name.  Binary search from
+ * sorted_by_sha is also possible but this was easier to code and faster.
+ * This hashtable is built after all the objects are seen.
+ */
+static int *object_ix = NULL;
+static int object_ix_hashsz = 0;
+
+/*
+ * Pack index for existing packs give us easy access to the offsets into
+ * corresponding pack file where each object's data starts, but the entries
+ * do not store the size of the compressed representation (uncompressed
+ * size is easily available by examining the pack entry header).  We build
+ * a hashtable of existing packs (pack_revindex), and keep reverse index
+ * here -- pack index file is sorted by object name mapping to offset; this
+ * pack_revindex[].revindex array is an ordered list of offsets, so if you
+ * know the offset of an object, next offset is where its packed
+ * representation ends.
+ */
+struct pack_revindex {
+	struct packed_git *p;		/* pack this slot is for; NULL while the slot is unused */
+	unsigned long *revindex;	/* ascending object offsets; NULL until first looked up */
+} *pack_revindex = NULL;
+static int pack_revindex_hashsz = 0;
+
+/*
+ * stats
+ */
+static int written = 0;
+static int reused = 0;
+
+static int pack_revindex_ix(struct packed_git *p)
+{
+	/* Returns p's slot (>= 0), or -1 - <free slot> when p is absent.
+	 * unsigned long keeps pointer bits that unsigned int drops on LP64.
+	 */
+	unsigned long ui = (unsigned long) p;
+	int i;
+
+	ui = ui ^ (ui >> 16); /* defeat structure alignment */
+	i = (int)(ui % pack_revindex_hashsz);
+	for (; pack_revindex[i].p; i = (i + 1) % pack_revindex_hashsz)
+		if (pack_revindex[i].p == p)
+			return i;
+	return -1 - i;
+}
+
+static void prepare_pack_ix(void)
+{
+	/* Size pack_revindex[] at 11 slots per known pack and file each pack. */
+	struct packed_git *pack;
+	int nr_packs = 0;
+
+	for (pack = packed_git; pack; pack = pack->next)
+		nr_packs++;
+	if (nr_packs == 0)
+		return;
+	pack_revindex_hashsz = nr_packs * 11;
+	pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
+	/* a pack not stored yet comes back from the lookup as -1 - <free slot> */
+	for (pack = packed_git; pack; pack = pack->next)
+		pack_revindex[-1 - pack_revindex_ix(pack)].p = pack;
+	/* revindex elements are lazily initialized */
+}
+
+static int cmp_offset(const void *a_, const void *b_)
+{
+	/* qsort() comparator: ascending order of unsigned long values */
+	const unsigned long *lhs = a_;
+	const unsigned long *rhs = b_;
+	if (*lhs < *rhs)
+		return -1;
+	if (*lhs > *rhs)
+		return 1;
+	return 0;
+}
+
+/*
+ * Ordered list of offsets of objects in the pack.
+ */
+static void prepare_pack_revindex(struct pack_revindex *rix)
+{
+	struct packed_git *p = rix->p;
+	int num_ent = num_packed_objects(p);
+	int i;
+	void *index = p->index_base + 256;
+
+	rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1));
+	for (i = 0; i < num_ent; i++) {
+		/* entries are 24 bytes, led by a 32-bit network-order offset: read 4 bytes */
+		unsigned int hl = *((unsigned int *)((char *) index + 24 * i));
+		rix->revindex[i] = ntohl(hl);
+	}
+	/* This knows the pack format -- the 20-byte trailer follows
+	 * immediately after the last object data. */
+	rix->revindex[num_ent] = p->pack_size - 20;
+	qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset);
+}
+
+static unsigned long find_packed_object_size(struct packed_git *p,
+					     unsigned long ofs)
+{
+	struct pack_revindex *rix;
+	const unsigned long *offsets;
+	int slot, lo, hi;
+	slot = pack_revindex_ix(p);
+	if (slot < 0)
+		die("internal error: pack revindex uninitialized");
+	rix = &pack_revindex[slot];
+	if (!rix->revindex)
+		prepare_pack_revindex(rix);	/* built on first use */
+	offsets = rix->revindex;
+	/* Binary-search the sorted offsets for ofs; the packed size is the
+	 * distance from ofs to the next offset in the list. */
+	lo = 0;
+	hi = num_packed_objects(p) + 1;
+	do {
+		int mi = (lo + hi) / 2;
+		if (ofs < offsets[mi])
+			hi = mi;
+		else if (offsets[mi] < ofs)
+			lo = mi + 1;
+		else
+			return offsets[mi+1] - ofs;
+	} while (lo < hi);
+	die("internal error: pack revindex corrupt");
+}
+
 static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
 {
 	unsigned long othersize, delta_size;
@@ -78,35 +223,52 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry
 {
 	unsigned long size;
 	char type[10];
-	void *buf = read_sha1_file(entry->sha1, type, &size);
+	void *buf;
 	unsigned char header[10];
 	unsigned hdrlen, datalen;
 	enum object_type obj_type;
 
-	if (!buf)
-		die("unable to read %s", sha1_to_hex(entry->sha1));
-	if (size != entry->size)
-		die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
-
-	/*
-	 * The object header is a byte of 'type' followed by zero or
-	 * more bytes of length.  For deltas, the 20 bytes of delta sha1
-	 * follows that.
-	 */
 	obj_type = entry->type;
-	if (entry->delta) {
-		buf = delta_against(buf, size, entry);
-		size = entry->delta_size;
-		obj_type = OBJ_DELTA;
+	if (!entry->in_pack ||
+	    (obj_type != entry->in_pack_type)) {
+		buf = read_sha1_file(entry->sha1, type, &size);
+		if (!buf)
+			die("unable to read %s", sha1_to_hex(entry->sha1));
+		if (size != entry->size)
+			die("object %s size inconsistency (%lu vs %lu)",
+			    sha1_to_hex(entry->sha1), size, entry->size);
+		if (entry->delta) {
+			buf = delta_against(buf, size, entry);
+			size = entry->delta_size;
+			obj_type = OBJ_DELTA;
+		}
+		/*
+		 * The object header is a byte of 'type' followed by zero or
+		 * more bytes of length.  For deltas, the 20 bytes of delta
+		 * sha1 follows that.
+		 */
+		hdrlen = encode_header(obj_type, size, header);
+		sha1write(f, header, hdrlen);
+
+		if (entry->delta) {
+			sha1write(f, entry->delta, 20);
+			hdrlen += 20;
+		}
+		datalen = sha1write_compressed(f, buf, size);
+		free(buf);
 	}
-	hdrlen = encode_header(obj_type, size, header);
-	sha1write(f, header, hdrlen);
-	if (entry->delta) {
-		sha1write(f, entry->delta, 20);
-		hdrlen += 20;
+	else {
+		struct packed_git *p = entry->in_pack;
+		use_packed_git(p);
+
+		datalen = find_packed_object_size(p, entry->in_pack_offset);
+		buf = p->pack_base + entry->in_pack_offset;
+		sha1write(f, buf, datalen);
+		unuse_packed_git(p);
+		hdrlen = 0; /* not really */
+		reused++;
 	}
-	datalen = sha1write_compressed(f, buf, size);
-	free(buf);
+	written++;
 	return hdrlen + datalen;
 }
 
@@ -148,8 +310,6 @@ static void write_pack_file(void)
 		offset = write_one(f, objects + i, offset);
 
 	sha1close(f, pack_file_sha1, 1);
-	mb = offset >> 20;
-	offset &= 0xfffff;
 }
 
 static void write_index_file(void)
@@ -196,18 +356,21 @@ static int add_object_entry(unsigned char *sha1, unsigned int hash)
 {
 	unsigned int idx = nr_objects;
 	struct object_entry *entry;
-
-	if (incremental || local) {
-		struct packed_git *p;
-
-		for (p = packed_git; p; p = p->next) {
-			struct pack_entry e;
-
-			if (find_pack_entry_one(sha1, &e, p)) {
-				if (incremental)
-					return 0;
-				if (local && !p->pack_local)
-					return 0;
+	struct packed_git *p;
+	unsigned int found_offset;
+	struct packed_git *found_pack;
+
+	found_pack = NULL;
+	for (p = packed_git; p; p = p->next) {
+		struct pack_entry e;
+		if (find_pack_entry_one(sha1, &e, p)) {
+			if (incremental)
+				return 0;
+			if (local && !p->pack_local)
+				return 0;
+			if (!found_pack) {
+				found_offset = e.offset;
+				found_pack = e.p;
 			}
 		}
 	}
@@ -221,30 +384,107 @@ static int add_object_entry(unsigned char *sha1, unsigned int hash)
 	memset(entry, 0, sizeof(*entry));
 	memcpy(entry->sha1, sha1, 20);
 	entry->hash = hash;
+	if (found_pack) {
+		entry->in_pack = found_pack;
+		entry->in_pack_offset = found_offset;
+	}
 	nr_objects = idx+1;
 	return 1;
 }
 
+static int locate_object_entry_hash(unsigned char *sha1)
+{
+	/* Returns the object_ix[] slot holding sha1 (>= 0), or
+	 * -1 - <first free slot probed> when sha1 is not in the table.
+	 */
+	unsigned int seed;
+	int slot;
+	memcpy(&seed, sha1, sizeof(seed));	/* leading name bytes pick the start slot */
+	slot = seed % object_ix_hashsz;
+	for (; 0 < object_ix[slot]; slot = (slot + 1) % object_ix_hashsz)
+		if (memcmp(sha1, objects[object_ix[slot]-1].sha1, 20) == 0)
+			return slot;
+	return -1 - slot;
+}
+
+static struct object_entry *locate_object_entry(unsigned char *sha1)
+{
+	/* object_ix[] stores index + 1 into objects[]; a negative slot
+	 * from the hash lookup means sha1 is not among them. */
+	int slot = locate_object_entry_hash(sha1);
+	return (slot < 0) ? NULL : objects + (object_ix[slot] - 1);
+}
+
 static void check_object(struct object_entry *entry)
 {
 	char type[20];
 
-	if (!sha1_object_info(entry->sha1, type, &entry->size)) {
-		if (!strcmp(type, "commit")) {
-			entry->type = OBJ_COMMIT;
-		} else if (!strcmp(type, "tree")) {
-			entry->type = OBJ_TREE;
-		} else if (!strcmp(type, "blob")) {
-			entry->type = OBJ_BLOB;
-		} else if (!strcmp(type, "tag")) {
-			entry->type = OBJ_TAG;
-		} else
-			die("unable to pack object %s of type %s",
-			    sha1_to_hex(entry->sha1), type);
+	if (entry->in_pack) {
+		/* Check if it is delta, and the base is also an object
+		 * we are going to pack.  If so we will reuse the existing
+		 * delta.
+		 */
+		unsigned char base[20];
+		unsigned long size;
+		struct object_entry *base_entry;
+		if (!check_reuse_pack_delta(entry->in_pack,
+					    entry->in_pack_offset,
+					    base, &size,
+					    &entry->in_pack_type) &&
+		    (base_entry = locate_object_entry(base))) {
+			/* We do not know depth at this point, but it
+			 * does not matter.  Getting delta_chain_length
+			 * with packed_object_info_detail() is not so
+			 * expensive, so we could do that later if we
+			 * wanted to.  Calling sha1_object_info to get
+			 * the true size (and later an uncompressed
+			 * representation) of deeply deltified object
+			 * is quite expensive.
+			 */
+			entry->depth = 1;
+			/* uncompressed size */
+			entry->size = entry->delta_size = size;
+			entry->delta = base_entry;
+			entry->type = OBJ_DELTA;
+			return;
+		}
+		/* Otherwise we would do the usual */
 	}
-	else
+
+	if (sha1_object_info(entry->sha1, type, &entry->size))
 		die("unable to get type of object %s",
 		    sha1_to_hex(entry->sha1));
+
+	if (!strcmp(type, "commit")) {
+		entry->type = OBJ_COMMIT;
+	} else if (!strcmp(type, "tree")) {
+		entry->type = OBJ_TREE;
+	} else if (!strcmp(type, "blob")) {
+		entry->type = OBJ_BLOB;
+	} else if (!strcmp(type, "tag")) {
+		entry->type = OBJ_TAG;
+	} else
+		die("unable to pack object %s of type %s",
+		    sha1_to_hex(entry->sha1), type);
+}
+
+static void hash_objects(void)
+{
+	int i;
+
+	/* twice as many slots as objects, so probing always finds a gap */
+	object_ix_hashsz = nr_objects * 2;
+	object_ix = xcalloc(sizeof(int), object_ix_hashsz);
+	for (i = 0; i < nr_objects; i++) {
+		int ix = locate_object_entry_hash(objects[i].sha1);
+		if (ix < 0) {
+			/* free slot: store the 1-based index into objects[] */
+			object_ix[-1 - ix] = i + 1;
+			continue;
+		}
+		error("the same object '%s' added twice",
+		      sha1_to_hex(objects[i].sha1));
+	}
+}
 
 static void get_object_details(void)
@@ -252,6 +492,8 @@ static void get_object_details(void)
 	int i;
 	struct object_entry *entry = objects;
 
+	hash_objects();
+	prepare_pack_ix();
 	for (i = 0; i < nr_objects; i++)
 		check_object(entry++);
 }
@@ -382,6 +624,13 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 			eye_candy -= nr_objects / 20;
 			fputc('.', stderr);
 		}
+
+		if (entry->delta)
+			/* This happens if we decided to reuse existing
+			 * delta from a pack.
+			 */
+			continue;
+
 		free(n->data);
 		n->entry = entry;
 		n->data = read_sha1_file(entry->sha1, type, &size);
@@ -411,10 +660,12 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 
 static void prepare_pack(int window, int depth)
 {
-	get_object_details();
-
 	if (progress)
 		fprintf(stderr, "Packing %d objects", nr_objects);
+	get_object_details();
+	if (progress)
+		fprintf(stderr, ".");
+
 	sorted_by_type = create_sorted_list(type_size_sort);
 	if (window && depth)
 		find_deltas(sorted_by_type, window+1, depth);
@@ -599,5 +850,7 @@ int main(int argc, char **argv)
 			puts(sha1_to_hex(object_list_sha1));
 		}
 	}
+	fprintf(stderr, "Total %d, written %d, reused %d\n",
+		nr_objects, written, reused);
 	return 0;
 }
diff --git a/pack.h b/pack.h
index 9dafa2b..694e0c5 100644
--- a/pack.h
+++ b/pack.h
@@ -29,5 +29,7 @@ struct pack_header {
 };
 
 extern int verify_pack(struct packed_git *, int);
-
+extern int check_reuse_pack_delta(struct packed_git *, unsigned long,
+				  unsigned char *, unsigned long *,
+				  enum object_type *);
 #endif
diff --git a/sha1_file.c b/sha1_file.c
index c08da35..f4b1089 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -841,6 +841,25 @@ static unsigned long unpack_object_header(struct packed_git *p, unsigned long of
 	return offset;
 }
 
+int check_reuse_pack_delta(struct packed_git *p, unsigned long offset,
+			   unsigned char *base, unsigned long *sizep,
+			   enum object_type *kindp)
+{
+	/* Parse the entry header at offset, filling *kindp and *sizep.  Only
+	 * for an OBJ_DELTA entry: copy the 20 bytes found where the header
+	 * parser stops into base and return 0.  Otherwise return -1.
+	 */
+	unsigned long data_ofs;
+	int is_delta;
+	use_packed_git(p);
+	data_ofs = unpack_object_header(p, offset, kindp, sizep);
+	is_delta = (*kindp == OBJ_DELTA);
+	if (is_delta)
+		memcpy(base, p->pack_base + data_ofs, 20);
+	unuse_packed_git(p);
+	return is_delta ? 0 : -1;
+}
+
 void packed_object_info_detail(struct pack_entry *e,
 			       char *type,
 			       unsigned long *size,
-- 
cgit v0.10.2-6-g49f6


From ab7cd7bb8c02dc40ca3a909653e8f56226f9e440 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 16 Feb 2006 11:55:51 -0800
Subject: pack-objects: finishing touches.

This introduces --no-reuse-delta option to disable reusing of
existing delta, which is a large part of the optimization
introduced by this series.  This may become necessary if
repeated repacking makes delta chain too long.  With this, the
output of the command becomes identical to that of the older
implementation.  But the performance suffers greatly.

It still allows reusing non-deltified representations; there is
no point uncompressing and recompressing the whole text.

It also adds a couple more statistics output, while squelching
it under -q flag, which the last round forgot to do.

  $ time old-git-pack-objects --stdout >/dev/null <RL
  Generating pack...
  Done counting 184141 objects.
  Packing 184141 objects....................
  real    12m8.530s       user    11m1.450s       sys     0m57.920s
  $ time git-pack-objects --stdout >/dev/null <RL
  Generating pack...
  Done counting 184141 objects.
  Packing 184141 objects.....................
  Total 184141, written 184141 (delta 138297), reused 178833 (delta 134081)
  real    0m59.549s       user    0m56.670s       sys     0m2.400s
  $ time git-pack-objects --stdout --no-reuse-delta >/dev/null <RL
  Generating pack...
  Done counting 184141 objects.
  Packing 184141 objects.....................
  Total 184141, written 184141 (delta 134833), reused 47904 (delta 0)
  real    11m13.830s      user    9m45.240s       sys     0m44.330s

There is one remaining issue when --no-reuse-delta option is not
used.  It can create delta chains that are deeper than specified.

    A<--B<--C<--D   E   F   G

Suppose we have a delta chain A to D (A is stored in full either
in a pack or as a loose object. B is depth1 delta relative to A,
C is depth2 delta relative to B...) with loose objects E, F, G.
And we are going to pack all of them.

B, C and D are left as delta against A, B and C respectively.
So A, E, F, and G are examined for deltification, and let's say
we decided to keep E expanded, and store the rest as deltas like
this:

    E<--F<--G<--A

Oops.  We ended up making D a bit too deep, didn't we?  B, C and
D form a chain on top of A!

This is because we did not know what the final depth of A would
be, when we checked objects and decided to keep the existing
delta.  Unfortunately, deferring the decision until just before
the deltification is not an option.  To be able to make B, C,
and D candidates for deltification with the rest, we need to
know the type and final unexpanded size of them, but the major
part of the optimization comes from the fact that we do not read
the delta data to do so -- getting the final size is quite an
expensive operation.

To prevent this from happening, we should keep A from being
deltified.  But how would we tell that, cheaply?

To do this most precisely, after check_object() runs, each
object that is used as the base object of some existing delta
needs to be marked with the maximum depth of the objects we
decided to keep deltified (in this case, D is depth 3 relative
to A, so if no other delta chain that is longer than 3 based on
A exists, mark A with 3).  Then when attempting to deltify A, we
would take that number into account to see if the final delta
chain that leads to D becomes too deep.

However, this is a bit cumbersome to compute, so we would cheat
and reduce the maximum depth for A arbitrarily to depth/4 in
this implementation.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt
index 2d67d39..4cb2e83 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -8,7 +8,10 @@ git-pack-objects - Create a packed archive of objects.
 
 SYNOPSIS
 --------
-'git-pack-objects' [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list
+[verse]
+'git-pack-objects' [-q] [--no-reuse-delta] [--non-empty]
+	[--local] [--incremental] [--window=N] [--depth=N]
+	{--stdout | base-name} < object-list
 
 
 DESCRIPTION
@@ -32,6 +35,10 @@ Placing both in the pack/ subdirectory of $GIT_OBJECT_DIRECTORY (or
 any of the directories on $GIT_ALTERNATE_OBJECT_DIRECTORIES)
 enables git to read from such an archive.
 
+In a packed archive, an object is either stored as a compressed
+whole, or as a difference from some other object.  The latter is
+often called a delta.
+
 
 OPTIONS
 -------
@@ -74,6 +81,18 @@ base-name::
         Only create a packed archive if it would contain at
         least one object.
 
+-q::
+	This flag makes the command not report its progress
+	on the standard error stream.
+
+--no-reuse-delta::
+	When creating a packed archive in a repository that
+	has existing packs, the command reuses existing deltas.
+	This sometimes results in a slightly suboptimal pack.
+	This flag tells the command not to reuse existing deltas
+	but compute them from scratch.
+
+
 Author
 ------
 Written by Linus Torvalds <torvalds@osdl.org>
diff --git a/pack-objects.c b/pack-objects.c
index 70fb2af..38e1c99 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -5,7 +5,7 @@
 #include "csum-file.h"
 #include <sys/time.h>
 
-static const char pack_usage[] = "git-pack-objects [-q] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
+static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
 
 struct object_entry {
 	unsigned char sha1[20];
@@ -14,10 +14,11 @@ struct object_entry {
 	unsigned int depth;	/* delta depth */
 	unsigned int hash;	/* name hint hash */
 	enum object_type type;
+	unsigned char edge;	/* reused delta chain points at this entry. */
+	enum object_type in_pack_type;	/* could be delta */
 	unsigned long delta_size;	/* delta data size (uncompressed) */
 	struct object_entry *delta;	/* delta base object */
 	struct packed_git *in_pack; 	/* already in pack */
-	enum object_type in_pack_type;	/* could be delta */
 	unsigned int in_pack_offset;
 };
 
@@ -36,6 +37,7 @@ struct object_entry {
 
 static unsigned char object_list_sha1[20];
 static int non_empty = 0;
+static int no_reuse_delta = 0;
 static int local = 0;
 static int incremental = 0;
 static struct object_entry **sorted_by_sha, **sorted_by_type;
@@ -75,7 +77,9 @@ static int pack_revindex_hashsz = 0;
  * stats
  */
 static int written = 0;
+static int written_delta = 0;
 static int reused = 0;
+static int reused_delta = 0;
 
 static int pack_revindex_ix(struct packed_git *p)
 {
@@ -227,10 +231,23 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry
 	unsigned char header[10];
 	unsigned hdrlen, datalen;
 	enum object_type obj_type;
+	int to_reuse = 0;
 
 	obj_type = entry->type;
-	if (!entry->in_pack ||
-	    (obj_type != entry->in_pack_type)) {
+	if (! entry->in_pack)
+		to_reuse = 0;	/* can't reuse what we don't have */
+	else if (obj_type == OBJ_DELTA)
+		to_reuse = 1;	/* check_object() decided it for us */
+	else if (obj_type != entry->in_pack_type)
+		to_reuse = 0;	/* pack has delta which is unusable */
+	else if (entry->delta)
+		to_reuse = 0;	/* we want to pack afresh */
+	else
+		to_reuse = 1;	/* we have it in-pack undeltified,
+				 * and we do not need to deltify it.
+				 */
+
+	if (! to_reuse) {
 		buf = read_sha1_file(entry->sha1, type, &size);
 		if (!buf)
 			die("unable to read %s", sha1_to_hex(entry->sha1));
@@ -266,8 +283,12 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry
 		sha1write(f, buf, datalen);
 		unuse_packed_git(p);
 		hdrlen = 0; /* not really */
+		if (obj_type == OBJ_DELTA)
+			reused_delta++;
 		reused++;
 	}
+	if (obj_type == OBJ_DELTA)
+		written_delta++;
 	written++;
 	return hdrlen + datalen;
 }
@@ -294,7 +315,6 @@ static void write_pack_file(void)
 	int i;
 	struct sha1file *f;
 	unsigned long offset;
-	unsigned long mb;
 	struct pack_header hdr;
 
 	if (!base_name)
@@ -357,10 +377,9 @@ static int add_object_entry(unsigned char *sha1, unsigned int hash)
 	unsigned int idx = nr_objects;
 	struct object_entry *entry;
 	struct packed_git *p;
-	unsigned int found_offset;
-	struct packed_git *found_pack;
+	unsigned int found_offset = 0;
+	struct packed_git *found_pack = NULL;
 
-	found_pack = NULL;
 	for (p = packed_git; p; p = p->next) {
 		struct pack_entry e;
 		if (find_pack_entry_one(sha1, &e, p)) {
@@ -420,32 +439,39 @@ static void check_object(struct object_entry *entry)
 	char type[20];
 
 	if (entry->in_pack) {
+		unsigned char base[20];
+		unsigned long size;
+		struct object_entry *base_entry;
+
+		/* We want in_pack_type even if we do not reuse delta.
+		 * There is no point not reusing non-delta representations.
+		 */
+		check_reuse_pack_delta(entry->in_pack,
+				       entry->in_pack_offset,
+				       base, &size,
+				       &entry->in_pack_type);
+
 		/* Check if it is delta, and the base is also an object
 		 * we are going to pack.  If so we will reuse the existing
 		 * delta.
 		 */
-		unsigned char base[20];
-		unsigned long size;
-		struct object_entry *base_entry;
-		if (!check_reuse_pack_delta(entry->in_pack,
-					    entry->in_pack_offset,
-					    base, &size,
-					    &entry->in_pack_type) &&
+		if (!no_reuse_delta &&
+		    entry->in_pack_type == OBJ_DELTA &&
 		    (base_entry = locate_object_entry(base))) {
-			/* We do not know depth at this point, but it
-			 * does not matter.  Getting delta_chain_length
-			 * with packed_object_info_detail() is not so
-			 * expensive, so we could do that later if we
-			 * wanted to.  Calling sha1_object_info to get
-			 * the true size (and later an uncompressed
-			 * representation) of deeply deltified object
-			 * is quite expensive.
+
+			/* Depth value does not matter - find_deltas()
+			 * will never consider reused delta as the
+			 * base object to deltify other objects
+			 * against, in order to avoid circular deltas.
 			 */
-			entry->depth = 1;
-			/* uncompressed size */
+
+			/* uncompressed size of the delta data */
 			entry->size = entry->delta_size = size;
 			entry->delta = base_entry;
 			entry->type = OBJ_DELTA;
+
+			base_entry->edge = 1;
+
 			return;
 		}
 		/* Otherwise we would do the usual */
@@ -568,6 +594,13 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 	if (cur_entry->type != old_entry->type)
 		return -1;
 
+	/* If the current object is at edge, take the depth the objects
+	 * that depend on the current object into account -- otherwise
+	 * they would become too deep.
+	 */
+	if (cur_entry->edge)
+		max_depth /= 4;
+
 	size = cur_entry->size;
 	if (size < 50)
 		return -1;
@@ -627,7 +660,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 
 		if (entry->delta)
 			/* This happens if we decided to reuse existing
-			 * delta from a pack.
+			 * delta from a pack.  "!no_reuse_delta &&" is implied.
 			 */
 			continue;
 
@@ -636,6 +669,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		n->data = read_sha1_file(entry->sha1, type, &size);
 		if (size != entry->size)
 			die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
+
 		j = window;
 		while (--j > 0) {
 			unsigned int other_idx = idx + j;
@@ -664,7 +698,7 @@ static void prepare_pack(int window, int depth)
 		fprintf(stderr, "Packing %d objects", nr_objects);
 	get_object_details();
 	if (progress)
-		fprintf(stderr, ".");
+		fputc('.', stderr);
 
 	sorted_by_type = create_sorted_list(type_size_sort);
 	if (window && depth)
@@ -694,8 +728,9 @@ static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
 		}
 	}
 
-	fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
-		sha1_to_hex(sha1));
+	if (progress)
+		fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
+			sha1_to_hex(sha1));
 
 	if (pack_to_stdout) {
 		if (copy_fd(ifd, 1))
@@ -775,6 +810,10 @@ int main(int argc, char **argv)
 				progress = 0;
 				continue;
 			}
+			if (!strcmp("--no-reuse-delta", arg)) {
+				no_reuse_delta = 1;
+				continue;
+			}
 			if (!strcmp("--stdout", arg)) {
 				pack_to_stdout = 1;
 				continue;
@@ -850,7 +889,8 @@ int main(int argc, char **argv)
 			puts(sha1_to_hex(object_list_sha1));
 		}
 	}
-	fprintf(stderr, "Total %d, written %d, reused %d\n",
-		nr_objects, written, reused);
+	if (progress)
+		fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n",
+			nr_objects, written, written_delta, reused, reused_delta);
 	return 0;
 }
-- 
cgit v0.10.2-6-g49f6


From 4181bda1567fc749623eac1c81c2b07c882d63b3 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 16 Feb 2006 11:57:18 -0800
Subject: git-repack: allow passing a couple of flags to pack-objects.

A new flag -q makes underlying pack-objects less chatty.
A new flag -f forces delta to be recomputed from scratch.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index 9060fe8..6c0f792 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -9,7 +9,7 @@ objects into pack files.
 
 SYNOPSIS
 --------
-'git-repack' [-a] [-d] [-l] [-n]
+'git-repack' [-a] [-d] [-f] [-l] [-n] [-q]
 
 DESCRIPTION
 -----------
@@ -43,6 +43,14 @@ OPTIONS
         Pass the `--local` option to `git pack-objects`, see
         gitlink:git-pack-objects[1].
 
+-f::
+        Pass the `--no-reuse-delta` option to `git pack-objects`, see
+        gitlink:git-pack-objects[1].
+
+-q::
+        Pass the `-q` option to `git pack-objects`, see
+        gitlink:git-pack-objects[1].
+
 -n::
         Do not update the server information with
         `git update-server-info`.
diff --git a/git-repack.sh b/git-repack.sh
index 1fafb6e..3d6fec1 100755
--- a/git-repack.sh
+++ b/git-repack.sh
@@ -3,17 +3,20 @@
 # Copyright (c) 2005 Linus Torvalds
 #
 
-USAGE='[-a] [-d] [-l] [-n]'
+USAGE='[-a] [-d] [-f] [-l] [-n] [-q]'
 . git-sh-setup
 	
-no_update_info= all_into_one= remove_redundant= local=
+no_update_info= all_into_one= remove_redundant=
+local= quiet= no_reuse_delta=
 while case "$#" in 0) break ;; esac
 do
 	case "$1" in
 	-n)	no_update_info=t ;;
 	-a)	all_into_one=t ;;
 	-d)	remove_redundant=t ;;
-	-l)	local=t ;;
+	-q)	quiet=-q ;;
+	-f)	no_reuse_delta=--no-reuse-delta ;;
+	-l)	local=--local ;;
 	*)	usage ;;
 	esac
 	shift
@@ -39,9 +42,7 @@ case ",$all_into_one," in
 	    find . -type f \( -name '*.pack' -o -name '*.idx' \) -print`
 	;;
 esac
-if [ "$local" ]; then
-	pack_objects="$pack_objects --local"
-fi
+pack_objects="$pack_objects $local $quiet $no_reuse_delta"
 name=$(git-rev-list --objects $rev_list $(git-rev-parse $rev_parse) 2>&1 |
 	git-pack-objects --non-empty $pack_objects .tmp-pack) ||
 	exit 1
-- 
cgit v0.10.2-6-g49f6


From 15b4d577ae2e0117b7b5a4add2217442a8458812 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Fri, 17 Feb 2006 20:58:45 -0800
Subject: pack-objects: avoid delta chains that are too long.

This tries to rework the solution for the excess delta chain
problem. An earlier commit worked around it ``cheaply'', but
repeated repacking risks unbounded growth of delta chains.

This version counts the length of the delta chain we are reusing
from the existing pack, and makes sure a base object that has
a sufficiently long delta chain does not get deltified.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index 38e1c99..0c9f4c9 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -10,16 +10,22 @@ static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--no
 struct object_entry {
 	unsigned char sha1[20];
 	unsigned long size;	/* uncompressed size */
-	unsigned long offset;	/* offset into the final pack file (nonzero if already written) */
+	unsigned long offset;	/* offset into the final pack file;
+				 * nonzero if already written.
+				 */
 	unsigned int depth;	/* delta depth */
+	unsigned int delta_limit;	/* base adjustment for in-pack delta */
 	unsigned int hash;	/* name hint hash */
 	enum object_type type;
-	unsigned char edge;	/* reused delta chain points at this entry. */
 	enum object_type in_pack_type;	/* could be delta */
 	unsigned long delta_size;	/* delta data size (uncompressed) */
 	struct object_entry *delta;	/* delta base object */
 	struct packed_git *in_pack; 	/* already in pack */
 	unsigned int in_pack_offset;
+	struct object_entry *delta_child; /* delitified objects who bases me */
+	struct object_entry *delta_sibling; /* other deltified objects who
+					     * uses the same base as me
+					     */
 };
 
 /*
@@ -470,7 +476,8 @@ static void check_object(struct object_entry *entry)
 			entry->delta = base_entry;
 			entry->type = OBJ_DELTA;
 
-			base_entry->edge = 1;
+			entry->delta_sibling = base_entry->delta_child;
+			base_entry->delta_child = entry;
 
 			return;
 		}
@@ -513,15 +520,32 @@ static void hash_objects(void)
 	}
 }
 
+static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
+{
+	struct object_entry *child = me->delta_child;
+	unsigned int m = n;
+	while (child) {
+		unsigned int c = check_delta_limit(child, n + 1);
+		if (m < c)
+			m = c;
+		child = child->delta_sibling;
+	}
+	return m;
+}
+
 static void get_object_details(void)
 {
 	int i;
-	struct object_entry *entry = objects;
+	struct object_entry *entry;
 
 	hash_objects();
 	prepare_pack_ix();
-	for (i = 0; i < nr_objects; i++)
-		check_object(entry++);
+	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
+		check_object(entry);
+	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
+		if (!entry->delta && entry->delta_child)
+			entry->delta_limit =
+				check_delta_limit(entry, 1);
 }
 
 typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
@@ -598,8 +622,11 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 	 * that depend on the current object into account -- otherwise
 	 * they would become too deep.
 	 */
-	if (cur_entry->edge)
-		max_depth /= 4;
+	if (cur_entry->delta_child) {
+		if (max_depth <= cur_entry->delta_limit)
+			return 0;
+		max_depth -= cur_entry->delta_limit;
+	}
 
 	size = cur_entry->size;
 	if (size < 50)
-- 
cgit v0.10.2-6-g49f6


From 2fb4a21074cdd62ecdbb1fd491f743483275e2a4 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sat, 18 Feb 2006 01:20:06 -0800
Subject: Make "empty ident" error message a bit more helpful.

It appears that some people who did not care about having bogus
names in their own commit messages are bitten by the recent
change to require a sane environment [*1*].

While it was a good idea to prevent people from using bogus
names to create commits and doing sign-offs, the error message
is not very informative.  This patch attempts to warn people
upfront and hint at how to fix their environments.

[Footnote]

*1* The thread is this one.

    http://marc.theaimsgroup.com/?t=113868084800004

    Especially this message.

    http://marc.theaimsgroup.com/?m=113932830015032

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/ident.c b/ident.c
index 23b8cfc..09d4d71 100644
--- a/ident.c
+++ b/ident.c
@@ -46,6 +46,15 @@ static void copy_gecos(struct passwd *w, char *name, int sz)
 
 }
 
+static const char au_env[] = "GIT_AUTHOR_NAME";
+static const char co_env[] = "GIT_COMMITTER_NAME";
+static const char env_hint[] =
+"\n*** Environment problem:\n"
+"*** Your name cannot be determined from your system services (gecos).\n"
+"*** You would need to set %s and %s\n"
+"*** environment variables; otherwise you won't be able to perform\n"
+"*** certain operations because of \"empty ident\" errors.\n\n";
+
 int setup_ident(void)
 {
 	int len;
@@ -57,6 +66,11 @@ int setup_ident(void)
 	/* Get the name ("gecos") */
 	copy_gecos(pw, git_default_name, sizeof(git_default_name));
 
+	if (!*git_default_name) {
+		if (!getenv(au_env) || !getenv(co_env))
+			fprintf(stderr, env_hint, au_env, co_env);
+	}
+
 	/* Make up a fake email address (name + '@' + hostname [+ '.' + domainname]) */
 	len = strlen(pw->pw_name);
 	if (len > sizeof(git_default_email)/2)
-- 
cgit v0.10.2-6-g49f6


From 589e4f93c7fc31d73da3d0764c71d939c9332442 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sat, 18 Feb 2006 20:31:05 -0800
Subject: Delay "empty ident" errors until they really matter.

Previous one warned people upfront to encourage fixing their
environment early, but some people just use repositories and git
tools read-only without making any changes, and in such a case
there is not much point insisting on them having a usable ident.

This round attempts to delay the error until either "git-var"
asks for the ident explicitly or "commit-tree" wants to use it.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/cache.h b/cache.h
index c255421..9f4adf5 100644
--- a/cache.h
+++ b/cache.h
@@ -246,8 +246,8 @@ void datestamp(char *buf, int bufsize);
 unsigned long approxidate(const char *);
 
 extern int setup_ident(void);
-extern const char *git_author_info(void);
-extern const char *git_committer_info(void);
+extern const char *git_author_info(int);
+extern const char *git_committer_info(int);
 
 struct checkout {
 	const char *base_dir;
diff --git a/commit-tree.c b/commit-tree.c
index b1c8dca..88871b0 100644
--- a/commit-tree.c
+++ b/commit-tree.c
@@ -118,8 +118,8 @@ int main(int argc, char **argv)
 		add_buffer(&buffer, &size, "parent %s\n", sha1_to_hex(parent_sha1[i]));
 
 	/* Person/date information */
-	add_buffer(&buffer, &size, "author %s\n", git_author_info());
-	add_buffer(&buffer, &size, "committer %s\n\n", git_committer_info());
+	add_buffer(&buffer, &size, "author %s\n", git_author_info(1));
+	add_buffer(&buffer, &size, "committer %s\n\n", git_committer_info(1));
 
 	/* And add the comment */
 	while (fgets(comment, sizeof(comment), stdin) != NULL)
diff --git a/ident.c b/ident.c
index 09d4d71..7c81fe8 100644
--- a/ident.c
+++ b/ident.c
@@ -46,15 +46,6 @@ static void copy_gecos(struct passwd *w, char *name, int sz)
 
 }
 
-static const char au_env[] = "GIT_AUTHOR_NAME";
-static const char co_env[] = "GIT_COMMITTER_NAME";
-static const char env_hint[] =
-"\n*** Environment problem:\n"
-"*** Your name cannot be determined from your system services (gecos).\n"
-"*** You would need to set %s and %s\n"
-"*** environment variables; otherwise you won't be able to perform\n"
-"*** certain operations because of \"empty ident\" errors.\n\n";
-
 int setup_ident(void)
 {
 	int len;
@@ -66,11 +57,6 @@ int setup_ident(void)
 	/* Get the name ("gecos") */
 	copy_gecos(pw, git_default_name, sizeof(git_default_name));
 
-	if (!*git_default_name) {
-		if (!getenv(au_env) || !getenv(co_env))
-			fprintf(stderr, env_hint, au_env, co_env);
-	}
-
 	/* Make up a fake email address (name + '@' + hostname [+ '.' + domainname]) */
 	len = strlen(pw->pw_name);
 	if (len > sizeof(git_default_email)/2)
@@ -170,8 +156,18 @@ static int copy(char *buf, int size, int offset, const char *src)
 	return offset;
 }
 
+static const char au_env[] = "GIT_AUTHOR_NAME";
+static const char co_env[] = "GIT_COMMITTER_NAME";
+static const char *env_hint =
+"\n*** Environment problem:\n"
+"*** Your name cannot be determined from your system services (gecos).\n"
+"*** You would need to set %s and %s\n"
+"*** environment variables; otherwise you won't be able to perform\n"
+"*** certain operations because of \"empty ident\" errors.\n"
+"*** Alternatively, you can use user.name configuration variable.\n\n";
+
 static const char *get_ident(const char *name, const char *email,
-			     const char *date_str)
+			     const char *date_str, int error_on_no_name)
 {
 	static char buffer[1000];
 	char date[50];
@@ -182,9 +178,14 @@ static const char *get_ident(const char *name, const char *email,
 	if (!email)
 		email = git_default_email;
 
-	if (!*name || !*email)
-		die("empty ident %s <%s> not allowed",
-		    name, email);
+	if (!*name) {
+		if (name == git_default_name && env_hint) {
+			fprintf(stderr, env_hint, au_env, co_env);
+			env_hint = NULL; /* warn only once, for "git-var -l" */
+		}
+		if (error_on_no_name)
+			die("empty ident %s <%s> not allowed", name, email);
+	}
 
 	strcpy(date, git_default_date);
 	if (date_str)
@@ -201,16 +202,18 @@ static const char *get_ident(const char *name, const char *email,
 	return buffer;
 }
 
-const char *git_author_info(void)
+const char *git_author_info(int error_on_no_name)
 {
 	return get_ident(getenv("GIT_AUTHOR_NAME"),
 			 getenv("GIT_AUTHOR_EMAIL"),
-			 getenv("GIT_AUTHOR_DATE"));
+			 getenv("GIT_AUTHOR_DATE"),
+			 error_on_no_name);
 }
 
-const char *git_committer_info(void)
+const char *git_committer_info(int error_on_no_name)
 {
 	return get_ident(getenv("GIT_COMMITTER_NAME"),
 			 getenv("GIT_COMMITTER_EMAIL"),
-			 getenv("GIT_COMMITTER_DATE"));
+			 getenv("GIT_COMMITTER_DATE"),
+			 error_on_no_name);
 }
diff --git a/var.c b/var.c
index 59da56d..a57a33b 100644
--- a/var.c
+++ b/var.c
@@ -12,7 +12,7 @@ static const char var_usage[] = "git-var [-l | <variable>]";
 
 struct git_var {
 	const char *name;
-	const char *(*read)(void);
+	const char *(*read)(int);
 };
 static struct git_var git_vars[] = {
 	{ "GIT_COMMITTER_IDENT", git_committer_info },
@@ -24,7 +24,7 @@ static void list_vars(void)
 {
 	struct git_var *ptr;
 	for(ptr = git_vars; ptr->read; ptr++) {
-		printf("%s=%s\n", ptr->name, ptr->read());
+		printf("%s=%s\n", ptr->name, ptr->read(0));
 	}
 }
 
@@ -35,7 +35,7 @@ static const char *read_var(const char *var)
 	val = NULL;
 	for(ptr = git_vars; ptr->read; ptr++) {
 		if (strcmp(var, ptr->name) == 0) {
-			val = ptr->read();
+			val = ptr->read(1);
 			break;
 		}
 	}
-- 
cgit v0.10.2-6-g49f6


From d64e6b04291e2313343866a6b206caf13313f1f9 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sat, 18 Feb 2006 20:51:26 -0800
Subject: Keep Porcelainish from failing by broken ident after making changes.

"empty ident not allowed" error makes commit-tree fail, so we
are already safer in that we would not end up with commit
objects that have bogus names on the author or committer fields.
However, before commit-tree is called there are already changes
made to the index file and the working tree.  The operation can
be resumed after fixing the environment problem, but when this
triggers for a newcomer with unusable gecos, the first question
becomes "what did I lose and how would I recover".

This patch modifies some Porcelainish commands to verify
GIT_COMMITTER_IDENT as soon as we know we are going to make some
commits before doing much damage to prevent confusion.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-am.sh b/git-am.sh
index 98b9215..85ecada 100755
--- a/git-am.sh
+++ b/git-am.sh
@@ -1,11 +1,13 @@
 #!/bin/sh
 #
-#
+# Copyright (c) 2005, 2006 Junio C Hamano
 
 USAGE='[--signoff] [--dotest=<dir>] [--utf8] [--binary] [--3way] <mbox>
   or, when resuming [--skip | --resolved]'
 . git-sh-setup
 
+git var GIT_COMMITTER_IDENT >/dev/null || exit
+
 stop_here () {
     echo "$1" >"$dotest/next"
     exit 1
diff --git a/git-applymbox.sh b/git-applymbox.sh
index 61c8c02..5569fdc 100755
--- a/git-applymbox.sh
+++ b/git-applymbox.sh
@@ -21,6 +21,8 @@
 USAGE='[-u] [-k] [-q] [-m] (-c .dotest/<num> | mbox) [signoff]'
 . git-sh-setup
 
+git var GIT_COMMITTER_IDENT >/dev/null || exit
+
 keep_subject= query_apply= continue= utf8= resume=t
 while case "$#" in 0) break ;; esac
 do
diff --git a/git-merge.sh b/git-merge.sh
index 74f0761..2b4a603 100755
--- a/git-merge.sh
+++ b/git-merge.sh
@@ -142,6 +142,8 @@ case "$#,$common,$no_commit" in
 1,*,)
 	# We are not doing octopus, not fast forward, and have only
 	# one common.  See if it is really trivial.
+	git var GIT_COMMITTER_IDENT >/dev/null || exit
+
 	echo "Trying really trivial in-index merge..."
 	git-update-index --refresh 2>/dev/null
 	if git-read-tree --trivial -m -u $common $head "$1" &&
@@ -179,6 +181,9 @@ case "$#,$common,$no_commit" in
 	;;
 esac
 
+# We are going to make a new commit.
+git var GIT_COMMITTER_IDENT >/dev/null || exit
+
 case "$use_strategies" in
 '')
 	case "$#" in
diff --git a/git-resolve.sh b/git-resolve.sh
index 9263070..b53ede8 100755
--- a/git-resolve.sh
+++ b/git-resolve.sh
@@ -50,6 +50,9 @@ case "$common" in
 	;;
 esac
 
+# We are going to make a new commit.
+git var GIT_COMMITTER_IDENT >/dev/null || exit
+
 # Find an optimum merge base if there are more than one candidates.
 LF='
 '
-- 
cgit v0.10.2-6-g49f6


From b2504a0d2ff5a51feb516f7732beb9549b5db454 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Wed, 22 Feb 2006 16:00:08 -0500
Subject: nicer eye candies for pack-objects

This provides a stable and simpler progress reporting mechanism that
updates progress as often as possible, but never more often
than once a second.  The deltification phase is also made more
interesting to watch (since repacking a big repository and only seeing a
dot appear once every many seconds is rather boring and doesn't provide
much food for anticipation).

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index 0c9f4c9..5e1e14c 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -4,6 +4,7 @@
 #include "pack.h"
 #include "csum-file.h"
 #include <sys/time.h>
+#include <signal.h>
 
 static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
 
@@ -661,17 +662,22 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 	return 0;
 }
 
+static volatile int progress_update = 0;
+static void progress_interval(int signum)
+{
+	signal(SIGALRM, progress_interval);
+	progress_update = 1;
+}
+
 static void find_deltas(struct object_entry **list, int window, int depth)
 {
 	int i, idx;
 	unsigned int array_size = window * sizeof(struct unpacked);
 	struct unpacked *array = xmalloc(array_size);
-	int eye_candy;
 
 	memset(array, 0, array_size);
 	i = nr_objects;
 	idx = 0;
-	eye_candy = i - (nr_objects / 20);
 
 	while (--i >= 0) {
 		struct object_entry *entry = list[i];
@@ -680,9 +686,10 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		char type[10];
 		int j;
 
-		if (progress && i <= eye_candy) {
-			eye_candy -= nr_objects / 20;
-			fputc('.', stderr);
+		if (progress_update || i == 0) {
+			fprintf(stderr, "Deltifying (%d %d%%)\r",
+				nr_objects-i, (nr_objects-i) * 100/nr_objects);
+			progress_update = 0;
 		}
 
 		if (entry->delta)
@@ -714,6 +721,9 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 			idx = 0;
 	}
 
+	if (progress)
+		fputc('\n', stderr);
+
 	for (i = 0; i < window; ++i)
 		free(array[i].data);
 	free(array);
@@ -721,17 +731,10 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 
 static void prepare_pack(int window, int depth)
 {
-	if (progress)
-		fprintf(stderr, "Packing %d objects", nr_objects);
 	get_object_details();
-	if (progress)
-		fputc('.', stderr);
-
 	sorted_by_type = create_sorted_list(type_size_sort);
 	if (window && depth)
 		find_deltas(sorted_by_type, window+1, depth);
-	if (progress)
-		fputc('\n', stderr);
 	write_pack_file();
 }
 
@@ -796,10 +799,6 @@ int main(int argc, char **argv)
 	int window = 10, depth = 10, pack_to_stdout = 0;
 	struct object_entry **list;
 	int i;
-	struct timeval prev_tv;
-	int eye_candy = 0;
-	int eye_candy_incr = 500;
-
 
 	setup_git_directory();
 
@@ -856,30 +855,25 @@ int main(int argc, char **argv)
 		usage(pack_usage);
 
 	prepare_packed_git();
+
 	if (progress) {
+		struct itimerval v;
+		v.it_interval.tv_sec = 1;
+		v.it_interval.tv_usec = 0;
+		v.it_value = v.it_interval;
+		signal(SIGALRM, progress_interval);
+		setitimer(ITIMER_REAL, &v, NULL);
 		fprintf(stderr, "Generating pack...\n");
-		gettimeofday(&prev_tv, NULL);
 	}
+
 	while (fgets(line, sizeof(line), stdin) != NULL) {
 		unsigned int hash;
 		char *p;
 		unsigned char sha1[20];
 
-		if (progress && (eye_candy <= nr_objects)) {
+		if (progress_update) {
 			fprintf(stderr, "Counting objects...%d\r", nr_objects);
-			if (eye_candy && (50 <= eye_candy_incr)) {
-				struct timeval tv;
-				int time_diff;
-				gettimeofday(&tv, NULL);
-				time_diff = (tv.tv_sec - prev_tv.tv_sec);
-				time_diff <<= 10;
-				time_diff += (tv.tv_usec - prev_tv.tv_usec);
-				if ((1 << 9) < time_diff)
-					eye_candy_incr += 50;
-				else if (50 < eye_candy_incr)
-					eye_candy_incr -= 50;
-			}
-			eye_candy += eye_candy_incr;
+			progress_update = 0;
 		}
 		if (get_sha1_hex(line, sha1))
 			die("expected sha1, got garbage:\n %s", line);
-- 
cgit v0.10.2-6-g49f6


From 5e8dc750ee56d8c295ecd7478a6bd5d148cb7177 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Wed, 22 Feb 2006 17:41:32 -0500
Subject: also adds progress when actually writing a pack

If that pack is big, it takes significant time to write and might
benefit from some more eye candies as well.  This is however disabled
when the pack is written to stdout since in that case the output is
usually piped into unpack_objects which already does its own progress
reporting.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index 5e1e14c..dc928b3 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -53,6 +53,7 @@ static int nr_objects = 0, nr_alloc = 0;
 static const char *base_name;
 static unsigned char pack_file_sha1[20];
 static int progress = 1;
+static volatile int progress_update = 0;
 
 /*
  * The object names in objects array are hashed with this hashtable,
@@ -333,8 +334,14 @@ static void write_pack_file(void)
 	hdr.hdr_entries = htonl(nr_objects);
 	sha1write(f, &hdr, sizeof(hdr));
 	offset = sizeof(hdr);
-	for (i = 0; i < nr_objects; i++)
+	for (i = 0; i < nr_objects; i++) {
 		offset = write_one(f, objects + i, offset);
+		if (progress_update) {
+			fprintf(stderr, "Writing (%d %d%%)\r",
+				i+1, (i+1) * 100/nr_objects);
+			progress_update = 0;
+		}
+	}
 
 	sha1close(f, pack_file_sha1, 1);
 }
@@ -662,7 +669,6 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 	return 0;
 }
 
-static volatile int progress_update = 0;
 static void progress_interval(int signum)
 {
 	signal(SIGALRM, progress_interval);
@@ -735,7 +741,6 @@ static void prepare_pack(int window, int depth)
 	sorted_by_type = create_sorted_list(type_size_sort);
 	if (window && depth)
 		find_deltas(sorted_by_type, window+1, depth);
-	write_pack_file();
 }
 
 static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
@@ -905,6 +910,14 @@ int main(int argc, char **argv)
 		;
 	else {
 		prepare_pack(window, depth);
+		if (progress && pack_to_stdout) {
+			/* the other end usually displays progress itself */
+			struct itimerval v = {{0,},};
+			setitimer(ITIMER_REAL, &v, NULL);
+			signal(SIGALRM, SIG_IGN );
+			progress_update = 0;
+		}
+		write_pack_file();
 		if (!pack_to_stdout) {
 			write_index_file();
 			puts(sha1_to_hex(object_list_sha1));
-- 
cgit v0.10.2-6-g49f6


From 183bdb2cccff792f11fd9e825df67af446aff171 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 22 Feb 2006 16:02:59 -0800
Subject: pack-objects eye-candy: finishing touches.

This updates the progress output to match "every one second or
every percent, whichever comes first" used by unpack-objects, as
discussed on the list.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index dc928b3..8f352aa 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -324,11 +324,19 @@ static void write_pack_file(void)
 	struct sha1file *f;
 	unsigned long offset;
 	struct pack_header hdr;
+	unsigned last_percent = 999;
+	int do_progress = 0;
 
 	if (!base_name)
 		f = sha1fd(1, "<stdout>");
-	else
-		f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack");
+	else {
+		f = sha1create("%s-%s.%s", base_name,
+			       sha1_to_hex(object_list_sha1), "pack");
+		do_progress = progress;
+	}
+	if (do_progress)
+		fprintf(stderr, "Writing %d objects.\n", nr_objects);
+
 	hdr.hdr_signature = htonl(PACK_SIGNATURE);
 	hdr.hdr_version = htonl(PACK_VERSION);
 	hdr.hdr_entries = htonl(nr_objects);
@@ -336,12 +344,18 @@ static void write_pack_file(void)
 	offset = sizeof(hdr);
 	for (i = 0; i < nr_objects; i++) {
 		offset = write_one(f, objects + i, offset);
-		if (progress_update) {
-			fprintf(stderr, "Writing (%d %d%%)\r",
-				i+1, (i+1) * 100/nr_objects);
-			progress_update = 0;
+		if (do_progress) {
+			unsigned percent = written * 100 / nr_objects;
+			if (progress_update || percent != last_percent) {
+				fprintf(stderr, "%4u%% (%u/%u) done\r",
+					percent, written, nr_objects);
+				progress_update = 0;
+				last_percent = percent;
+			}
 		}
 	}
+	if (do_progress)
+		fputc('\n', stderr);
 
 	sha1close(f, pack_file_sha1, 1);
 }
@@ -680,10 +694,14 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 	int i, idx;
 	unsigned int array_size = window * sizeof(struct unpacked);
 	struct unpacked *array = xmalloc(array_size);
+	unsigned processed = 0;
+	unsigned last_percent = 999;
 
 	memset(array, 0, array_size);
 	i = nr_objects;
 	idx = 0;
+	if (progress)
+		fprintf(stderr, "Deltifying %d objects.\n", nr_objects);
 
 	while (--i >= 0) {
 		struct object_entry *entry = list[i];
@@ -692,10 +710,15 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		char type[10];
 		int j;
 
-		if (progress_update || i == 0) {
-			fprintf(stderr, "Deltifying (%d %d%%)\r",
-				nr_objects-i, (nr_objects-i) * 100/nr_objects);
-			progress_update = 0;
+		processed++;
+		if (progress) {
+			unsigned percent = processed * 100 / nr_objects;
+			if (percent != last_percent || progress_update) {
+				fprintf(stderr, "%4u%% (%u/%u) done\r",
+					percent, processed, nr_objects);
+				progress_update = 0;
+				last_percent = percent;
+			}
 		}
 
 		if (entry->delta)
-- 
cgit v0.10.2-6-g49f6


From 6dc78e696b8597204b903073da932fc5ed0f419e Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 22 Feb 2006 13:10:37 -0800
Subject: git-fetch: follow tag only when tracking remote branch.

Unless --no-tags flag was given, git-fetch tried to always
follow remote tags that point at the commits we picked up.

It is not very useful to pick up tags from the remote unless we are
storing the fetched branch head in a local tracking branch.  This is
especially true if the fetch is done to merge the remote branch
into our current branch on a one-shot basis (i.e. "please pull"),
and is even harmful if the remote repository has many irrelevant
tags.

This proposed update disables the automated tag following unless
we are storing a fetched branch head in a local tracking
branch.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-fetch.sh b/git-fetch.sh
index b4325d9..fcc24f8 100755
--- a/git-fetch.sh
+++ b/git-fetch.sh
@@ -368,20 +368,25 @@ fetch_main "$reflist"
 # automated tag following
 case "$no_tags$tags" in
 '')
-	taglist=$(IFS=" " &&
-	git-ls-remote $upload_pack --tags "$remote" |
-	sed -ne 's|^\([0-9a-f]*\)[ 	]\(refs/tags/.*\)^{}$|\1 \2|p' |
-	while read sha1 name
-	do
-		test -f "$GIT_DIR/$name" && continue
-	  	git-check-ref-format "$name" || {
-			echo >&2 "warning: tag ${name} ignored"
-			continue
-		}
-		git-cat-file -t "$sha1" >/dev/null 2>&1 || continue
-		echo >&2 "Auto-following $name"
-		echo ".${name}:${name}"
-	done)
+	case "$reflist" in
+	*:refs/*)
+		# effective only when we are following remote branch
+		# using local tracking branch.
+		taglist=$(IFS=" " &&
+		git-ls-remote $upload_pack --tags "$remote" |
+		sed -ne 's|^\([0-9a-f]*\)[ 	]\(refs/tags/.*\)^{}$|\1 \2|p' |
+		while read sha1 name
+		do
+			test -f "$GIT_DIR/$name" && continue
+			git-check-ref-format "$name" || {
+				echo >&2 "warning: tag ${name} ignored"
+				continue
+			}
+			git-cat-file -t "$sha1" >/dev/null 2>&1 || continue
+			echo >&2 "Auto-following $name"
+			echo ".${name}:${name}"
+		done)
+	esac
 	case "$taglist" in
 	'') ;;
 	?*)
-- 
cgit v0.10.2-6-g49f6


From d4a1cab541be0c276b38285c8b33050ea411eacf Mon Sep 17 00:00:00 2001
From: Carl Worth <cworth@cworth.org>
Date: Tue, 21 Feb 2006 15:04:51 -0800
Subject: Add new git-rm command with documentation

This adds a git-rm command which provides convenience similar to
git-add, (and a bit more since it takes care of the rm as well if
given -f).

Like git-add, git-rm expands the given path names through
git-ls-files. This means it only acts on files listed in the
index. And it does act recursively on directories by default, (no -r
needed as in the case of rm itself). When it recurses, it does not
remove empty directories that are left behind.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/.gitignore b/.gitignore
index d7e8d2a..94f66d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -84,6 +84,7 @@ git-resolve
 git-rev-list
 git-rev-parse
 git-revert
+git-rm
 git-send-email
 git-send-pack
 git-sh-setup
diff --git a/Documentation/git-rm.txt b/Documentation/git-rm.txt
new file mode 100644
index 0000000..401bfb2
--- /dev/null
+++ b/Documentation/git-rm.txt
@@ -0,0 +1,89 @@
+git-rm(1)
+=========
+
+NAME
+----
+git-rm - Remove files from the working tree and from the index.
+
+SYNOPSIS
+--------
+'git-rm' [-f] [-n] [-v] [--] <file>...
+
+DESCRIPTION
+-----------
+A convenience wrapper for git-update-index --remove. For those coming
+from cvs, git-rm provides an operation similar to "cvs rm" or "cvs
+remove".
+
+
+OPTIONS
+-------
+<file>...::
+	Files to remove from the index and optionally, from the
+	working tree as well.
+
+-f::
+	Remove files from the working tree as well as from the index.
+
+-n::
+        Don't actually remove the file(s), just show if they exist in
+        the index.
+
+-v::
+        Be verbose.
+
+--::
+	This option can be used to separate command-line options from
+	the list of files, (useful when filenames might be mistaken
+	for command-line options).
+
+
+DISCUSSION
+----------
+
+The list of <file> given to the command is fed to `git-ls-files`
+command to list files that are registered in the index and
+are not ignored/excluded by `$GIT_DIR/info/exclude` file or
+`.gitignore` file in each directory.  This means two things:
+
+. You can put the name of a directory on the command line, and the
+  command will remove all files in it and its subdirectories (the
+  directories themselves are never removed from the working tree);
+
+. Giving the name of a file that is not in the index does not
+  remove that file.
+
+
+EXAMPLES
+--------
+git-rm Documentation/\\*.txt::
+
+	Removes all `\*.txt` files from the index that are under the
+	`Documentation` directory and any of its subdirectories. The
+	files are not removed from the working tree.
++
+Note that the asterisk `\*` is quoted from the shell in this
+example; this lets the command include the files from
+subdirectories of `Documentation/` directory.
+
+git-rm -f git-*.sh::
+
+	Remove all git-*.sh scripts that are in the index. The files
+	are removed from the index, and (because of the -f option),
+	from the working tree as well. Because this example lets the
+	shell expand the asterisk (i.e. you are listing the files
+	explicitly), it does not remove `subdir/git-foo.sh`.
+
+
+Author
+------
+Written by Linus Torvalds <torvalds@osdl.org>
+
+Documentation
+--------------
+Documentation by Junio C Hamano and the git-list <git@vger.kernel.org>.
+
+GIT
+---
+Part of the gitlink:git[7] suite
+
diff --git a/Makefile b/Makefile
index 0c04882..5d2ec1f 100644
--- a/Makefile
+++ b/Makefile
@@ -120,7 +120,7 @@ SCRIPT_SH = \
 	git-merge-one-file.sh git-parse-remote.sh \
 	git-prune.sh git-pull.sh git-push.sh git-rebase.sh \
 	git-repack.sh git-request-pull.sh git-reset.sh \
-	git-resolve.sh git-revert.sh git-sh-setup.sh \
+	git-resolve.sh git-revert.sh git-rm.sh git-sh-setup.sh \
 	git-tag.sh git-verify-tag.sh git-whatchanged.sh \
 	git-applymbox.sh git-applypatch.sh git-am.sh \
 	git-merge.sh git-merge-stupid.sh git-merge-octopus.sh \
diff --git a/git-rm.sh b/git-rm.sh
new file mode 100755
index 0000000..0a3f546
--- /dev/null
+++ b/git-rm.sh
@@ -0,0 +1,67 @@
+#!/bin/sh
+
+USAGE='[-f] [-n] [-v] [--] <file>...'
+SUBDIRECTORY_OK='Yes'
+. git-sh-setup
+
+index_remove_option=--force-remove
+remove_files=
+show_only=
+verbose=
+while : ; do
+  case "$1" in
+    -f)
+	remove_files=true
+	index_remote_option=--force
+	;;
+    -n)
+	show_only=true
+	;;
+    -v)
+	verbose=--verbose
+	;;
+    --)
+	shift; break
+	;;
+    -*)
+	usage
+	;;
+    *)
+	break
+	;;
+  esac
+  shift
+done
+
+# This is typo-proofing. If some paths match and some do not, we want
+# to do nothing.
+case "$#" in
+0)	;;
+*)
+	git-ls-files --error-unmatch -- "$@" >/dev/null || {
+		echo >&2 "Maybe you misspelled it?"
+		exit 1
+	}
+	;;
+esac
+
+files=$(
+    if test -f "$GIT_DIR/info/exclude" ; then
+	git-ls-files \
+	    --exclude-from="$GIT_DIR/info/exclude" \
+	    --exclude-per-directory=.gitignore -- "$@"
+    else
+	git-ls-files \
+	--exclude-per-directory=.gitignore -- "$@"
+    fi | sort | uniq
+)
+
+case "$show_only" in
+true)
+	echo $files
+	;;
+*)
+	[[ "$remove_files" = "true" ]] && rm -- $files
+	git-update-index $index_remove_option $verbose $files
+	;;
+esac
diff --git a/t/t3600-rm.sh b/t/t3600-rm.sh
new file mode 100755
index 0000000..8415732
--- /dev/null
+++ b/t/t3600-rm.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+#
+# Copyright (c) 2006 Carl D. Worth
+#
+
+test_description='Test of the various options to git-rm.'
+
+. ./test-lib.sh
+
+# Setup some files to be removed
+touch foo bar
+git-add foo bar
+# Need one to test --
+touch -- -q
+git update-index --add -- -q
+git-commit -m "add foo, bar, and -q"
+
+test_expect_success \
+    'Pre-check that foo is in index before git-rm foo' \
+    'git-ls-files --error-unmatch foo'
+
+test_expect_success \
+    'Test that git-rm foo succeeds' \
+    'git-rm foo'
+
+test_expect_failure \
+    'Post-check that foo is not in index after git-rm foo' \
+    'git-ls-files --error-unmatch foo'
+
+test_expect_success \
+    'Test that "git-rm -f bar" works' \
+    'git-rm -f bar'
+
+test_expect_failure \
+    'Post-check that bar no longer exists' \
+    '[ -f bar ]'
+
+test_expect_success \
+    'Test that "git-rm -- -q" works to delete a file named -q' \
+    'git-rm -- -q'
+
+test_done
-- 
cgit v0.10.2-6-g49f6


From 3844cdc8f19dcd848003586d6b98c9f2bd36a7d0 Mon Sep 17 00:00:00 2001
From: Carl Worth <cworth@cworth.org>
Date: Wed, 22 Feb 2006 16:37:27 -0800
Subject: git-rm: Fix to properly handle files with spaces, tabs, newlines,
 etc.

New tests are added to the git-rm test case to cover this as well.

Signed-off-by: Carl Worth <cworth@cworth.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-rm.sh b/git-rm.sh
index 0a3f546..fda4541 100755
--- a/git-rm.sh
+++ b/git-rm.sh
@@ -4,7 +4,6 @@ USAGE='[-f] [-n] [-v] [--] <file>...'
 SUBDIRECTORY_OK='Yes'
 . git-sh-setup
 
-index_remove_option=--force-remove
 remove_files=
 show_only=
 verbose=
@@ -12,7 +11,6 @@ while : ; do
   case "$1" in
     -f)
 	remove_files=true
-	index_remote_option=--force
 	;;
     -n)
 	show_only=true
@@ -45,23 +43,28 @@ case "$#" in
 	;;
 esac
 
-files=$(
-    if test -f "$GIT_DIR/info/exclude" ; then
-	git-ls-files \
-	    --exclude-from="$GIT_DIR/info/exclude" \
-	    --exclude-per-directory=.gitignore -- "$@"
-    else
-	git-ls-files \
+if test -f "$GIT_DIR/info/exclude"
+then
+	git-ls-files -z \
+	--exclude-from="$GIT_DIR/info/exclude" \
 	--exclude-per-directory=.gitignore -- "$@"
-    fi | sort | uniq
-)
-
-case "$show_only" in
-true)
-	echo $files
+else
+	git-ls-files -z \
+	--exclude-per-directory=.gitignore -- "$@"
+fi |
+case "$show_only,$remove_files" in
+true,*)
+	xargs -0 echo
+	;;
+*,true)
+	xargs -0 sh -c "
+		while [ \$# -gt 0 ]; do
+			file=\$1; shift
+			rm -- \"\$file\" && git-update-index --remove $verbose \"\$file\"
+		done
+	" inline
 	;;
 *)
-	[[ "$remove_files" = "true" ]] && rm -- $files
-	git-update-index $index_remove_option $verbose $files
+	git-update-index --force-remove $verbose -z --stdin
 	;;
 esac
diff --git a/t/t3600-rm.sh b/t/t3600-rm.sh
index 8415732..cabfadd 100755
--- a/t/t3600-rm.sh
+++ b/t/t3600-rm.sh
@@ -7,36 +7,54 @@ test_description='Test of the various options to git-rm.'
 
 . ./test-lib.sh
 
-# Setup some files to be removed
-touch foo bar
-git-add foo bar
-# Need one to test --
-touch -- -q
-git update-index --add -- -q
-git-commit -m "add foo, bar, and -q"
+# Setup some files to be removed, some with funny characters
+touch -- foo bar baz 'space embedded' 'tab	embedded' 'newline
+embedded' -q
+git-add -- foo bar baz 'space embedded' 'tab	embedded' 'newline
+embedded' -q
+git-commit -m "add files"
 
 test_expect_success \
-    'Pre-check that foo is in index before git-rm foo' \
-    'git-ls-files --error-unmatch foo'
+    'Pre-check that foo exists and is in index before git-rm foo' \
+    '[ -f foo ] && git-ls-files --error-unmatch foo'
 
 test_expect_success \
     'Test that git-rm foo succeeds' \
     'git-rm foo'
 
-test_expect_failure \
-    'Post-check that foo is not in index after git-rm foo' \
-    'git-ls-files --error-unmatch foo'
+test_expect_success \
+    'Post-check that foo exists but is not in index after git-rm foo' \
+    '[ -f foo ] && ! git-ls-files --error-unmatch foo'
+
+test_expect_success \
+    'Pre-check that bar exists and is in index before "git-rm -f bar"' \
+    '[ -f bar ] && git-ls-files --error-unmatch bar'
 
 test_expect_success \
-    'Test that "git-rm -f bar" works' \
+    'Test that "git-rm -f bar" succeeds' \
     'git-rm -f bar'
 
-test_expect_failure \
-    'Post-check that bar no longer exists' \
-    '[ -f bar ]'
+test_expect_success \
+    'Post-check that bar does not exist and is not in index after "git-rm -f bar"' \
+    '! [ -f bar ] && ! git-ls-files --error-unmatch bar'
 
 test_expect_success \
-    'Test that "git-rm -- -q" works to delete a file named -q' \
+    'Test that "git-rm -- -q" succeeds (remove a file that looks like an option)' \
     'git-rm -- -q'
 
+test_expect_success \
+    "Test that \"git-rm -f\" succeeds with embedded space, tab, or newline characters." \
+    "git-rm -f 'space embedded' 'tab	embedded' 'newline
+embedded'"
+
+chmod u-w .
+test_expect_failure \
+    'Test that "git-rm -f" fails if its rm fails' \
+    'git-rm -f baz'
+chmod u+w .
+
+test_expect_success \
+    'When the rm in "git-rm -f" fails, it should not remove the file from the index' \
+    'git-ls-files --error-unmatch baz'
+
 test_done
-- 
cgit v0.10.2-6-g49f6


From 744633cbf23d3ed505f96ffc212b30c96b79fb90 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 22 Feb 2006 19:02:39 -0800
Subject: checkout - eye candy.

This implements "eye candy" similar to that of pack-objects and
unpack-objects, to entertain users while a large tree is being
checked out after a clone or a pull.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-clone.sh b/git-clone.sh
index d184ceb..54c9e4e 100755
--- a/git-clone.sh
+++ b/git-clone.sh
@@ -253,7 +253,7 @@ Pull: $head_points_at:$origin" &&
 
 	case "$no_checkout" in
 	'')
-		git checkout
+		git-read-tree -m -u -v HEAD HEAD
 	esac
 fi
 
diff --git a/git-merge.sh b/git-merge.sh
index 2b4a603..c258ea7 100755
--- a/git-merge.sh
+++ b/git-merge.sh
@@ -130,7 +130,7 @@ case "$#,$common,$no_commit" in
 	echo "Updating from $head to $1."
 	git-update-index --refresh 2>/dev/null
 	new_head=$(git-rev-parse --verify "$1^0") &&
-	git-read-tree -u -m $head "$new_head" &&
+	git-read-tree -u -v -m $head "$new_head" &&
 	finish "$new_head" "Fast forward"
 	dropsave
 	exit 0
@@ -146,7 +146,7 @@ case "$#,$common,$no_commit" in
 
 	echo "Trying really trivial in-index merge..."
 	git-update-index --refresh 2>/dev/null
-	if git-read-tree --trivial -m -u $common $head "$1" &&
+	if git-read-tree --trivial -m -u -v $common $head "$1" &&
 	   result_tree=$(git-write-tree)
 	then
 	    echo "Wonderful."
diff --git a/read-tree.c b/read-tree.c
index 5580f15..4e0cdda 100644
--- a/read-tree.c
+++ b/read-tree.c
@@ -9,6 +9,8 @@
 
 #include "object.h"
 #include "tree.h"
+#include <sys/time.h>
+#include <signal.h>
 
 static int merge = 0;
 static int update = 0;
@@ -16,6 +18,8 @@ static int index_only = 0;
 static int nontrivial_merge = 0;
 static int trivial_merges_only = 0;
 static int aggressive = 0;
+static int verbose_update = 0;
+static volatile int progress_update = 0;
 
 static int head_idx = -1;
 static int merge_size = 0;
@@ -267,6 +271,12 @@ static void unlink_entry(char *name)
 	}
 }
 
+static void progress_interval(int signum)
+{
+	signal(SIGALRM, progress_interval);
+	progress_update = 1;
+}
+
 static void check_updates(struct cache_entry **src, int nr)
 {
 	static struct checkout state = {
@@ -276,8 +286,49 @@ static void check_updates(struct cache_entry **src, int nr)
 		.refresh_cache = 1,
 	};
 	unsigned short mask = htons(CE_UPDATE);
+	unsigned last_percent = 200, cnt = 0, total = 0;
+
+	if (update && verbose_update) {
+		struct itimerval v;
+
+		for (total = cnt = 0; cnt < nr; cnt++) {
+			struct cache_entry *ce = src[cnt];
+			if (!ce->ce_mode || ce->ce_flags & mask)
+				total++;
+		}
+
+		/* Don't bother doing this for very small updates */
+		if (total < 250)
+			total = 0;
+
+		if (total) {
+			v.it_interval.tv_sec = 1;
+			v.it_interval.tv_usec = 0;
+			v.it_value = v.it_interval;
+			signal(SIGALRM, progress_interval);
+			setitimer(ITIMER_REAL, &v, NULL);
+			fprintf(stderr, "Checking files out...\n");
+			progress_update = 1;
+		}
+		cnt = 0;
+	}
+
 	while (nr--) {
 		struct cache_entry *ce = *src++;
+
+		if (total) {
+			if (!ce->ce_mode || ce->ce_flags & mask) {
+				unsigned percent;
+				cnt++;
+				percent = (cnt * 100) / total;
+				if (percent != last_percent ||
+				    progress_update) {
+					fprintf(stderr, "%4u%% (%u/%u) done\r",
+						percent, cnt, total);
+					last_percent = percent;
+				}
+			}
+		}
 		if (!ce->ce_mode) {
 			if (update)
 				unlink_entry(ce->name);
@@ -289,6 +340,10 @@ static void check_updates(struct cache_entry **src, int nr)
 				checkout_entry(ce, &state);
 		}
 	}
+	if (total) {
+		fputc('\n', stderr);
+		signal(SIGALRM, SIG_IGN);
+	}
 }
 
 static int unpack_trees(merge_fn_t fn)
@@ -680,6 +735,11 @@ int main(int argc, char **argv)
 			continue;
 		}
 
+		if (!strcmp(arg, "-v")) {
+			verbose_update = 1;
+			continue;
+		}
+
 		/* "-i" means "index only", meaning that a merge will
 		 * not even look at the working tree.
 		 */
-- 
cgit v0.10.2-6-g49f6


From bd2afde8a38b97391b22afebf15c583fb0fffacb Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 22 Feb 2006 17:47:10 -0800
Subject: Give no terminating LF to error() function.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/commit.c b/commit.c
index 67e11d7..512b5d7 100644
--- a/commit.c
+++ b/commit.c
@@ -212,7 +212,8 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
 	if (memcmp(bufptr, "tree ", 5))
 		return error("bogus commit object %s", sha1_to_hex(item->object.sha1));
 	if (get_sha1_hex(bufptr + 5, parent) < 0)
-		return error("bad tree pointer in commit %s\n", sha1_to_hex(item->object.sha1));
+		return error("bad tree pointer in commit %s",
+			     sha1_to_hex(item->object.sha1));
 	item->tree = lookup_tree(parent);
 	if (item->tree)
 		n_refs++;
diff --git a/http-fetch.c b/http-fetch.c
index ce3df5f..8fd9de0 100644
--- a/http-fetch.c
+++ b/http-fetch.c
@@ -130,7 +130,7 @@ static void start_object_request(struct object_request *obj_req)
 
 	if (obj_req->local < 0) {
 		obj_req->state = ABORTED;
-		error("Couldn't create temporary file %s for %s: %s\n",
+		error("Couldn't create temporary file %s for %s: %s",
 		      obj_req->tmpfile, obj_req->filename, strerror(errno));
 		return;
 	}
@@ -830,9 +830,9 @@ static int fetch_object(struct alt_base *repo, unsigned char *sha1)
 				    obj_req->errorstr, obj_req->curl_result,
 				    obj_req->http_code, hex);
 	} else if (obj_req->zret != Z_STREAM_END) {
-		ret = error("File %s (%s) corrupt\n", hex, obj_req->url);
+		ret = error("File %s (%s) corrupt", hex, obj_req->url);
 	} else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
-		ret = error("File %s has bad hash\n", hex);
+		ret = error("File %s has bad hash", hex);
 	} else if (obj_req->rename < 0) {
 		ret = error("unable to write sha1 filename %s",
 			    obj_req->filename);
@@ -854,7 +854,7 @@ int fetch(unsigned char *sha1)
 		fetch_alternates(alt->base);
 		altbase = altbase->next;
 	}
-	return error("Unable to find %s under %s\n", sha1_to_hex(sha1),
+	return error("Unable to find %s under %s", sha1_to_hex(sha1),
 		     alt->base);
 }
 
diff --git a/read-tree.c b/read-tree.c
index 5580f15..0d93872 100644
--- a/read-tree.c
+++ b/read-tree.c
@@ -564,7 +564,7 @@ static int twoway_merge(struct cache_entry **src)
 	struct cache_entry *oldtree = src[1], *newtree = src[2];
 
 	if (merge_size != 2)
-		return error("Cannot do a twoway merge of %d trees\n",
+		return error("Cannot do a twoway merge of %d trees",
 			     merge_size);
 
 	if (current) {
@@ -616,7 +616,7 @@ static int oneway_merge(struct cache_entry **src)
 	struct cache_entry *a = src[1];
 
 	if (merge_size != 1)
-		return error("Cannot do a oneway merge of %d trees\n",
+		return error("Cannot do a oneway merge of %d trees",
 			     merge_size);
 
 	if (!a)
diff --git a/receive-pack.c b/receive-pack.c
index eae31e3..2a3db16 100644
--- a/receive-pack.c
+++ b/receive-pack.c
@@ -92,7 +92,7 @@ static int run_update_hook(const char *refname,
 	case -ERR_RUN_COMMAND_WAITPID_WRONG_PID:
 		return error("waitpid is confused");
 	case -ERR_RUN_COMMAND_WAITPID_SIGNAL:
-		return error("%s died of signal\n", update_hook);
+		return error("%s died of signal", update_hook);
 	case -ERR_RUN_COMMAND_WAITPID_NOEXIT:
 		return error("%s died strangely", update_hook);
 	default:
@@ -158,7 +158,7 @@ static int update(struct command *cmd)
 	if (run_update_hook(name, old_hex, new_hex)) {
 		unlink(lock_name);
 		cmd->error_string = "hook declined";
-		return error("hook declined to update %s\n", name);
+		return error("hook declined to update %s", name);
 	}
 	else if (rename(lock_name, name) < 0) {
 		unlink(lock_name);
diff --git a/refs.c b/refs.c
index d01fc39..826ae7a 100644
--- a/refs.c
+++ b/refs.c
@@ -268,7 +268,7 @@ static int write_ref_file(const char *filename,
 	char term = '\n';
 	if (write(fd, hex, 40) < 40 ||
 	    write(fd, &term, 1) < 1) {
-		error("Couldn't write %s\n", filename);
+		error("Couldn't write %s", filename);
 		close(fd);
 		return -1;
 	}
diff --git a/sha1_file.c b/sha1_file.c
index f4b1089..aa09b46 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -564,7 +564,7 @@ static void prepare_packed_git_one(char *objdir, int local)
 	dir = opendir(path);
 	if (!dir) {
 		if (errno != ENOENT)
-			error("unable to open object pack directory: %s: %s\n",
+			error("unable to open object pack directory: %s: %s",
 			      path, strerror(errno));
 		return;
 	}
@@ -1513,7 +1513,8 @@ int write_sha1_from_fd(const unsigned char *sha1, int fd, char *buffer,
 
 	local = mkstemp(tmpfile);
 	if (local < 0)
-		return error("Couldn't open %s for %s\n", tmpfile, sha1_to_hex(sha1));
+		return error("Couldn't open %s for %s",
+			     tmpfile, sha1_to_hex(sha1));
 
 	memset(&stream, 0, sizeof(stream));
 
@@ -1561,7 +1562,7 @@ int write_sha1_from_fd(const unsigned char *sha1, int fd, char *buffer,
 	}
 	if (memcmp(sha1, real_sha1, 20)) {
 		unlink(tmpfile);
-		return error("File %s has bad hash\n", sha1_to_hex(sha1));
+		return error("File %s has bad hash", sha1_to_hex(sha1));
 	}
 
 	return move_temp_to_file(tmpfile, sha1_file_name(sha1));
-- 
cgit v0.10.2-6-g49f6


From 09a5d72d8e8308fa5505e8c5d4715c0b6a5eabcb Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 22 Feb 2006 19:45:48 -0800
Subject: diffcore-rename: plug memory leak.

Spotted by Nicolas Pitre.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/diffcore-rename.c b/diffcore-rename.c
index 39d9126..ffd126a 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -176,8 +176,10 @@ static int estimate_similarity(struct diff_filespec *src,
 	/* A delta that has a lot of literal additions would have
 	 * big delta_size no matter what else it does.
 	 */
-	if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
+	if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE) {
+		free(delta);
 		return 0;
+	}
 
 	/* Estimate the edit size by interpreting delta. */
 	if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
-- 
cgit v0.10.2-6-g49f6


From b925410d10fce5e0d4182847f99e8c2df048bde1 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 22 Feb 2006 21:45:45 -0800
Subject: pack-objects: thin pack micro-optimization.

Since we sort objects by type, hash, preferredness and then
size, once we have a delta against a preferred base, there is no
point trying a delta with a non-preferred base.  This seems to
save expensive calls to diff-delta, and it seems to save
output space as well.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index ceb107f..af3bdf5 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -447,7 +447,7 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc
 	struct packed_git *p;
 	unsigned int found_offset = 0;
 	struct packed_git *found_pack = NULL;
-	int ix;
+	int ix, status = 0;
 
 	if (!exclude) {
 		for (p = packed_git; p; p = p->next) {
@@ -493,6 +493,7 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc
 			die("internal error in object hashing.");
 		object_ix[-1 - ix] = idx + 1;
 	}
+	status = 1;
 
  already_added:
 	if (exclude)
@@ -503,7 +504,7 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc
 			entry->in_pack_offset = found_offset;
 		}
 	}
-	return 1;
+	return status;
 }
 
 static void add_pbase_tree(struct tree_desc *tree)
@@ -521,7 +522,10 @@ static void add_pbase_tree(struct tree_desc *tree)
 			continue;
 		if (sha1_object_info(sha1, type, &size))
 			continue;
-		add_object_entry(sha1, name, 1);
+
+		if (!add_object_entry(sha1, name, 1))
+			continue;
+
 		if (!strcmp(type, "tree")) {
 			struct tree_desc sub;
 			void *elem;
@@ -543,8 +547,8 @@ static void add_preferred_base(unsigned char *sha1)
 	tree.buf = elem;
 	if (!tree.buf)
 		return;
-	add_object_entry(sha1, "", 1);
-	add_pbase_tree(&tree);
+	if (add_object_entry(sha1, "", 1))
+		add_pbase_tree(&tree);
 	free(elem);
 }
 
@@ -774,7 +778,7 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
 				 * already have a delta based on preferred
 				 * one is pointless.
 				 */
-				return 0;
+				return -1;
 		}
 		else if (!old_preferred)
 			max_size = cur_entry->delta_size-1;
-- 
cgit v0.10.2-6-g49f6


From 1d6b38cc76c348e2477506ca9759fc241e3d0d46 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 22 Feb 2006 22:10:24 -0800
Subject: pack-objects: use full pathname to help hashing with "thin" pack.

This applies the same hashing algorithm to both the "preferred base
tree" objects and the incoming pathnames, so that the same files
from different revs are grouped together, while files with the
same basename in different directories are spread apart.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index af3bdf5..3a16b7e 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -439,9 +439,37 @@ static void rehash_objects(void)
 	}
 }
 
-static int add_object_entry(const unsigned char *sha1, const char *name, int exclude)
+struct name_path {
+	struct name_path *up;
+	const char *elem;
+	int len;
+};
+
+static unsigned name_hash(struct name_path *path, const char *name)
+{
+	struct name_path *p = path;
+	const char *n = name + strlen(name);
+	unsigned hash = 0;
+
+	if (n != name && n[-1] == '\n')
+		n--;
+	while (name <= --n) {
+		unsigned char c = *n;
+		hash = hash * 11 + c;
+	}
+	for (p = path; p; p = p->up) {
+		hash = hash * 11 + '/';
+		n = p->elem + p->len;
+		while (p->elem <= --n) {
+			unsigned char c = *n;
+			hash = hash * 11 + c;
+		}
+	}
+	return hash;
+}
+
+static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude)
 {
-	unsigned int hash = 0;
 	unsigned int idx = nr_objects;
 	struct object_entry *entry;
 	struct packed_git *p;
@@ -467,13 +495,6 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc
 	if ((entry = locate_object_entry(sha1)) != NULL)
 		goto already_added;
 
-	while (*name) {
-		unsigned char c = *name++;
-		if (isspace(c))
-			continue;
-		hash = hash * 11 + c;
-	}
-
 	if (idx >= nr_alloc) {
 		unsigned int needed = (idx + 1024) * 3 / 2;
 		objects = xrealloc(objects, needed * sizeof(*entry));
@@ -507,12 +528,12 @@ static int add_object_entry(const unsigned char *sha1, const char *name, int exc
 	return status;
 }
 
-static void add_pbase_tree(struct tree_desc *tree)
+static void add_pbase_tree(struct tree_desc *tree, struct name_path *up)
 {
 	while (tree->size) {
 		const unsigned char *sha1;
 		const char *name;
-		unsigned mode;
+		unsigned mode, hash;
 		unsigned long size;
 		char type[20];
 
@@ -523,16 +544,22 @@ static void add_pbase_tree(struct tree_desc *tree)
 		if (sha1_object_info(sha1, type, &size))
 			continue;
 
-		if (!add_object_entry(sha1, name, 1))
+		hash = name_hash(up, name);
+		if (!add_object_entry(sha1, hash, 1))
 			continue;
 
 		if (!strcmp(type, "tree")) {
 			struct tree_desc sub;
 			void *elem;
+			struct name_path me;
+
 			elem = read_sha1_file(sha1, type, &sub.size);
 			sub.buf = elem;
 			if (sub.buf) {
-				add_pbase_tree(&sub);
+				me.up = up;
+				me.elem = name;
+				me.len = strlen(name);
+				add_pbase_tree(&sub, &me);
 				free(elem);
 			}
 		}
@@ -543,12 +570,13 @@ static void add_preferred_base(unsigned char *sha1)
 {
 	struct tree_desc tree;
 	void *elem;
+
 	elem = read_object_with_reference(sha1, "tree", &tree.size, NULL);
 	tree.buf = elem;
 	if (!tree.buf)
 		return;
-	if (add_object_entry(sha1, "", 1))
-		add_pbase_tree(&tree);
+	if (add_object_entry(sha1, name_hash(NULL, ""), 1))
+		add_pbase_tree(&tree, NULL);
 	free(elem);
 }
 
@@ -1031,7 +1059,7 @@ int main(int argc, char **argv)
 		}
 		if (get_sha1_hex(line, sha1))
 			die("expected sha1, got garbage:\n %s", line);
-		add_object_entry(sha1, line+40, 0);
+		add_object_entry(sha1, name_hash(NULL, line+41), 0);
 	}
 	if (progress)
 		fprintf(stderr, "Done counting %d objects.\n", nr_objects);
-- 
cgit v0.10.2-6-g49f6


From edd3ebfe27bf0df113846a3d4616ea538f8c04be Mon Sep 17 00:00:00 2001
From: Alex Riesen <raa.lkml@gmail.com>
Date: Thu, 23 Feb 2006 12:25:20 +0100
Subject: fix t5600-clone-fail-cleanup.sh on windows

On Windows you cannot remove the current directory, an open directory,
an open file, a running program, a loaded library, etc...

[jc: signoffs?  With a minor quoting fix.]

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-clone.sh b/git-clone.sh
index d184ceb..be471d8 100755
--- a/git-clone.sh
+++ b/git-clone.sh
@@ -118,7 +118,7 @@ dir="$2"
 [ -e "$dir" ] && echo "$dir already exists." && usage
 mkdir -p "$dir" &&
 D=$(cd "$dir" && pwd) &&
-trap 'err=$?; rm -r $D; exit $err' exit
+trap 'err=$?; cd ..; rm -r "$D"; exit $err' exit
 case "$bare" in
 yes) GIT_DIR="$D" ;;
 *) GIT_DIR="$D/.git" ;;
-- 
cgit v0.10.2-6-g49f6


From a92c73eccc81b5bfc9e62866505ff1ea72492173 Mon Sep 17 00:00:00 2001
From: Alex Riesen <raa.lkml@gmail.com>
Date: Thu, 23 Feb 2006 11:26:46 +0100
Subject: PATCH: simplify calls to git programs in git-fmt-merge-msg

It also makes it work on ActiveState Perl.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-fmt-merge-msg.perl b/git-fmt-merge-msg.perl
index c13af48..dae383f 100755
--- a/git-fmt-merge-msg.perl
+++ b/git-fmt-merge-msg.perl
@@ -28,28 +28,13 @@ sub andjoin {
 }
 
 sub repoconfig {
-	my $val;
-	eval {
-		my $pid = open(my $fh, '-|');
-		if (!$pid) {
-			exec('git-repo-config', '--get', 'merge.summary');
-		}
-		($val) = <$fh>;
-		close $fh;
-	};
+	my ($val) = qx{git-repo-config --get merge.summary};
 	return $val;
 }
 
 sub current_branch {
-	my $fh;
-	my $pid = open($fh, '-|');
-	die "$!" unless defined $pid;
-	if (!$pid) {
-	    exec('git-symbolic-ref', 'HEAD') or die "$!";
-	}
-	my ($bra) = <$fh>;
+	my ($bra) = qx{git-symbolic-ref HEAD};
 	chomp($bra);
-	close $fh or die "$!";
 	$bra =~ s|^refs/heads/||;
 	if ($bra ne 'master') {
 		$bra = " into $bra";
@@ -61,18 +46,12 @@ sub current_branch {
 
 sub shortlog {
 	my ($tip) = @_;
-	my ($fh, @result);
-	my $pid = open($fh, '-|');
-	die "$!" unless defined $pid;
-	if (!$pid) {
-	    exec('git-log', '--topo-order',
-		 '--pretty=oneline', $tip, '^HEAD') or die "$!";
-	}
-	while (<$fh>) {
+	my @result;
+	foreach ( qx{git-log --topo-order --pretty=oneline $tip ^HEAD} ) {
 		s/^[0-9a-f]{40}\s+//;
 		push @result, $_;
 	}
-	close $fh or die "$!";
+	die "git-log failed\n" if $?;
 	return @result;
 }
 
-- 
cgit v0.10.2-6-g49f6


From 6d28644d691fa3967d24d988d51d863f22bbcc63 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 23 Feb 2006 22:14:47 -0800
Subject: git-am: do not allow empty commits by mistake.

Running "git-am --resolved" without doing anything can create an empty
commit. Prevent it.

Thanks to Eric W. Biederman for spotting this.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-am.sh b/git-am.sh
index 85ecada..7cc4ae5 100755
--- a/git-am.sh
+++ b/git-am.sh
@@ -300,7 +300,7 @@ do
 	    } >"$dotest/final-commit"
 	    ;;
 	*)
-		case "$resolved,$interactive" in
+		case "$resolved$interactive" in
 		tt)
 			# This is used only for interactive view option.
 			git-diff-index -p --cached HEAD >"$dotest/patch"
@@ -364,6 +364,12 @@ do
 		# trust what the user has in the index file and the
 		# working tree.
 		resolved=
+		changed="$(git-diff-index --cached --name-only HEAD)"
+		if test '' = "$changed"
+		then
+			echo "No changes - did you forget update-index?"
+			stop_here $this
+		fi
 		apply_status=0
 		;;
 	esac
@@ -374,7 +380,7 @@ do
 		then
 		    # Applying the patch to an earlier tree and merging the
 		    # result may have produced the same tree as ours.
-		    changed="$(git-diff-index --cached --name-only -z HEAD)"
+		    changed="$(git-diff-index --cached --name-only HEAD)"
 		    if test '' = "$changed"
 		    then
 			    echo No changes -- Patch already applied.
-- 
cgit v0.10.2-6-g49f6


From 3fe5489a2550118e0eb3f90e348cb78afeeb61e0 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@gmail.com>
Date: Fri, 24 Feb 2006 00:59:42 +0530
Subject: gitview: Display the lines joining commit nodes clearly.

Since I wanted to limit the graph box size, I was resetting
the window after an index of 5. This resulted in lines joining
commit nodes passing over nodes which are not related. This
change fixes that.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/contrib/gitview/gitview b/contrib/gitview/gitview
index 4b52eb7..b04df74 100755
--- a/contrib/gitview/gitview
+++ b/contrib/gitview/gitview
@@ -823,6 +823,7 @@ class GitView:
 		self.colours = {}
 		self.nodepos = {}
 		self.incomplete_line = {}
+		self.commits = []
 
 		index = 0
 		last_colour = 0
@@ -840,12 +841,7 @@ class GitView:
 
 			commit = Commit(commit_lines)
 			if (commit != None ):
-				(out_line, last_colour, last_nodepos) = self.draw_graph(commit,
-										index, out_line,
-										last_colour,
-										last_nodepos)
-				self.index[commit.commit_sha1] = index
-				index += 1
+				self.commits.append(commit)
 
 			# Skip the '\0
 			commit_lines = []
@@ -854,6 +850,14 @@ class GitView:
 
 		fp.close()
 
+		for commit in self.commits:
+			(out_line, last_colour, last_nodepos) = self.draw_graph(commit,
+										index, out_line,
+										last_colour,
+										last_nodepos)
+			self.index[commit.commit_sha1] = index
+			index += 1
+
 		self.treeview.set_model(self.model)
 		self.treeview.show()
 
@@ -869,13 +873,6 @@ class GitView:
 			last_nodepos = 0
 
 		# Add the incomplete lines of the last cell in this
-		for sha1 in self.incomplete_line.keys():
-			if ( sha1 != commit.commit_sha1):
-				for pos in self.incomplete_line[sha1]:
-					in_line.append((pos, pos, self.colours[sha1]))
-			else:
-				del self.incomplete_line[sha1]
-
 		try:
 			colour = self.colours[commit.commit_sha1]
 		except KeyError:
@@ -897,6 +894,14 @@ class GitView:
 			self.colours[commit.parent_sha1[0]] = colour
 			self.nodepos[commit.parent_sha1[0]] = node_pos
 
+		for sha1 in self.incomplete_line.keys():
+			if ( sha1 != commit.commit_sha1):
+				self.draw_incomplete_line(sha1, node_pos,
+						out_line, in_line, index)
+			else:
+				del self.incomplete_line[sha1]
+
+
 		in_line.append((node_pos, self.nodepos[commit.parent_sha1[0]],
 					self.colours[commit.parent_sha1[0]]))
 
@@ -936,6 +941,23 @@ class GitView:
 		except KeyError:
 			self.incomplete_line[sha1] = [self.nodepos[sha1]]
 
+	def draw_incomplete_line(self, sha1, node_pos, out_line, in_line, index):
+		for idx, pos in enumerate(self.incomplete_line[sha1]):
+			if(pos == node_pos):
+				out_line.append((pos,
+					pos+0.5, self.colours[sha1]))
+				self.incomplete_line[sha1][idx] = pos = pos+0.5
+			try:
+				next_commit = self.commits[index+1]
+				if (next_commit.commit_sha1 == sha1 and pos != int(pos)):
+				# join the line back to the node point 
+				# This need to be done only if we modified it
+					in_line.append((pos, pos-0.5, self.colours[sha1]))
+					continue;
+			except IndexError:
+				pass
+			in_line.append((pos, pos, self.colours[sha1]))
+
 
 	def _go_clicked_cb(self, widget, revid):
 		"""Callback for when the go button for a parent is clicked."""
-- 
cgit v0.10.2-6-g49f6


From b76f6b627802d0a3c8bbf66fba0c090dbe56d509 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 23 Feb 2006 23:04:52 -0800
Subject: pack-objects: allow "thin" packs to exceed depth limits

When creating a new pack to be used in .git/objects/pack/
directory, we carefully count the depth of deltified objects to
be reused, so that the generated pack does not exceed the
specified depth limit for runtime efficiency.  However, when we
are generating a thin pack that does not contain base objects,
such a pack can only be used during network transfer that is
expanded on the other end upon reception, so being careful and
artificially cutting the delta chain does not buy us anything
except increased bandwidth requirement.  This patch disables the
delta chain depth limit check when reusing an existing delta.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index 3a16b7e..2320bcf 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -663,10 +663,23 @@ static void get_object_details(void)
 	prepare_pack_ix();
 	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
 		check_object(entry);
-	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
-		if (!entry->delta && entry->delta_child)
-			entry->delta_limit =
-				check_delta_limit(entry, 1);
+
+	if (nr_objects == nr_result) {
+		/*
+		 * Depth of objects that depend on the entry -- this
+		 * is subtracted from depth-max to break too deep
+		 * delta chain because of delta data reusing.
+		 * However, we loosen this restriction when we know we
+		 * are creating a thin pack -- it will have to be
+		 * expanded on the other end anyway, so do not
+		 * artificially cut the delta chain and let it go as
+		 * deep as it wants.
+		 */
+		for (i = 0, entry = objects; i < nr_objects; i++, entry++)
+			if (!entry->delta && entry->delta_child)
+				entry->delta_limit =
+					check_delta_limit(entry, 1);
+	}
 }
 
 typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);
-- 
cgit v0.10.2-6-g49f6


From eb38cc689e84a8fd01c1856e889fe8d3b4f1bfb4 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 23 Feb 2006 23:44:15 -0800
Subject: rev-list --objects-edge: remove duplicated edge commit output.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/rev-list.c b/rev-list.c
index 373549e..b5de075 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -440,8 +440,10 @@ static void mark_edge_parents_uninteresting(struct commit *commit)
 		if (!(parent->object.flags & UNINTERESTING))
 			continue;
 		mark_tree_uninteresting(parent->tree);
-		if (edge_hint)
+		if (edge_hint && !(parent->object.flags & SHOWN)) {
+			parent->object.flags |= SHOWN;
 			printf("-%s\n", sha1_to_hex(parent->object.sha1));
+		}
 	}
 }
 
-- 
cgit v0.10.2-6-g49f6


From e646de0d14bac20ef6e156c1742b9e62fb0b9020 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Wed, 22 Feb 2006 22:10:24 -0800
Subject: rev-list --objects: use full pathname to help hashing.

This helps to group the same files from different revs together,
while spreading files with the same basename in different
directories, to help pack-objects.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/rev-list.c b/rev-list.c
index b5de075..dda6fca 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -63,6 +63,36 @@ static int no_merges = 0;
 static const char **paths = NULL;
 static int remove_empty_trees = 0;
 
+struct name_path {
+	struct name_path *up;
+	int elem_len;
+	const char *elem;
+};
+
+static char *path_name(struct name_path *path, const char *name)
+{
+	struct name_path *p;
+	char *n, *m;
+	int nlen = strlen(name);
+	int len = nlen + 1;
+
+	for (p = path; p; p = p->up) {
+		if (p->elem_len)
+			len += p->elem_len + 1;
+	}
+	n = xmalloc(len);
+	m = n + len - (nlen + 1);
+	strcpy(m, name);
+	for (p = path; p; p = p->up) {
+		if (p->elem_len) {
+			m -= p->elem_len + 1;
+			memcpy(m, p->elem, p->elem_len);
+			m[p->elem_len] = '/';
+		}
+	}
+	return n;
+}
+
 static void show_commit(struct commit *commit)
 {
 	commit->object.flags |= SHOWN;
@@ -174,17 +204,23 @@ static int process_commit(struct commit * commit)
 	return CONTINUE;
 }
 
-static struct object_list **add_object(struct object *obj, struct object_list **p, const char *name)
+static struct object_list **add_object(struct object *obj,
+				       struct object_list **p,
+				       struct name_path *path,
+				       const char *name)
 {
 	struct object_list *entry = xmalloc(sizeof(*entry));
 	entry->item = obj;
 	entry->next = *p;
-	entry->name = name;
+	entry->name = path_name(path, name);
 	*p = entry;
 	return &entry->next;
 }
 
-static struct object_list **process_blob(struct blob *blob, struct object_list **p, const char *name)
+static struct object_list **process_blob(struct blob *blob,
+					 struct object_list **p,
+					 struct name_path *path,
+					 const char *name)
 {
 	struct object *obj = &blob->object;
 
@@ -193,13 +229,17 @@ static struct object_list **process_blob(struct blob *blob, struct object_list *
 	if (obj->flags & (UNINTERESTING | SEEN))
 		return p;
 	obj->flags |= SEEN;
-	return add_object(obj, p, name);
+	return add_object(obj, p, path, name);
 }
 
-static struct object_list **process_tree(struct tree *tree, struct object_list **p, const char *name)
+static struct object_list **process_tree(struct tree *tree,
+					 struct object_list **p,
+					 struct name_path *path,
+					 const char *name)
 {
 	struct object *obj = &tree->object;
 	struct tree_entry_list *entry;
+	struct name_path me;
 
 	if (!tree_objects)
 		return p;
@@ -208,15 +248,18 @@ static struct object_list **process_tree(struct tree *tree, struct object_list *
 	if (parse_tree(tree) < 0)
 		die("bad tree object %s", sha1_to_hex(obj->sha1));
 	obj->flags |= SEEN;
-	p = add_object(obj, p, name);
+	p = add_object(obj, p, path, name);
+	me.up = path;
+	me.elem = name;
+	me.elem_len = strlen(name);
 	entry = tree->entries;
 	tree->entries = NULL;
 	while (entry) {
 		struct tree_entry_list *next = entry->next;
 		if (entry->directory)
-			p = process_tree(entry->item.tree, p, entry->name);
+			p = process_tree(entry->item.tree, p, &me, entry->name);
 		else
-			p = process_blob(entry->item.blob, p, entry->name);
+			p = process_blob(entry->item.blob, p, &me, entry->name);
 		free(entry);
 		entry = next;
 	}
@@ -231,7 +274,7 @@ static void show_commit_list(struct commit_list *list)
 	while (list) {
 		struct commit *commit = pop_most_recent_commit(&list, SEEN);
 
-		p = process_tree(commit->tree, p, "");
+		p = process_tree(commit->tree, p, NULL, "");
 		if (process_commit(commit) == STOP)
 			break;
 	}
@@ -242,15 +285,15 @@ static void show_commit_list(struct commit_list *list)
 			continue;
 		if (obj->type == tag_type) {
 			obj->flags |= SEEN;
-			p = add_object(obj, p, name);
+			p = add_object(obj, p, NULL, name);
 			continue;
 		}
 		if (obj->type == tree_type) {
-			p = process_tree((struct tree *)obj, p, name);
+			p = process_tree((struct tree *)obj, p, NULL, name);
 			continue;
 		}
 		if (obj->type == blob_type) {
-			p = process_blob((struct blob *)obj, p, name);
+			p = process_blob((struct blob *)obj, p, NULL, name);
 			continue;
 		}
 		die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name);
@@ -674,7 +717,7 @@ static struct commit_list *limit_list(struct commit_list *list)
 
 static void add_pending_object(struct object *obj, const char *name)
 {
-	add_object(obj, &pending_objects, name);
+	add_object(obj, &pending_objects, NULL, name);
 }
 
 static struct commit *get_commit_reference(const char *name, const unsigned char *sha1, unsigned int flags)
-- 
cgit v0.10.2-6-g49f6


From eeef7135fed9b8784627c4c96e125241c06c65e1 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 23 Feb 2006 23:27:49 -0800
Subject: pack-objects: hash basename and dirname a bit differently.

...so that "Makefile"s from different revs are sorted together,
separate from "t/Makefile"s, but close enough.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index 2320bcf..095bcb8 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -445,18 +445,29 @@ struct name_path {
 	int len;
 };
 
+#define DIRBITS 12
+
 static unsigned name_hash(struct name_path *path, const char *name)
 {
 	struct name_path *p = path;
 	const char *n = name + strlen(name);
-	unsigned hash = 0;
+	unsigned hash = 0, name_hash = 0, name_done = 0;
 
 	if (n != name && n[-1] == '\n')
 		n--;
 	while (name <= --n) {
 		unsigned char c = *n;
+		if (c == '/' && !name_done) {
+			name_hash = hash;
+			name_done = 1;
+			hash = 0;
+		}
 		hash = hash * 11 + c;
 	}
+	if (!name_done) {
+		name_hash = hash;
+		hash = 0;
+	}
 	for (p = path; p; p = p->up) {
 		hash = hash * 11 + '/';
 		n = p->elem + p->len;
@@ -465,6 +476,26 @@ static unsigned name_hash(struct name_path *path, const char *name)
 			hash = hash * 11 + c;
 		}
 	}
+	/*
+	 * Make sure "Makefile" and "t/Makefile" are hashed separately
+	 * but close enough.
+	 */
+	hash = (name_hash<<DIRBITS) | (hash & ((1U<<DIRBITS )-1));
+
+	if (0) { /* debug */
+		n = name + strlen(name);
+		if (n != name && n[-1] == '\n')
+			n--;
+		while (name <= --n)
+			fputc(*n, stderr);
+		for (p = path; p; p = p->up) {
+			fputc('/', stderr);
+			n = p->elem + p->len;
+			while (p->elem <= --n)
+				fputc(*n, stderr);
+		}
+		fprintf(stderr, "\t%08x\n", hash);
+	}
 	return hash;
 }
 
-- 
cgit v0.10.2-6-g49f6


From 8b42f5ae545d494463e72430fd81a0c0c558c881 Mon Sep 17 00:00:00 2001
From: Aneesh Kumar <aneesh.kumar@gmail.com>
Date: Fri, 24 Feb 2006 14:02:32 +0530
Subject: gitview: Fix DeprecationWarning

DeprecationWarning: integer argument expected, got float

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/contrib/gitview/gitview b/contrib/gitview/gitview
index b04df74..4a6b448 100755
--- a/contrib/gitview/gitview
+++ b/contrib/gitview/gitview
@@ -154,7 +154,7 @@ class CellRendererGraph(gtk.GenericCellRenderer):
 
 		cols = self.node[0]
 		for start, end, colour in self.in_lines + self.out_lines:
-			cols = max(cols, start, end)
+			cols = int(max(cols, start, end))
 
 		(column, colour, names) = self.node
 		names_len = 0
-- 
cgit v0.10.2-6-g49f6


From 20d23f554d6cd40ffa0d41ccc9416bca867667e0 Mon Sep 17 00:00:00 2001
From: Aneesh Kumar <aneesh.kumar@gmail.com>
Date: Fri, 24 Feb 2006 14:08:35 +0530
Subject: gitview: Bump the rev

Make the 0.7 release

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/contrib/gitview/gitview b/contrib/gitview/gitview
index 4a6b448..02e2445 100755
--- a/contrib/gitview/gitview
+++ b/contrib/gitview/gitview
@@ -422,7 +422,7 @@ class DiffWindow:
 class GitView:
 	""" This is the main class
 	"""
-	version = "0.6"
+	version = "0.7"
 
 	def __init__(self, with_diff=0):
 		self.with_diff = with_diff
-- 
cgit v0.10.2-6-g49f6


From 43f72af1bc754f164071140a073d35dad21d2e4e Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Fri, 24 Feb 2006 16:16:10 -0800
Subject: Build and install git-mailinfo.

The merge 712b1dd389ad5bcdbaab0279641f0970702fc1f1 was done
incorrectly, and lost this program from Makefile.

Big thanks go to Tony Luck for noticing it, and Linus for
diagnosing it.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Makefile b/Makefile
index e79aa96..6c59cee 100644
--- a/Makefile
+++ b/Makefile
@@ -153,8 +153,8 @@ PROGRAMS = \
 	git-convert-objects$X git-diff-files$X \
 	git-diff-index$X git-diff-stages$X \
 	git-diff-tree$X git-fetch-pack$X git-fsck-objects$X \
-	git-hash-object$X git-index-pack$X git-init-db$X \
-	git-local-fetch$X git-ls-files$X git-ls-tree$X git-merge-base$X \
+	git-hash-object$X git-index-pack$X git-init-db$X git-local-fetch$X \
+	git-ls-files$X git-ls-tree$X git-mailinfo$X git-merge-base$X \
 	git-merge-index$X git-mktag$X git-mktree$X git-pack-objects$X git-patch-id$X \
 	git-peek-remote$X git-prune-packed$X git-read-tree$X \
 	git-receive-pack$X git-rev-list$X git-rev-parse$X \
-- 
cgit v0.10.2-6-g49f6


From 1e3584053d56157549c01114f9550d1db7014a3e Mon Sep 17 00:00:00 2001
From: Shawn Pearce <spearce@spearce.org>
Date: Fri, 24 Feb 2006 17:02:34 -0500
Subject: git ls files recursively show ignored files

Make git-ls-files --others --ignored recurse into non-excluded
subdirectories.

Typically when asking git-ls-files to display all files which are
ignored by one or more exclude patterns one would want it to recurse
into subdirectories which are not themselves excluded to see if
there are any excluded files contained within those subdirectories.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/ls-files.c b/ls-files.c
index 90b289f..df25c8c 100644
--- a/ls-files.c
+++ b/ls-files.c
@@ -279,8 +279,11 @@ static void read_directory(const char *path, const char *base, int baselen)
 				continue;
 			len = strlen(de->d_name);
 			memcpy(fullname + baselen, de->d_name, len+1);
-			if (excluded(fullname) != show_ignored)
-				continue;
+			if (excluded(fullname) != show_ignored) {
+				if (!show_ignored || DTYPE(de) != DT_DIR) {
+					continue;
+				}
+			}
 
 			switch (DTYPE(de)) {
 			struct stat st;
-- 
cgit v0.10.2-6-g49f6


From 6ee9240f63f1756cf23a63aa188cfcdf255a5d55 Mon Sep 17 00:00:00 2001
From: Shawn Pearce <spearce@spearce.org>
Date: Fri, 24 Feb 2006 17:51:15 -0500
Subject: Add missing programs to ignore list

Added recently added programs to the default exclude list.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/.gitignore b/.gitignore
index 94f66d5..5be239a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@ GIT-VERSION-FILE
 git
 git-add
 git-am
+git-annotate
 git-apply
 git-applymbox
 git-applypatch
@@ -22,6 +23,7 @@ git-convert-objects
 git-count-objects
 git-cvsexportcommit
 git-cvsimport
+git-cvsserver
 git-daemon
 git-diff
 git-diff-files
@@ -53,6 +55,7 @@ git-mailsplit
 git-merge
 git-merge-base
 git-merge-index
+git-merge-tree
 git-merge-octopus
 git-merge-one-file
 git-merge-ours
@@ -60,6 +63,7 @@ git-merge-recursive
 git-merge-resolve
 git-merge-stupid
 git-mktag
+git-mktree
 git-name-rev
 git-mv
 git-pack-redundant
-- 
cgit v0.10.2-6-g49f6


From 9e4f522da7da8bf5f9018927c82b12e6b02b9058 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@gmail.com>
Date: Fri, 24 Feb 2006 21:49:54 +0530
Subject: gitview: Code cleanup

Rearrange the code a little bit so that it is easier to read

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/contrib/gitview/gitview b/contrib/gitview/gitview
index 02e2445..2cde71e 100755
--- a/contrib/gitview/gitview
+++ b/contrib/gitview/gitview
@@ -870,21 +870,22 @@ class GitView:
 
 		# Reset nodepostion
 		if (last_nodepos > 5):
-			last_nodepos = 0
+			last_nodepos = -1 
 
 		# Add the incomplete lines of the last cell in this
 		try:
 			colour = self.colours[commit.commit_sha1]
 		except KeyError:
-			last_colour +=1
-			self.colours[commit.commit_sha1] = last_colour
-			colour =  last_colour
+			self.colours[commit.commit_sha1] = last_colour+1
+			last_colour = self.colours[commit.commit_sha1] 
+			colour =   self.colours[commit.commit_sha1] 
+
 		try:
 			node_pos = self.nodepos[commit.commit_sha1]
 		except KeyError:
-			last_nodepos +=1
-			self.nodepos[commit.commit_sha1] = last_nodepos
-			node_pos = last_nodepos
+			self.nodepos[commit.commit_sha1] = last_nodepos+1
+			last_nodepos = self.nodepos[commit.commit_sha1]
+			node_pos =  self.nodepos[commit.commit_sha1]
 
 		#The first parent always continue on the same line
 		try:
@@ -895,32 +896,25 @@ class GitView:
 			self.nodepos[commit.parent_sha1[0]] = node_pos
 
 		for sha1 in self.incomplete_line.keys():
-			if ( sha1 != commit.commit_sha1):
+			if (sha1 != commit.commit_sha1):
 				self.draw_incomplete_line(sha1, node_pos,
 						out_line, in_line, index)
 			else:
 				del self.incomplete_line[sha1]
 
 
-		in_line.append((node_pos, self.nodepos[commit.parent_sha1[0]],
-					self.colours[commit.parent_sha1[0]]))
-
-		self.add_incomplete_line(commit.parent_sha1[0], index+1)
-
-		if (len(commit.parent_sha1) > 1):
-			for parent_id in commit.parent_sha1[1:]:
-				try:
-					tmp_node_pos = self.nodepos[parent_id]
-				except KeyError:
-					last_colour += 1;
-					self.colours[parent_id] = last_colour
-					last_nodepos +=1
-					self.nodepos[parent_id] = last_nodepos
-
-				in_line.append((node_pos, self.nodepos[parent_id],
-							self.colours[parent_id]))
-				self.add_incomplete_line(parent_id, index+1)
+		for parent_id in commit.parent_sha1:
+			try:
+				tmp_node_pos = self.nodepos[parent_id]
+			except KeyError:
+				self.colours[parent_id] = last_colour+1
+				last_colour = self.colours[parent_id]
+				self.nodepos[parent_id] = last_nodepos+1
+				last_nodepos = self.nodepos[parent_id] 
 
+			in_line.append((node_pos, self.nodepos[parent_id],
+						self.colours[parent_id]))
+			self.add_incomplete_line(parent_id)
 
 		try:
 			branch_tag = self.bt_sha1[commit.commit_sha1]
@@ -935,7 +929,7 @@ class GitView:
 
 		return (in_line, last_colour, last_nodepos)
 
-	def add_incomplete_line(self, sha1, index):
+	def add_incomplete_line(self, sha1):
 		try:
 			self.incomplete_line[sha1].append(self.nodepos[sha1])
 		except KeyError:
-- 
cgit v0.10.2-6-g49f6


From 1509bd9e69b916ca7f04a89fc03662e03e2ba312 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@gmail.com>
Date: Fri, 24 Feb 2006 21:57:51 +0530
Subject: gitview: Fix the graph display .

This fixes all the known issues with the graph display.
The bug needs to be explained graphically:

                                 |
                                 a
This line need not be there ---->| \
                                 b  |
                                 | /
                                 c

c is the parent of a; a, b and c are all placed on the same line, and b is a child of c.
With my last checkin I added a separate line to indicate that a is
connected to c. But then we still had the line connecting a and b, which should
not be there. This change fixes that bug.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/contrib/gitview/gitview b/contrib/gitview/gitview
index 2cde71e..4e3847d 100755
--- a/contrib/gitview/gitview
+++ b/contrib/gitview/gitview
@@ -938,8 +938,10 @@ class GitView:
 	def draw_incomplete_line(self, sha1, node_pos, out_line, in_line, index):
 		for idx, pos in enumerate(self.incomplete_line[sha1]):
 			if(pos == node_pos):
-				out_line.append((pos,
-					pos+0.5, self.colours[sha1]))
+				#remove the straight line and add a slash
+				if ((pos, pos, self.colours[sha1]) in out_line):
+					out_line.remove((pos, pos, self.colours[sha1]))
+				out_line.append((pos, pos+0.5, self.colours[sha1]))
 				self.incomplete_line[sha1][idx] = pos = pos+0.5
 			try:
 				next_commit = self.commits[index+1]
-- 
cgit v0.10.2-6-g49f6


From 8fcf1ad9c68e15d881194c8544e7c11d33529c2b Mon Sep 17 00:00:00 2001
From: "Luck, Tony" <tony.luck@intel.com>
Date: Thu, 23 Feb 2006 14:42:39 -0800
Subject: fix warning from pack-objects.c

When compiling on ia64 I get this warning (from gcc 3.4.3):

gcc -o pack-objects.o -c -g -O2 -Wall -DSHA1_HEADER='<openssl/sha.h>'  pack-objects.c
pack-objects.c: In function `pack_revindex_ix':
pack-objects.c:94: warning: cast from pointer to integer of different size

A double cast (first to long, then to int) shuts gcc up, but is there
a better way?

[jc: Andreas Ericsson suggests to use ulong instead. ]

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/pack-objects.c b/pack-objects.c
index be7a200..0287449 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -99,7 +99,7 @@ static int reused_delta = 0;
 
 static int pack_revindex_ix(struct packed_git *p)
 {
-	unsigned int ui = (unsigned int) p;
+	unsigned long ui = (unsigned long)(long)p;
 	int i;
 
 	ui = ui ^ (ui >> 16); /* defeat structure alignment */
-- 
cgit v0.10.2-6-g49f6


From 9d7f73d43fa49d0d2f5a8cfcce9d659e8ad2d265 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lukas=20Sandstr=C3=B6m?= <lukass@etek.chalmers.se>
Date: Sat, 25 Feb 2006 12:20:13 +0100
Subject: git-fetch: print the new and old ref when fast-forwarding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Lukas Sandström <lukass@etek.chalmers.se>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-fetch.sh b/git-fetch.sh
index de4f011..0346d4a 100755
--- a/git-fetch.sh
+++ b/git-fetch.sh
@@ -164,6 +164,7 @@ fast_forward_local () {
 		;;
 	    *,$local)
 		echo >&2 "* $1: fast forward to $3"
+		echo >&2 "  from $local to $2"
 		git-update-ref "$1" "$2" "$local"
 		;;
 	    *)
-- 
cgit v0.10.2-6-g49f6


From 87475f4dfce96b040fffbaefda9a4daa789786b2 Mon Sep 17 00:00:00 2001
From: Ryan Anderson <ryan@michonline.com>
Date: Sat, 25 Feb 2006 20:48:33 -0500
Subject: annotate: Handle dirty state and arbitrary revisions.

Also, use Getopt::Long and only process each rev once.

(Thanks to Morten Welinder for spotting the performance problems.)

Signed-off-by: Ryan Anderson <ryan@michonline.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-annotate.perl b/git-annotate.perl
index 3800c46..91da6d5 100755
--- a/git-annotate.perl
+++ b/git-annotate.perl
@@ -8,44 +8,62 @@
 
 use warnings;
 use strict;
-use Getopt::Std;
+use Getopt::Long;
 use POSIX qw(strftime gmtime);
 
 sub usage() {
-	print STDERR 'Usage: ${\basename $0} [-s] [-S revs-file] file
-
-	-l		show long rev
-	-r		follow renames
-	-S commit	use revs from revs-file instead of calling git-rev-list
+	print STDERR 'Usage: ${\basename $0} [-s] [-S revs-file] file [ revision ]
+	-l, --long
+			Show long rev (Defaults off)
+	-r, --rename
+			Follow renames (Defaults on).
+	-S, --rev-file revs-file
+			use revs from revs-file instead of calling git-rev-list
+	-h, --help
+			This message.
 ';
 
 	exit(1);
 }
 
-our ($opt_h, $opt_l, $opt_r, $opt_S);
-getopts("hlrS:") or usage();
-$opt_h && usage();
+our ($help, $longrev, $rename, $starting_rev, $rev_file) = (0, 0, 1);
+
+my $rc = GetOptions(	"long|l" => \$longrev,
+			"help|h" => \$help,
+			"rename|r" => \$rename,
+			"rev-file|S" => \$rev_file);
+if (!$rc or $help) {
+	usage();
+}
 
 my $filename = shift @ARGV;
+if (@ARGV) {
+	$starting_rev = shift @ARGV;
+}
 
 my @stack = (
 	{
-		'rev' => "HEAD",
+		'rev' => defined $starting_rev ? $starting_rev : "HEAD",
 		'filename' => $filename,
 	},
 );
 
-our (@lineoffsets, @pendinglineoffsets);
 our @filelines = ();
-open(F,"<",$filename)
-	or die "Failed to open filename: $!";
 
-while(<F>) {
-	chomp;
-	push @filelines, $_;
+if (defined $starting_rev) {
+	@filelines = git_cat_file($starting_rev, $filename);
+} else {
+	open(F,"<",$filename)
+		or die "Failed to open filename: $!";
+
+	while(<F>) {
+		chomp;
+		push @filelines, $_;
+	}
+	close(F);
+
 }
-close(F);
-our $leftover_lines = @filelines;
+
 our %revs;
 our @revqueue;
 our $head;
@@ -66,7 +84,7 @@ while (my $bound = pop @stack) {
 			next;
 		}
 
-		if (!$opt_r) {
+		if (!$rename) {
 			next;
 		}
 
@@ -78,8 +96,18 @@ while (my $bound = pop @stack) {
 	}
 }
 push @revqueue, $head;
-init_claim($head);
-$revs{$head}{'lineoffsets'} = {};
+init_claim( defined $starting_rev ? $starting_rev : 'dirty');
+unless (defined $starting_rev) {
+	open(DIFF,"-|","git","diff","-R", "HEAD", "--",$filename)
+		or die "Failed to call git diff to check for dirty state: $!";
+
+	_git_diff_parse(*DIFF, $head, "dirty", (
+				'author' => gitvar_name("GIT_AUTHOR_IDENT"),
+				'author_date' => sprintf("%s +0000",time()),
+				)
+			);
+	close(DIFF);
+}
 handle_rev();
 
 
@@ -88,7 +116,7 @@ foreach my $l (@filelines) {
 	my ($output, $rev, $committer, $date);
 	if (ref $l eq 'ARRAY') {
 		($output, $rev, $committer, $date) = @$l;
-		if (!$opt_l && length($rev) > 8) {
+		if (!$longrev && length($rev) > 8) {
 			$rev = substr($rev,0,8);
 		}
 	} else {
@@ -102,7 +130,6 @@ foreach my $l (@filelines) {
 
 sub init_claim {
 	my ($rev) = @_;
-	my %revinfo = git_commit_info($rev);
 	for (my $i = 0; $i < @filelines; $i++) {
 		$filelines[$i] = [ $filelines[$i], '', '', '', 1];
 			# line,
@@ -117,7 +144,9 @@ sub init_claim {
 
 sub handle_rev {
 	my $i = 0;
+	my %seen;
 	while (my $rev = shift @revqueue) {
+		next if $seen{$rev}++;
 
 		my %revinfo = git_commit_info($rev);
 
@@ -143,8 +172,8 @@ sub handle_rev {
 sub git_rev_list {
 	my ($rev, $file) = @_;
 
-	if ($opt_S) {
-		open(P, '<' . $opt_S);
+	if ($rev_file) {
+		open(P, '<' . $rev_file);
 	} else {
 		open(P,"-|","git-rev-list","--parents","--remove-empty",$rev,"--",$file)
 			or die "Failed to exec git-rev-list: $!";
@@ -216,24 +245,31 @@ sub git_find_parent {
 sub git_diff_parse {
 	my ($parent, $rev, %revinfo) = @_;
 
-	my ($ri, $pi) = (0,0);
 	open(DIFF,"-|","git-diff-tree","-M","-p",$rev,$parent,"--",
 			$revs{$rev}{'filename'}, $revs{$parent}{'filename'})
 		or die "Failed to call git-diff for annotation: $!";
 
+	_git_diff_parse(*DIFF, $parent, $rev, %revinfo);
+
+	close(DIFF);
+}
+
+sub _git_diff_parse {
+	my ($diff, $parent, $rev, %revinfo) = @_;
+
+	my ($ri, $pi) = (0,0);
 	my $slines = $revs{$rev}{'lines'};
 	my @plines;
 
 	my $gotheader = 0;
-	my ($remstart, $remlength, $addstart, $addlength);
-	my ($hunk_start, $hunk_index, $hunk_adds);
+	my ($remstart);
+	my ($hunk_start, $hunk_index);
 	while(<DIFF>) {
 		chomp;
 		if (m/^@@ -(\d+),(\d+) \+(\d+),(\d+)/) {
-			($remstart, $remlength, $addstart, $addlength) = ($1, $2, $3, $4);
+			$remstart = $1;
 			# Adjust for 0-based arrays
 			$remstart--;
-			$addstart--;
 			# Reinit hunk tracking.
 			$hunk_start = $remstart;
 			$hunk_index = 0;
@@ -279,7 +315,6 @@ sub git_diff_parse {
 		}
 		$hunk_index++;
 	}
-	close(DIFF);
 	for (my $i = $ri; $i < @{$slines} ; $i++) {
 		push @plines, $slines->[$ri++];
 	}
@@ -295,13 +330,13 @@ sub get_line {
 }
 
 sub git_cat_file {
-	my ($parent, $filename) = @_;
-	return () unless defined $parent && defined $filename;
-	my $blobline = `git-ls-tree $parent $filename`;
-	my ($mode, $type, $blob, $tfilename) = split(/\s+/, $blobline, 4);
+	my ($rev, $filename) = @_;
+	return () unless defined $rev && defined $filename;
 
-	open(C,"-|","git-cat-file", "blob", $blob)
-		or die "Failed to git-cat-file blob $blob (rev $parent, file $filename): " . $!;
+	my $blob = git_ls_tree($rev, $filename);
+
+	open(C,"-|","git","cat-file", "blob", $blob)
+		or die "Failed to git-cat-file blob $blob (rev $rev, file $filename): " . $!;
 
 	my @lines;
 	while(<C>) {
@@ -313,6 +348,25 @@ sub git_cat_file {
 	return @lines;
 }
 
+sub git_ls_tree {
+	my ($rev, $filename) = @_;
+
+	open(T,"-|","git","ls-tree",$rev,$filename)
+		or die "Failed to call git ls-tree: $!";
+
+	my ($mode, $type, $blob, $tfilename);
+	while(<T>) {
+		($mode, $type, $blob, $tfilename) = split(/\s+/, $_, 4);
+		last if ($tfilename eq $filename);
+	}
+	close(T);
+
+	return $blob if $filename eq $filename;
+	die "git-ls-tree failed to find blob for $filename";
+
+}
+
+
 
 sub claim_line {
 	my ($floffset, $rev, $lines, %revinfo) = @_;
@@ -354,3 +408,25 @@ sub format_date {
 	return strftime("%Y-%m-%d %H:%M:%S " . $timezone, gmtime($timestamp));
 }
 
+# Copied from git-send-email.perl - We need a Git.pm module..
+sub gitvar {
+    my ($var) = @_;
+    my $fh;
+    my $pid = open($fh, '-|');
+    die "$!" unless defined $pid;
+    if (!$pid) {
+	exec('git-var', $var) or die "$!";
+    }
+    my ($val) = <$fh>;
+    close $fh or die "$!";
+    chomp($val);
+    return $val;
+}
+
+sub gitvar_name {
+    my ($name) = @_;
+    my $val = gitvar($name);
+    my @field = split(/\s+/, $val);
+    return join(' ', @field[0...(@field-4)]);
+}
+
-- 
cgit v0.10.2-6-g49f6


From 6b3e21d6031e1e8df8b01cba5ab7374c4b721257 Mon Sep 17 00:00:00 2001
From: Ryan Anderson <ryan@michonline.com>
Date: Sat, 25 Feb 2006 22:02:05 -0500
Subject: annotate: Convert all -| calls to use a helper open_pipe().

When we settle on a solution for ActiveState's forking issues, all
compatibility checks can be handled inside this one function.

Also, fixed an abuse of global variables in the process of cleaning this up.

Signed-off-by: Ryan Anderson <ryan@michonline.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-annotate.perl b/git-annotate.perl
index 91da6d5..ee8ff15 100755
--- a/git-annotate.perl
+++ b/git-annotate.perl
@@ -98,15 +98,15 @@ while (my $bound = pop @stack) {
 push @revqueue, $head;
 init_claim( defined $starting_rev ? $starting_rev : 'dirty');
 unless (defined $starting_rev) {
-	open(DIFF,"-|","git","diff","-R", "HEAD", "--",$filename)
+	my $diff = open_pipe("git","diff","-R", "HEAD", "--",$filename)
 		or die "Failed to call git diff to check for dirty state: $!";
 
-	_git_diff_parse(*DIFF, $head, "dirty", (
+	_git_diff_parse($diff, $head, "dirty", (
 				'author' => gitvar_name("GIT_AUTHOR_IDENT"),
 				'author_date' => sprintf("%s +0000",time()),
 				)
 			);
-	close(DIFF);
+	close($diff);
 }
 handle_rev();
 
@@ -172,20 +172,21 @@ sub handle_rev {
 sub git_rev_list {
 	my ($rev, $file) = @_;
 
+	my $revlist;
 	if ($rev_file) {
-		open(P, '<' . $rev_file);
+		open($revlist, '<' . $rev_file);
 	} else {
-		open(P,"-|","git-rev-list","--parents","--remove-empty",$rev,"--",$file)
+		$revlist = open_pipe("git-rev-list","--parents","--remove-empty",$rev,"--",$file)
 			or die "Failed to exec git-rev-list: $!";
 	}
 
 	my @revs;
-	while(my $line = <P>) {
+	while(my $line = <$revlist>) {
 		chomp $line;
 		my ($rev, @parents) = split /\s+/, $line;
 		push @revs, [ $rev, @parents ];
 	}
-	close(P);
+	close($revlist);
 
 	printf("0 revs found for rev %s (%s)\n", $rev, $file) if (@revs == 0);
 	return @revs;
@@ -194,22 +195,22 @@ sub git_rev_list {
 sub find_parent_renames {
 	my ($rev, $file) = @_;
 
-	open(P,"-|","git-diff-tree", "-M50", "-r","--name-status", "-z","$rev")
+	my $patch = open_pipe("git-diff-tree", "-M50", "-r","--name-status", "-z","$rev")
 		or die "Failed to exec git-diff: $!";
 
 	local $/ = "\0";
 	my %bound;
-	my $junk = <P>;
-	while (my $change = <P>) {
+	my $junk = <$patch>;
+	while (my $change = <$patch>) {
 		chomp $change;
-		my $filename = <P>;
+		my $filename = <$patch>;
 		chomp $filename;
 
 		if ($change =~ m/^[AMD]$/ ) {
 			next;
 		} elsif ($change =~ m/^R/ ) {
 			my $oldfilename = $filename;
-			$filename = <P>;
+			$filename = <$patch>;
 			chomp $filename;
 			if ( $file eq $filename ) {
 				my $parent = git_find_parent($rev, $oldfilename);
@@ -218,7 +219,7 @@ sub find_parent_renames {
 			}
 		}
 	}
-	close(P);
+	close($patch);
 
 	return \%bound;
 }
@@ -227,14 +228,14 @@ sub find_parent_renames {
 sub git_find_parent {
 	my ($rev, $filename) = @_;
 
-	open(REVPARENT,"-|","git-rev-list","--remove-empty", "--parents","--max-count=1","$rev","--",$filename)
+	my $revparent = open_pipe("git-rev-list","--remove-empty", "--parents","--max-count=1","$rev","--",$filename)
 		or die "Failed to open git-rev-list to find a single parent: $!";
 
-	my $parentline = <REVPARENT>;
+	my $parentline = <$revparent>;
 	chomp $parentline;
 	my ($revfound,$parent) = split m/\s+/, $parentline;
 
-	close(REVPARENT);
+	close($revparent);
 
 	return $parent;
 }
@@ -245,13 +246,13 @@ sub git_find_parent {
 sub git_diff_parse {
 	my ($parent, $rev, %revinfo) = @_;
 
-	open(DIFF,"-|","git-diff-tree","-M","-p",$rev,$parent,"--",
+	my $diff = open_pipe("git-diff-tree","-M","-p",$rev,$parent,"--",
 			$revs{$rev}{'filename'}, $revs{$parent}{'filename'})
 		or die "Failed to call git-diff for annotation: $!";
 
-	_git_diff_parse(*DIFF, $parent, $rev, %revinfo);
+	_git_diff_parse($diff, $parent, $rev, %revinfo);
 
-	close(DIFF);
+	close($diff);
 }
 
 sub _git_diff_parse {
@@ -264,7 +265,7 @@ sub _git_diff_parse {
 	my $gotheader = 0;
 	my ($remstart);
 	my ($hunk_start, $hunk_index);
-	while(<DIFF>) {
+	while(<$diff>) {
 		chomp;
 		if (m/^@@ -(\d+),(\d+) \+(\d+),(\d+)/) {
 			$remstart = $1;
@@ -335,15 +336,15 @@ sub git_cat_file {
 
 	my $blob = git_ls_tree($rev, $filename);
 
-	open(C,"-|","git","cat-file", "blob", $blob)
+	my $catfile = open_pipe("git","cat-file", "blob", $blob)
 		or die "Failed to git-cat-file blob $blob (rev $rev, file $filename): " . $!;
 
 	my @lines;
-	while(<C>) {
+	while(<$catfile>) {
 		chomp;
 		push @lines, $_;
 	}
-	close(C);
+	close($catfile);
 
 	return @lines;
 }
@@ -351,15 +352,15 @@ sub git_cat_file {
 sub git_ls_tree {
 	my ($rev, $filename) = @_;
 
-	open(T,"-|","git","ls-tree",$rev,$filename)
+	my $lstree = open_pipe("git","ls-tree",$rev,$filename)
 		or die "Failed to call git ls-tree: $!";
 
 	my ($mode, $type, $blob, $tfilename);
-	while(<T>) {
+	while(<$lstree>) {
 		($mode, $type, $blob, $tfilename) = split(/\s+/, $_, 4);
 		last if ($tfilename eq $filename);
 	}
-	close(T);
+	close($lstree);
 
 	return $blob if $filename eq $filename;
 	die "git-ls-tree failed to find blob for $filename";
@@ -379,11 +380,11 @@ sub claim_line {
 
 sub git_commit_info {
 	my ($rev) = @_;
-	open(COMMIT, "-|","git-cat-file", "commit", $rev)
+	my $commit = open_pipe("git-cat-file", "commit", $rev)
 		or die "Failed to call git-cat-file: $!";
 
 	my %info;
-	while(<COMMIT>) {
+	while(<$commit>) {
 		chomp;
 		last if (length $_ == 0);
 
@@ -397,7 +398,7 @@ sub git_commit_info {
 			$info{'committer_date'} = $3;
 		}
 	}
-	close(COMMIT);
+	close($commit);
 
 	return %info;
 }
@@ -430,3 +431,17 @@ sub gitvar_name {
     return join(' ', @field[0...(@field-4)]);
 }
 
+
+sub open_pipe {
+	my (@execlist) = @_;
+
+	my $pid = open my $kid, "-|";
+	defined $pid or die "Cannot fork: $!";
+
+	unless ($pid) {
+		exec @execlist;
+		die "Cannot exec @execlist: $!";
+	}
+
+	return $kid;
+}
-- 
cgit v0.10.2-6-g49f6


From f60d46911dd0c0526339b039ced8772773bd3dea Mon Sep 17 00:00:00 2001
From: Ryan Anderson <ryan@michonline.com>
Date: Sun, 26 Feb 2006 16:09:12 -0500
Subject: annotate: Use qx{} for pipes on activestate.

Note: This needs someone to tell me what the value of $^O is on ActiveState.

Signed-off-by: Ryan Anderson <ryan@michonline.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git-annotate.perl b/git-annotate.perl
index ee8ff15..f9c2c6c 100755
--- a/git-annotate.perl
+++ b/git-annotate.perl
@@ -431,8 +431,20 @@ sub gitvar_name {
     return join(' ', @field[0...(@field-4)]);
 }
 
-
 sub open_pipe {
+	if ($^O eq '##INSERT_ACTIVESTATE_STRING_HERE##') {
+		return open_pipe_activestate(@_);
+	} else {
+		return open_pipe_normal(@_);
+	}
+}
+
+sub open_pipe_activestate {
+	tie *fh, "Git::ActiveStatePipe", @_;
+	return *fh;
+}
+
+sub open_pipe_normal {
 	my (@execlist) = @_;
 
 	my $pid = open my $kid, "-|";
@@ -445,3 +457,32 @@ sub open_pipe {
 
 	return $kid;
 }
+
+package Git::ActiveStatePipe;
+use strict;
+
+sub TIEHANDLE {
+	my ($class, @params) = @_;
+	my $cmdline = join " ", @params;
+	my  @data = qx{$cmdline};
+	bless { i => 0, data => \@data }, $class;
+}
+
+sub READLINE {
+	my $self = shift;
+	if ($self->{i} >= scalar @{$self->{data}}) {
+		return undef;
+	}
+	return $self->{'data'}->[ $self->{i}++ ];
+}
+
+sub CLOSE {
+	my $self = shift;
+	delete $self->{data};
+	delete $self->{i};
+}
+
+sub EOF {
+	my $self = shift;
+	return ($self->{i} >= scalar @{$self->{data}});
+}
-- 
cgit v0.10.2-6-g49f6


From 8f22562c6bfa413c621517dd654b58ed39e98045 Mon Sep 17 00:00:00 2001
From: Eric Wong <normalperson@yhbt.net>
Date: Sun, 26 Feb 2006 02:22:27 -0800
Subject: contrib/git-svn: add show-ignore command

Recursively finds and lists the svn:ignore property on
directories.  The output is suitable for appending to the
$GIT_DIR/info/exclude file.

Signed-off-by: Eric Wong <normalperson@yhbt.net>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/contrib/git-svn/git-svn.perl b/contrib/git-svn/git-svn.perl
index a32ce15..3d855f1 100755
--- a/contrib/git-svn/git-svn.perl
+++ b/contrib/git-svn/git-svn.perl
@@ -49,6 +49,7 @@ my %cmd = (
 	fetch => [ \&fetch, "Download new revisions from SVN" ],
 	init => [ \&init, "Initialize and fetch (import)"],
 	commit => [ \&commit, "Commit git revisions to SVN" ],
+	'show-ignore' => [ \&show_ignore, "Show svn:ignore listings" ],
 	rebuild => [ \&rebuild, "Rebuild git-svn metadata (after git clone)" ],
 	help => [ \&usage, "Show help" ],
 );
@@ -258,6 +259,30 @@ sub commit {
 
 }
 
+sub show_ignore {
+	require File::Find or die $!;
+	my $exclude_file = "$GIT_DIR/info/exclude";
+	open my $fh, '<', $exclude_file or croak $!;
+	chomp(my @excludes = (<$fh>));
+	close $fh or croak $!;
+
+	$SVN_URL ||= file_to_s("$GIT_DIR/$GIT_SVN/info/url");
+	chdir $SVN_WC or croak $!;
+	my %ign;
+	File::Find::find({wanted=>sub{if(lstat $_ && -d _ && -d "$_/.svn"){
+		s#^\./##;
+		@{$ign{$_}} = safe_qx(qw(svn propget svn:ignore),$_);
+		}}, no_chdir=>1},'.');
+
+	print "\n# /\n";
+	foreach (@{$ign{'.'}}) { print '/',$_ if /\S/ }
+	delete $ign{'.'};
+	foreach my $i (sort keys %ign) {
+		print "\n# ",$i,"\n";
+		foreach (@{$ign{$i}}) { print '/',$i,'/',$_ if /\S/ }
+	}
+}
+
 ########################### utility functions #########################
 
 sub setup_git_svn {
diff --git a/contrib/git-svn/git-svn.txt b/contrib/git-svn/git-svn.txt
index cf098d7..b4b7789 100644
--- a/contrib/git-svn/git-svn.txt
+++ b/contrib/git-svn/git-svn.txt
@@ -61,6 +61,11 @@ rebuild::
 	the directory/repository you're tracking has moved or changed
 	protocols.
 
+show-ignore::
+	Recursively finds and lists the svn:ignore property on
+	directories.  The output is suitable for appending to
+	the $GIT_DIR/info/exclude file.
+
 OPTIONS
 -------
 -r <ARG>::
@@ -152,6 +157,8 @@ Tracking and contributing to an Subversion managed-project:
 	git commit git-svn-HEAD..my-branch
 # Something is committed to SVN, pull the latest into your branch::
 	git-svn fetch && git pull . git-svn-HEAD
+# Append svn:ignore settings to the default git exclude file:
+	git-svn show-ignore >> .git/info/exclude
 
 DESIGN PHILOSOPHY
 -----------------
-- 
cgit v0.10.2-6-g49f6


From e17512f3de13b6af24672822b703ee54aa057582 Mon Sep 17 00:00:00 2001
From: Eric Wong <normalperson@yhbt.net>
Date: Sun, 26 Feb 2006 02:22:27 -0800
Subject: contrib/git-svn: optimize sequential commits to svn

Avoid running 'svn up' to a previous revision if we know the
revision we just committed is the first descendant of the
revision we came from.

This reduces the time to do a series of commits by about 25%.

Signed-off-by: Eric Wong <normalperson@yhbt.net>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/contrib/git-svn/git-svn.perl b/contrib/git-svn/git-svn.perl
index 3d855f1..33977e5 100755
--- a/contrib/git-svn/git-svn.perl
+++ b/contrib/git-svn/git-svn.perl
@@ -30,6 +30,7 @@ use File::Basename qw/dirname basename/;
 use File::Path qw/mkpath/;
 use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
 use File::Spec qw//;
+use POSIX qw/strftime/;
 my $sha1 = qr/[a-f\d]{40}/;
 my $sha1_short = qr/[a-f\d]{6,40}/;
 my ($_revision,$_stdin,$_no_ignore_ext,$_no_stop_copy,$_help,$_rmdir,$_edit,
@@ -591,6 +592,7 @@ sub handle_rmdir {
 sub svn_commit_tree {
 	my ($svn_rev, $commit) = @_;
 	my $commit_msg = "$GIT_DIR/$GIT_SVN/.svn-commit.tmp.$$";
+	my %log_msg = ( msg => '' );
 	open my $msg, '>', $commit_msg  or croak $!;
 
 	chomp(my $type = `git-cat-file -t $commit`);
@@ -606,6 +608,7 @@ sub svn_commit_tree {
 			if (!$in_msg) {
 				$in_msg = 1 if (/^\s*$/);
 			} else {
+				$log_msg{msg} .= $_;
 				print $msg $_ or croak $!;
 			}
 		}
@@ -625,9 +628,30 @@ sub svn_commit_tree {
 			join("\n",@ci_output),"\n";
 	my ($rev_committed) = ($committed =~ /^Committed revision (\d+)\./);
 
-	# resync immediately
-	my @svn_up = (qw(svn up), "-r$svn_rev");
+	my @svn_up = qw(svn up);
 	push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
+	if ($rev_committed == ($svn_rev + 1)) {
+		push @svn_up, "-r$rev_committed";
+		sys(@svn_up);
+		my $info = svn_info('.');
+		my $date = $info->{'Last Changed Date'} or die "Missing date\n";
+		if ($info->{'Last Changed Rev'} != $rev_committed) {
+			croak "$info->{'Last Changed Rev'} != $rev_committed\n"
+		}
+		my ($Y,$m,$d,$H,$M,$S,$tz) = ($date =~
+					/(\d{4})\-(\d\d)\-(\d\d)\s
+					 (\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
+					 or croak "Failed to parse date: $date\n";
+		$log_msg{date} = "$tz $Y-$m-$d $H:$M:$S";
+		$log_msg{author} = $info->{'Last Changed Author'};
+		$log_msg{revision} = $rev_committed;
+		$log_msg{msg} .= "\n";
+		my $parent = file_to_s("$REV_DIR/$svn_rev");
+		git_commit(\%log_msg, $parent, $commit);
+		return $rev_committed;
+	}
+	# resync immediately
+	push @svn_up, "-r$svn_rev";
 	sys(@svn_up);
 	return fetch("$rev_committed=$commit")->{revision};
 }
@@ -724,7 +748,7 @@ sub svn_info {
 	# only single-lines seem to exist in svn info output
 	while (<$info_fh>) {
 		chomp $_;
-		if (m#^([^:]+)\s*:\s*(\S*)$#) {
+		if (m#^([^:]+)\s*:\s*(\S.*)$#) {
 			$ret->{$1} = $2;
 			push @{$ret->{-order}}, $1;
 		}
-- 
cgit v0.10.2-6-g49f6


From 3c0b7511cdadd04690208d9772fdbd6a86496229 Mon Sep 17 00:00:00 2001
From: Eric Wong <normalperson@yhbt.net>
Date: Sun, 26 Feb 2006 02:22:27 -0800
Subject: contrib/git-svn: version 0.10.0

New features deserve an increment of the minor version.  This will very
likely become 1.0.0 unless release-critical bugs are found.

Signed-off-by: Eric Wong <normalperson@yhbt.net>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/contrib/git-svn/git-svn.perl b/contrib/git-svn/git-svn.perl
index 33977e5..0b74165 100755
--- a/contrib/git-svn/git-svn.perl
+++ b/contrib/git-svn/git-svn.perl
@@ -8,7 +8,7 @@ use vars qw/	$AUTHOR $VERSION
 		$GIT_SVN_INDEX $GIT_SVN
 		$GIT_DIR $REV_DIR/;
 $AUTHOR = 'Eric Wong <normalperson@yhbt.net>';
-$VERSION = '0.9.1';
+$VERSION = '0.10.0';
 $GIT_DIR = $ENV{GIT_DIR} || "$ENV{PWD}/.git";
 $GIT_SVN = $ENV{GIT_SVN_ID} || 'git-svn';
 $GIT_SVN_INDEX = "$GIT_DIR/$GIT_SVN/index";
-- 
cgit v0.10.2-6-g49f6


From 962554c616e30991553c8497ed1e7c2a415fa84d Mon Sep 17 00:00:00 2001
From: Timo Hirvonen <tihirvon@gmail.com>
Date: Sun, 26 Feb 2006 17:13:46 +0200
Subject: Use setenv(), fix warnings

  - Fix -Wundef -Wold-style-definition warnings
  - Make pll_free() static

[jc: original patch by Timo had another unrelated bit:

  - Use setenv() instead of putenv()

 I'm postponing that part for now.]

Signed-off-by: Timo Hirvonen <tihirvon@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/cache.h b/cache.h
index 5020f07..58eec00 100644
--- a/cache.h
+++ b/cache.h
@@ -10,7 +10,7 @@
 #define deflateBound(c,s)  ((s) + (((s) + 7) >> 3) + (((s) + 63) >> 6) + 11)
 #endif
 
-#if defined(DT_UNKNOWN) && !NO_D_TYPE_IN_DIRENT
+#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
 #define DTYPE(de)	((de)->d_type)
 #else
 #undef DT_UNKNOWN
diff --git a/exec_cmd.c b/exec_cmd.c
index 55af33b..b5e59a9 100644
--- a/exec_cmd.c
+++ b/exec_cmd.c
@@ -13,7 +13,7 @@ void git_set_exec_path(const char *exec_path)
 
 
 /* Returns the highest-priority, location to look for git programs. */
-const char *git_exec_path()
+const char *git_exec_path(void)
 {
 	const char *env;
 
diff --git a/fetch-pack.c b/fetch-pack.c
index 09738fe..535de10 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -82,7 +82,7 @@ static void mark_common(struct commit *commit,
   Get the next rev to send, ignoring the common.
 */
 
-static const unsigned char* get_rev()
+static const unsigned char* get_rev(void)
 {
 	struct commit *commit = NULL;
 
diff --git a/fsck-objects.c b/fsck-objects.c
index 6439d55..4ddd676 100644
--- a/fsck-objects.c
+++ b/fsck-objects.c
@@ -20,7 +20,7 @@ static int check_strict = 0;
 static int keep_cache_objects = 0; 
 static unsigned char head_sha1[20];
 
-#if NO_D_INO_IN_DIRENT
+#ifdef NO_D_INO_IN_DIRENT
 #define SORT_DIRENT 0
 #define DIRENT_SORT_HINT(de) 0
 #else
diff --git a/pack-objects.c b/pack-objects.c
index 0287449..21ee572 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -768,7 +768,7 @@ static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
 	return memcmp(a->sha1, b->sha1, 20);
 }
 
-static struct object_entry **create_final_object_list()
+static struct object_entry **create_final_object_list(void)
 {
 	struct object_entry **list;
 	int i, j;
diff --git a/pack-redundant.c b/pack-redundant.c
index 1869b38..cd81f5a 100644
--- a/pack-redundant.c
+++ b/pack-redundant.c
@@ -45,7 +45,7 @@ static inline void llist_item_put(struct llist_item *item)
 	free_nodes = item;
 }
 
-static inline struct llist_item *llist_item_get()
+static inline struct llist_item *llist_item_get(void)
 {
 	struct llist_item *new;
 	if ( free_nodes ) {
@@ -275,7 +275,7 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 	}
 }
 
-void pll_free(struct pll *l)
+static void pll_free(struct pll *l)
 {
 	struct pll *old;
 	struct pack_list *opl;
-- 
cgit v0.10.2-6-g49f6


From 231af8322ac5313243bc1e8beac8dfd9ff95051d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Sun, 26 Feb 2006 12:34:51 -0800
Subject: Teach the "git" command to handle some commands internally

This is another patch in the "prepare to do more in C" series, where the
git wrapper command is taught about the notion of handling some
functionality internally.

Right now, the only internal commands are "version" and "help", but the
point is that we can now easily extend it to handle some of the trivial
scripts internally. Things like "git log" and "git diff" wouldn't need
separate external scripts any more.

This also implies that to support the old "git-log" and "git-diff" syntax,
the "git" wrapper now automatically looks at the name it was executed as,
and if it is "git-xxxx", it will assume that it is to internally do what
"git xxxx" would do.

In other words, you can (once you implement an internal command) soft- or
hard-link that command to the "git" wrapper command, and it will do the
right thing, whether you use the "git xxxx" or the "git-xxxx" format.

There's one other change: the search order for external programs is
modified slightly, so that the first entry remains GIT_EXEC_DIR, but the
second entry is the same directory as the git wrapper itself was executed
out of - if we can figure it out from argv[0], of course.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/git.c b/git.c
index 4616df6..993cd0d 100644
--- a/git.c
+++ b/git.c
@@ -230,62 +230,141 @@ static void show_man_page(char *git_cmd)
 	execlp("man", "man", page, NULL);
 }
 
+static int cmd_version(int argc, char **argv, char **envp)
+{
+	printf("git version %s\n", GIT_VERSION);
+	return 0;
+}
+
+static int cmd_help(int argc, char **argv, char **envp)
+{
+	char *help_cmd = argv[1];
+	if (!help_cmd)
+		cmd_usage(git_exec_path(), NULL);
+	show_man_page(help_cmd);
+	return 0;
+}
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+
+static void handle_internal_command(int argc, char **argv, char **envp)
+{
+	const char *cmd = argv[0];
+	static struct cmd_struct {
+		const char *cmd;
+		int (*fn)(int, char **, char **);
+	} commands[] = {
+		{ "version", cmd_version },
+		{ "help", cmd_help },
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(commands); i++) {
+		struct cmd_struct *p = commands+i;
+		if (strcmp(p->cmd, cmd))
+			continue;
+		exit(p->fn(argc, argv, envp));
+	}
+}
+
 int main(int argc, char **argv, char **envp)
 {
+	char *cmd = argv[0];
+	char *slash = strrchr(cmd, '/');
 	char git_command[PATH_MAX + 1];
-	char wd[PATH_MAX + 1];
-	int i, show_help = 0;
-	const char *exec_path;
+	const char *exec_path = NULL;
+
+	/*
+	 * Take the basename of argv[0] as the command
+	 * name, and the dirname as the default exec_path
+	 * if it's an absolute path and we don't have
+	 * anything better.
+	 */
+	if (slash) {
+		*slash++ = 0;
+		if (*cmd == '/')
+			exec_path = cmd;
+		cmd = slash;
+	}
 
-	getcwd(wd, PATH_MAX);
+	/*
+	 * "git-xxxx" is the same as "git xxxx", but we obviously:
+	 *
+	 *  - cannot take flags in between the "git" and the "xxxx".
+	 *  - cannot execute it externally (since it would just do
+	 *    the same thing over again)
+	 *
+	 * So we just directly call the internal command handler, and
+	 * die if that one cannot handle it.
+	 */
+	if (!strncmp(cmd, "git-", 4)) {
+		cmd += 4;
+		argv[0] = cmd;
+		handle_internal_command(argc, argv, envp);
+		die("cannot handle %s internally", cmd);
+	}
 
-	for (i = 1; i < argc; i++) {
-		char *arg = argv[i];
+	/* Default command: "help" */
+	cmd = "help";
 
-		if (!strcmp(arg, "help")) {
-			show_help = 1;
-			continue;
-		}
+	/* Look for flags.. */
+	while (argc > 1) {
+		cmd = *++argv;
+		argc--;
 
-		if (strncmp(arg, "--", 2))
+		if (strncmp(cmd, "--", 2))
 			break;
 
-		arg += 2;
+		cmd += 2;
+
+		/*
+		 * For legacy reasons, the "version" and "help"
+		 * commands can be written with "--" prepended
+		 * to make them look like flags.
+		 */
+		if (!strcmp(cmd, "help"))
+			break;
+		if (!strcmp(cmd, "version"))
+			break;
 
-		if (!strncmp(arg, "exec-path", 9)) {
-			arg += 9;
-			if (*arg == '=') {
-				exec_path = arg + 1;
-				git_set_exec_path(exec_path);
-			} else {
-				puts(git_exec_path());
-				exit(0);
+		/*
+		 * Check remaining flags (which by now must be
+		 * "--exec-path", but maybe we will accept
+		 * other arguments some day)
+		 */
+		if (!strncmp(cmd, "exec-path", 9)) {
+			cmd += 9;
+			if (*cmd == '=') {
+				git_set_exec_path(cmd + 1);
+				continue;
 			}
-		}
-		else if (!strcmp(arg, "version")) {
-			printf("git version %s\n", GIT_VERSION);
+			puts(git_exec_path());
 			exit(0);
 		}
-		else if (!strcmp(arg, "help"))
-			show_help = 1;
-		else if (!show_help)
-			cmd_usage(NULL, NULL);
-	}
-
-	if (i >= argc || show_help) {
-		if (i >= argc)
-			cmd_usage(git_exec_path(), NULL);
-
-		show_man_page(argv[i]);
+		cmd_usage(NULL, NULL);
 	}
-
+	argv[0] = cmd;
+
+	/*
+	 * We search for git commands in the following order:
+	 *  - git_exec_path()
+	 *  - the path of the "git" command if we could find it
+	 *    in $0
+	 *  - the regular PATH.
+	 */
+	if (exec_path)
+		prepend_to_path(exec_path, strlen(exec_path));
 	exec_path = git_exec_path();
 	prepend_to_path(exec_path, strlen(exec_path));
 
-	execv_git_cmd(argv + i);
+	/* See if it's an internal command */
+	handle_internal_command(argc, argv, envp);
+
+	/* .. then try the external ones */
+	execv_git_cmd(argv);
 
 	if (errno == ENOENT)
-		cmd_usage(exec_path, "'%s' is not a git-command", argv[i]);
+		cmd_usage(exec_path, "'%s' is not a git-command", cmd);
 
 	fprintf(stderr, "Failed to run command '%s': %s\n",
 		git_command, strerror(errno));
-- 
cgit v0.10.2-6-g49f6


From a204756a45bd357280c156d01858138712493dfa Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 26 Feb 2006 15:16:41 -0800
Subject: sample hooks template.

These two sample hooks try to detect and use the corresponding
commit hook from the same repository.  However, they forgot to
set up GIT_DIR for their own use, so the detection was not in effect.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/templates/hooks--applypatch-msg b/templates/hooks--applypatch-msg
index bda3c86..02de1ef 100644
--- a/templates/hooks--applypatch-msg
+++ b/templates/hooks--applypatch-msg
@@ -9,6 +9,7 @@
 #
 # To enable this hook, make this file executable.
 
+. git-sh-setup
 test -x "$GIT_DIR/hooks/commit-msg" &&
 	exec "$GIT_DIR/hooks/commit-msg" ${1+"$@"}
 :
diff --git a/templates/hooks--pre-applypatch b/templates/hooks--pre-applypatch
index a547516..5f56ce8 100644
--- a/templates/hooks--pre-applypatch
+++ b/templates/hooks--pre-applypatch
@@ -8,6 +8,7 @@
 #
 # To enable this hook, make this file executable.
 
+. git-sh-setup
 test -x "$GIT_DIR/hooks/pre-commit" &&
 	exec "$GIT_DIR/hooks/pre-commit" ${1+"$@"}
 :
-- 
cgit v0.10.2-6-g49f6


From ae563542bf10fa8c33abd2a354e4b28aca4264d7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Sat, 25 Feb 2006 16:19:46 -0800
Subject: First cut at libifying revlist generation

This really just splits things up partially, and creates the
interface to set things up by parsing the command line.

No real code changes so far, although the parsing of filenames is a bit
stricter. In particular, if there is a "--", then we do not accept any
filenames before it, and if there isn't any "--", then we check that _all_
paths listed are valid, not just the first one.

The new argument parsing automatically also gives us "--default" and
"--not" handling as in git-rev-parse.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Makefile b/Makefile
index 6c59cee..3575489 100644
--- a/Makefile
+++ b/Makefile
@@ -192,7 +192,7 @@ LIB_FILE=libgit.a
 LIB_H = \
 	blob.h cache.h commit.h count-delta.h csum-file.h delta.h \
 	diff.h epoch.h object.h pack.h pkt-line.h quote.h refs.h \
-	run-command.h strbuf.h tag.h tree.h git-compat-util.h
+	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h
 
 DIFF_OBJS = \
 	diff.o diffcore-break.o diffcore-order.o diffcore-pathspec.o \
@@ -205,7 +205,7 @@ LIB_OBJS = \
 	quote.o read-cache.o refs.o run-command.o \
 	server-info.o setup.o sha1_file.o sha1_name.o strbuf.o \
 	tag.o tree.o usage.o config.o environment.o ctype.o copy.o \
-	fetch-clone.o \
+	fetch-clone.o revision.o \
 	$(DIFF_OBJS)
 
 LIBS = $(LIB_FILE)
diff --git a/epoch.c b/epoch.c
index 3a76748..0f37492 100644
--- a/epoch.c
+++ b/epoch.c
@@ -15,6 +15,7 @@
 
 #include "cache.h"
 #include "commit.h"
+#include "revision.h"
 #include "epoch.h"
 
 struct fraction {
diff --git a/epoch.h b/epoch.h
index 7493d5a..3756009 100644
--- a/epoch.h
+++ b/epoch.h
@@ -11,7 +11,6 @@ typedef int (*emitter_func) (struct commit *);
 int sort_list_in_merge_order(struct commit_list *list, emitter_func emitter);
 
 /* Low bits are used by rev-list */
-#define UNINTERESTING   (1u<<10)
 #define BOUNDARY        (1u<<11)
 #define VISITED         (1u<<12)
 #define DISCONTINUITY   (1u<<13)
diff --git a/rev-list.c b/rev-list.c
index 67d2a48..d1c52a6 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -6,9 +6,10 @@
 #include "blob.h"
 #include "epoch.h"
 #include "diff.h"
+#include "revision.h"
+
+/* bits #0 and #1 in revision.h */
 
-#define SEEN		(1u << 0)
-#define INTERESTING	(1u << 1)
 #define COUNTED		(1u << 2)
 #define SHOWN		(1u << 3)
 #define TREECHANGE	(1u << 4)
@@ -38,60 +39,20 @@ static const char rev_list_usage[] =
 "    --bisect"
 ;
 
-static int dense = 1;
+struct rev_info revs;
+
 static int unpacked = 0;
 static int bisect_list = 0;
-static int tag_objects = 0;
-static int tree_objects = 0;
-static int blob_objects = 0;
-static int edge_hint = 0;
 static int verbose_header = 0;
 static int abbrev = DEFAULT_ABBREV;
 static int show_parents = 0;
 static int hdr_termination = 0;
 static const char *commit_prefix = "";
-static unsigned long max_age = -1;
-static unsigned long min_age = -1;
-static int max_count = -1;
 static enum cmit_fmt commit_format = CMIT_FMT_RAW;
 static int merge_order = 0;
 static int show_breaks = 0;
 static int stop_traversal = 0;
-static int topo_order = 0;
-static int lifo = 1;
 static int no_merges = 0;
-static const char **paths = NULL;
-static int remove_empty_trees = 0;
-
-struct name_path {
-	struct name_path *up;
-	int elem_len;
-	const char *elem;
-};
-
-static char *path_name(struct name_path *path, const char *name)
-{
-	struct name_path *p;
-	char *n, *m;
-	int nlen = strlen(name);
-	int len = nlen + 1;
-
-	for (p = path; p; p = p->up) {
-		if (p->elem_len)
-			len += p->elem_len + 1;
-	}
-	n = xmalloc(len);
-	m = n + len - (nlen + 1);
-	strcpy(m, name);
-	for (p = path; p; p = p->up) {
-		if (p->elem_len) {
-			m -= p->elem_len + 1;
-			memcpy(m, p->elem, p->elem_len);
-			m[p->elem_len] = '/';
-		}
-	}
-	return n;
-}
 
 static void show_commit(struct commit *commit)
 {
@@ -168,15 +129,15 @@ static int filter_commit(struct commit * commit)
 		return STOP;
 	if (commit->object.flags & (UNINTERESTING|SHOWN))
 		return CONTINUE;
-	if (min_age != -1 && (commit->date > min_age))
+	if (revs.min_age != -1 && (commit->date > revs.min_age))
 		return CONTINUE;
-	if (max_age != -1 && (commit->date < max_age)) {
+	if (revs.max_age != -1 && (commit->date < revs.max_age)) {
 		stop_traversal=1;
 		return CONTINUE;
 	}
 	if (no_merges && (commit->parents && commit->parents->next))
 		return CONTINUE;
-	if (paths && dense) {
+	if (revs.paths && revs.dense) {
 		if (!(commit->object.flags & TREECHANGE))
 			return CONTINUE;
 		rewrite_parents(commit);
@@ -196,7 +157,7 @@ static int process_commit(struct commit * commit)
 		return CONTINUE;
 	}
 
-	if (max_count != -1 && !max_count--)
+	if (revs.max_count != -1 && !revs.max_count--)
 		return STOP;
 
 	show_commit(commit);
@@ -204,19 +165,6 @@ static int process_commit(struct commit * commit)
 	return CONTINUE;
 }
 
-static struct object_list **add_object(struct object *obj,
-				       struct object_list **p,
-				       struct name_path *path,
-				       const char *name)
-{
-	struct object_list *entry = xmalloc(sizeof(*entry));
-	entry->item = obj;
-	entry->next = *p;
-	entry->name = path_name(path, name);
-	*p = entry;
-	return &entry->next;
-}
-
 static struct object_list **process_blob(struct blob *blob,
 					 struct object_list **p,
 					 struct name_path *path,
@@ -224,7 +172,7 @@ static struct object_list **process_blob(struct blob *blob,
 {
 	struct object *obj = &blob->object;
 
-	if (!blob_objects)
+	if (!revs.blob_objects)
 		return p;
 	if (obj->flags & (UNINTERESTING | SEEN))
 		return p;
@@ -241,7 +189,7 @@ static struct object_list **process_tree(struct tree *tree,
 	struct tree_entry_list *entry;
 	struct name_path me;
 
-	if (!tree_objects)
+	if (!revs.tree_objects)
 		return p;
 	if (obj->flags & (UNINTERESTING | SEEN))
 		return p;
@@ -314,75 +262,6 @@ static void show_commit_list(struct commit_list *list)
 	}
 }
 
-static void mark_blob_uninteresting(struct blob *blob)
-{
-	if (!blob_objects)
-		return;
-	if (blob->object.flags & UNINTERESTING)
-		return;
-	blob->object.flags |= UNINTERESTING;
-}
-
-static void mark_tree_uninteresting(struct tree *tree)
-{
-	struct object *obj = &tree->object;
-	struct tree_entry_list *entry;
-
-	if (!tree_objects)
-		return;
-	if (obj->flags & UNINTERESTING)
-		return;
-	obj->flags |= UNINTERESTING;
-	if (!has_sha1_file(obj->sha1))
-		return;
-	if (parse_tree(tree) < 0)
-		die("bad tree %s", sha1_to_hex(obj->sha1));
-	entry = tree->entries;
-	tree->entries = NULL;
-	while (entry) {
-		struct tree_entry_list *next = entry->next;
-		if (entry->directory)
-			mark_tree_uninteresting(entry->item.tree);
-		else
-			mark_blob_uninteresting(entry->item.blob);
-		free(entry);
-		entry = next;
-	}
-}
-
-static void mark_parents_uninteresting(struct commit *commit)
-{
-	struct commit_list *parents = commit->parents;
-
-	while (parents) {
-		struct commit *commit = parents->item;
-		commit->object.flags |= UNINTERESTING;
-
-		/*
-		 * Normally we haven't parsed the parent
-		 * yet, so we won't have a parent of a parent
-		 * here. However, it may turn out that we've
-		 * reached this commit some other way (where it
-		 * wasn't uninteresting), in which case we need
-		 * to mark its parents recursively too..
-		 */
-		if (commit->parents)
-			mark_parents_uninteresting(commit);
-
-		/*
-		 * A missing commit is ok iff its parent is marked 
-		 * uninteresting.
-		 *
-		 * We just mark such a thing parsed, so that when
-		 * it is popped next time around, we won't be trying
-		 * to parse it and get an error.
-		 */
-		if (!has_sha1_file(commit->object.sha1))
-			commit->object.parsed = 1;
-		parents = parents->next;
-	}
-}
-
 static int everybody_uninteresting(struct commit_list *orig)
 {
 	struct commit_list *list = orig;
@@ -413,7 +292,7 @@ static int count_distance(struct commit_list *entry)
 
 		if (commit->object.flags & (UNINTERESTING | COUNTED))
 			break;
-		if (!paths || (commit->object.flags & TREECHANGE))
+		if (!revs.paths || (commit->object.flags & TREECHANGE))
 			nr++;
 		commit->object.flags |= COUNTED;
 		p = commit->parents;
@@ -447,7 +326,7 @@ static struct commit_list *find_bisection(struct commit_list *list)
 	nr = 0;
 	p = list;
 	while (p) {
-		if (!paths || (p->item->object.flags & TREECHANGE))
+		if (!revs.paths || (p->item->object.flags & TREECHANGE))
 			nr++;
 		p = p->next;
 	}
@@ -457,7 +336,7 @@ static struct commit_list *find_bisection(struct commit_list *list)
 	for (p = list; p; p = p->next) {
 		int distance;
 
-		if (paths && !(p->item->object.flags & TREECHANGE))
+		if (revs.paths && !(p->item->object.flags & TREECHANGE))
 			continue;
 
 		distance = count_distance(p);
@@ -483,7 +362,7 @@ static void mark_edge_parents_uninteresting(struct commit *commit)
 		if (!(parent->object.flags & UNINTERESTING))
 			continue;
 		mark_tree_uninteresting(parent->tree);
-		if (edge_hint && !(parent->object.flags & SHOWN)) {
+		if (revs.edge_hint && !(parent->object.flags & SHOWN)) {
 			parent->object.flags |= SHOWN;
 			printf("-%s\n", sha1_to_hex(parent->object.sha1));
 		}
@@ -613,7 +492,7 @@ static void try_to_simplify_commit(struct commit *commit)
 			return;
 
 		case TREE_NEW:
-			if (remove_empty_trees && same_tree_as_empty(p->tree)) {
+			if (revs.remove_empty_trees && same_tree_as_empty(p->tree)) {
 				*pp = parent->next;
 				continue;
 			}
@@ -664,7 +543,7 @@ static void add_parents_to_list(struct commit *commit, struct commit_list **list
 	 * simplify the commit history and find the parent
 	 * that has no differences in the path set if one exists.
 	 */
-	if (paths)
+	if (revs.paths)
 		try_to_simplify_commit(commit);
 
 	parent = commit->parents;
@@ -693,7 +572,7 @@ static struct commit_list *limit_list(struct commit_list *list)
 		list = list->next;
 		free(entry);
 
-		if (max_age != -1 && (commit->date < max_age))
+		if (revs.max_age != -1 && (commit->date < revs.max_age))
 			obj->flags |= UNINTERESTING;
 		if (unpacked && has_sha1_pack(obj->sha1))
 			obj->flags |= UNINTERESTING;
@@ -704,155 +583,40 @@ static struct commit_list *limit_list(struct commit_list *list)
 				break;
 			continue;
 		}
-		if (min_age != -1 && (commit->date > min_age))
+		if (revs.min_age != -1 && (commit->date > revs.min_age))
 			continue;
 		p = &commit_list_insert(commit, p)->next;
 	}
-	if (tree_objects)
+	if (revs.tree_objects)
 		mark_edges_uninteresting(newlist);
 	if (bisect_list)
 		newlist = find_bisection(newlist);
 	return newlist;
 }
 
-static void add_pending_object(struct object *obj, const char *name)
-{
-	add_object(obj, &pending_objects, NULL, name);
-}
-
-static struct commit *get_commit_reference(const char *name, const unsigned char *sha1, unsigned int flags)
-{
-	struct object *object;
-
-	object = parse_object(sha1);
-	if (!object)
-		die("bad object %s", name);
-
-	/*
-	 * Tag object? Look what it points to..
-	 */
-	while (object->type == tag_type) {
-		struct tag *tag = (struct tag *) object;
-		object->flags |= flags;
-		if (tag_objects && !(object->flags & UNINTERESTING))
-			add_pending_object(object, tag->tag);
-		object = parse_object(tag->tagged->sha1);
-		if (!object)
-			die("bad object %s", sha1_to_hex(tag->tagged->sha1));
-	}
-
-	/*
-	 * Commit object? Just return it, we'll do all the complex
-	 * reachability crud.
-	 */
-	if (object->type == commit_type) {
-		struct commit *commit = (struct commit *)object;
-		object->flags |= flags;
-		if (parse_commit(commit) < 0)
-			die("unable to parse commit %s", name);
-		if (flags & UNINTERESTING)
-			mark_parents_uninteresting(commit);
-		return commit;
-	}
-
-	/*
-	 * Tree object? Either mark it uniniteresting, or add it
-	 * to the list of objects to look at later..
-	 */
-	if (object->type == tree_type) {
-		struct tree *tree = (struct tree *)object;
-		if (!tree_objects)
-			return NULL;
-		if (flags & UNINTERESTING) {
-			mark_tree_uninteresting(tree);
-			return NULL;
-		}
-		add_pending_object(object, "");
-		return NULL;
-	}
-
-	/*
-	 * Blob object? You know the drill by now..
-	 */
-	if (object->type == blob_type) {
-		struct blob *blob = (struct blob *)object;
-		if (!blob_objects)
-			return NULL;
-		if (flags & UNINTERESTING) {
-			mark_blob_uninteresting(blob);
-			return NULL;
-		}
-		add_pending_object(object, "");
-		return NULL;
-	}
-	die("%s is unknown object", name);
-}
-
-static void handle_one_commit(struct commit *com, struct commit_list **lst)
-{
-	if (!com || com->object.flags & SEEN)
-		return;
-	com->object.flags |= SEEN;
-	commit_list_insert(com, lst);
-}
-
-/* for_each_ref() callback does not allow user data -- Yuck. */
-static struct commit_list **global_lst;
-
-static int include_one_commit(const char *path, const unsigned char *sha1)
-{
-	struct commit *com = get_commit_reference(path, sha1, 0);
-	handle_one_commit(com, global_lst);
-	return 0;
-}
-
-static void handle_all(struct commit_list **lst)
-{
-	global_lst = lst;
-	for_each_ref(include_one_commit);
-	global_lst = NULL;
-}
-
 int main(int argc, const char **argv)
 {
-	const char *prefix = setup_git_directory();
-	struct commit_list *list = NULL;
+	struct commit_list *list;
 	int i, limited = 0;
 
+	argc = setup_revisions(argc, argv, &revs);
+
 	for (i = 1 ; i < argc; i++) {
-		int flags;
 		const char *arg = argv[i];
-		char *dotdot;
-		struct commit *commit;
-		unsigned char sha1[20];
 
 		/* accept -<digit>, like traditilnal "head" */
 		if ((*arg == '-') && isdigit(arg[1])) {
-			max_count = atoi(arg + 1);
+			revs.max_count = atoi(arg + 1);
 			continue;
 		}
 		if (!strcmp(arg, "-n")) {
 			if (++i >= argc)
 				die("-n requires an argument");
-			max_count = atoi(argv[i]);
+			revs.max_count = atoi(argv[i]);
 			continue;
 		}
 		if (!strncmp(arg,"-n",2)) {
-			max_count = atoi(arg + 2);
-			continue;
-		}
-		if (!strncmp(arg, "--max-count=", 12)) {
-			max_count = atoi(arg + 12);
-			continue;
-		}
-		if (!strncmp(arg, "--max-age=", 10)) {
-			max_age = atoi(arg + 10);
-			limited = 1;
-			continue;
-		}
-		if (!strncmp(arg, "--min-age=", 10)) {
-			min_age = atoi(arg + 10);
-			limited = 1;
+			revs.max_count = atoi(arg + 2);
 			continue;
 		}
 		if (!strcmp(arg, "--header")) {
@@ -893,23 +657,6 @@ int main(int argc, const char **argv)
 			bisect_list = 1;
 			continue;
 		}
-		if (!strcmp(arg, "--all")) {
-			handle_all(&list);
-			continue;
-		}
-		if (!strcmp(arg, "--objects")) {
-			tag_objects = 1;
-			tree_objects = 1;
-			blob_objects = 1;
-			continue;
-		}
-		if (!strcmp(arg, "--objects-edge")) {
-			tag_objects = 1;
-			tree_objects = 1;
-			blob_objects = 1;
-			edge_hint = 1;
-			continue;
-		}
 		if (!strcmp(arg, "--unpacked")) {
 			unpacked = 1;
 			limited = 1;
@@ -923,100 +670,42 @@ int main(int argc, const char **argv)
 			show_breaks = 1;
 			continue;
 		}
-		if (!strcmp(arg, "--topo-order")) {
-		        topo_order = 1;
-			lifo = 1;
-		        limited = 1;
-			continue;
-		}
-		if (!strcmp(arg, "--date-order")) {
-		        topo_order = 1;
-			lifo = 0;
-		        limited = 1;
-			continue;
-		}
-		if (!strcmp(arg, "--dense")) {
-			dense = 1;
-			continue;
-		}
-		if (!strcmp(arg, "--sparse")) {
-			dense = 0;
-			continue;
-		}
-		if (!strcmp(arg, "--remove-empty")) {
-			remove_empty_trees = 1;
-			continue;
-		}
-		if (!strcmp(arg, "--")) {
-			i++;
-			break;
-		}
+		usage(rev_list_usage);
 
-		if (show_breaks && !merge_order)
-			usage(rev_list_usage);
-
-		flags = 0;
-		dotdot = strstr(arg, "..");
-		if (dotdot) {
-			unsigned char from_sha1[20];
-			char *next = dotdot + 2;
-			*dotdot = 0;
-			if (!*next)
-				next = "HEAD";
-			if (!get_sha1(arg, from_sha1) && !get_sha1(next, sha1)) {
-				struct commit *exclude;
-				struct commit *include;
-				
-				exclude = get_commit_reference(arg, from_sha1, UNINTERESTING);
-				include = get_commit_reference(next, sha1, 0);
-				if (!exclude || !include)
-					die("Invalid revision range %s..%s", arg, next);
-				limited = 1;
-				handle_one_commit(exclude, &list);
-				handle_one_commit(include, &list);
-				continue;
-			}
-			*dotdot = '.';
-		}
-		if (*arg == '^') {
-			flags = UNINTERESTING;
-			arg++;
-			limited = 1;
-		}
-		if (get_sha1(arg, sha1) < 0) {
-			struct stat st;
-			if (lstat(arg, &st) < 0)
-				die("'%s': %s", arg, strerror(errno));
-			break;
-		}
-		commit = get_commit_reference(arg, sha1, flags);
-		handle_one_commit(commit, &list);
 	}
 
+	list = revs.commits;
+	if (list && list->next)
+		limited = 1;
+
+	if (revs.topo_order)
+		limited = 1;
+
 	if (!list &&
-	    (!(tag_objects||tree_objects||blob_objects) && !pending_objects))
+	    (!(revs.tag_objects||revs.tree_objects||revs.blob_objects) && !revs.pending_objects))
 		usage(rev_list_usage);
 
-	paths = get_pathspec(prefix, argv + i);
-	if (paths) {
+	if (revs.paths) {
 		limited = 1;
-		diff_tree_setup_paths(paths);
+		diff_tree_setup_paths(revs.paths);
 	}
+	if (revs.max_age || revs.min_age)
+		limited = 1;
 
 	save_commit_buffer = verbose_header;
 	track_object_refs = 0;
 
 	if (!merge_order) {		
 		sort_by_date(&list);
-		if (list && !limited && max_count == 1 &&
-		    !tag_objects && !tree_objects && !blob_objects) {
+		if (list && !limited && revs.max_count == 1 &&
+		    !revs.tag_objects && !revs.tree_objects && !revs.blob_objects) {
 			show_commit(list->item);
 			return 0;
 		}
 	        if (limited)
 			list = limit_list(list);
-		if (topo_order)
-			sort_in_topological_order(&list, lifo);
+		if (revs.topo_order)
+			sort_in_topological_order(&list, revs.lifo);
 		show_commit_list(list);
 	} else {
 #ifndef NO_OPENSSL
diff --git a/revision.c b/revision.c
new file mode 100644
index 0000000..d61410b
--- /dev/null
+++ b/revision.c
@@ -0,0 +1,370 @@
+#include "cache.h"
+#include "tag.h"
+#include "blob.h"
+#include "tree.h"
+#include "commit.h"
+#include "refs.h"
+#include "revision.h"
+
+static char *path_name(struct name_path *path, const char *name)
+{
+	struct name_path *p;
+	char *n, *m;
+	int nlen = strlen(name);
+	int len = nlen + 1;
+
+	for (p = path; p; p = p->up) {
+		if (p->elem_len)
+			len += p->elem_len + 1;
+	}
+	n = xmalloc(len);
+	m = n + len - (nlen + 1);
+	strcpy(m, name);
+	for (p = path; p; p = p->up) {
+		if (p->elem_len) {
+			m -= p->elem_len + 1;
+			memcpy(m, p->elem, p->elem_len);
+			m[p->elem_len] = '/';
+		}
+	}
+	return n;
+}
+
+struct object_list **add_object(struct object *obj,
+				       struct object_list **p,
+				       struct name_path *path,
+				       const char *name)
+{
+	struct object_list *entry = xmalloc(sizeof(*entry));
+	entry->item = obj;
+	entry->next = *p;
+	entry->name = path_name(path, name);
+	*p = entry;
+	return &entry->next;
+}
+
+static void mark_blob_uninteresting(struct blob *blob)
+{
+	if (blob->object.flags & UNINTERESTING)
+		return;
+	blob->object.flags |= UNINTERESTING;
+}
+
+void mark_tree_uninteresting(struct tree *tree)
+{
+	struct object *obj = &tree->object;
+	struct tree_entry_list *entry;
+
+	if (obj->flags & UNINTERESTING)
+		return;
+	obj->flags |= UNINTERESTING;
+	if (!has_sha1_file(obj->sha1))
+		return;
+	if (parse_tree(tree) < 0)
+		die("bad tree %s", sha1_to_hex(obj->sha1));
+	entry = tree->entries;
+	tree->entries = NULL;
+	while (entry) {
+		struct tree_entry_list *next = entry->next;
+		if (entry->directory)
+			mark_tree_uninteresting(entry->item.tree);
+		else
+			mark_blob_uninteresting(entry->item.blob);
+		free(entry);
+		entry = next;
+	}
+}
+
+void mark_parents_uninteresting(struct commit *commit)
+{
+	struct commit_list *parents = commit->parents;
+
+	while (parents) {
+		struct commit *commit = parents->item;
+		commit->object.flags |= UNINTERESTING;
+
+		/*
+		 * Normally we haven't parsed the parent
+		 * yet, so we won't have a parent of a parent
+		 * here. However, it may turn out that we've
+		 * reached this commit some other way (where it
+		 * wasn't uninteresting), in which case we need
+		 * to mark its parents recursively too..
+		 */
+		if (commit->parents)
+			mark_parents_uninteresting(commit);
+
+		/*
+		 * A missing commit is ok iff its parent is marked
+		 * uninteresting.
+		 *
+		 * We just mark such a thing parsed, so that when
+		 * it is popped next time around, we won't be trying
+		 * to parse it and get an error.
+		 */
+		if (!has_sha1_file(commit->object.sha1))
+			commit->object.parsed = 1;
+		parents = parents->next;
+	}
+}
+
+static void add_pending_object(struct rev_info *revs, struct object *obj, const char *name)
+{
+	add_object(obj, &revs->pending_objects, NULL, name);
+}
+
+static struct commit *get_commit_reference(struct rev_info *revs, const char *name, const unsigned char *sha1, unsigned int flags)
+{
+	struct object *object;
+
+	object = parse_object(sha1);
+	if (!object)
+		die("bad object %s", name);
+
+	/*
+	 * Tag object? Look what it points to..
+	 */
+	while (object->type == tag_type) {
+		struct tag *tag = (struct tag *) object;
+		object->flags |= flags;
+		if (revs->tag_objects && !(object->flags & UNINTERESTING))
+			add_pending_object(revs, object, tag->tag);
+		object = parse_object(tag->tagged->sha1);
+		if (!object)
+			die("bad object %s", sha1_to_hex(tag->tagged->sha1));
+	}
+
+	/*
+	 * Commit object? Just return it, we'll do all the complex
+	 * reachability crud.
+	 */
+	if (object->type == commit_type) {
+		struct commit *commit = (struct commit *)object;
+		object->flags |= flags;
+		if (parse_commit(commit) < 0)
+			die("unable to parse commit %s", name);
+		if (flags & UNINTERESTING)
+			mark_parents_uninteresting(commit);
+		return commit;
+	}
+
+	/*
+	 * Tree object? Either mark it uniniteresting, or add it
+	 * to the list of objects to look at later..
+	 */
+	if (object->type == tree_type) {
+		struct tree *tree = (struct tree *)object;
+		if (!revs->tree_objects)
+			return NULL;
+		if (flags & UNINTERESTING) {
+			mark_tree_uninteresting(tree);
+			return NULL;
+		}
+		add_pending_object(revs, object, "");
+		return NULL;
+	}
+
+	/*
+	 * Blob object? You know the drill by now..
+	 */
+	if (object->type == blob_type) {
+		struct blob *blob = (struct blob *)object;
+		if (!revs->blob_objects)
+			return NULL;
+		if (flags & UNINTERESTING) {
+			mark_blob_uninteresting(blob);
+			return NULL;
+		}
+		add_pending_object(revs, object, "");
+		return NULL;
+	}
+	die("%s is unknown object", name);
+}
+
+static void add_one_commit(struct commit *commit, struct rev_info *revs)
+{
+	if (!commit || (commit->object.flags & SEEN))
+		return;
+	commit->object.flags |= SEEN;
+	commit_list_insert(commit, &revs->commits);
+}
+
+static int all_flags;
+static struct rev_info *all_revs;
+
+static int handle_one_ref(const char *path, const unsigned char *sha1)
+{
+	struct commit *commit = get_commit_reference(all_revs, path, sha1, all_flags);
+	add_one_commit(commit, all_revs);
+	return 0;
+}
+
+static void handle_all(struct rev_info *revs, unsigned flags)
+{
+	all_revs = revs;
+	all_flags = flags;
+	for_each_ref(handle_one_ref);
+}
+
+/*
+ * Parse revision information, filling in the "rev_info" structure,
+ * and removing the used arguments from the argument list.
+ *
+ * Returns the number of arguments left ("new argc").
+ */
+int setup_revisions(int argc, const char **argv, struct rev_info *revs)
+{
+	int i, flags, seen_dashdash;
+	const char *def = NULL;
+	const char **unrecognized = argv+1;
+	int left = 1;
+
+	memset(revs, 0, sizeof(*revs));
+	revs->lifo = 1;
+	revs->dense = 1;
+	revs->prefix = setup_git_directory();
+	revs->max_age = -1;
+	revs->min_age = -1;
+	revs->max_count = -1;
+
+	/* First, search for "--" */
+	seen_dashdash = 0;
+	for (i = 1; i < argc; i++) {
+		const char *arg = argv[i];
+		if (strcmp(arg, "--"))
+			continue;
+		argv[i] = NULL;
+		argc = i;
+		revs->paths = get_pathspec(revs->prefix, argv + i + 1);
+		seen_dashdash = 1;
+		break;
+	}
+
+	flags = 0;
+	for (i = 1; i < argc; i++) {
+		struct commit *commit;
+		const char *arg = argv[i];
+		unsigned char sha1[20];
+		char *dotdot;
+		int local_flags;
+
+		if (*arg == '-') {
+			if (!strncmp(arg, "--max-count=", 12)) {
+				revs->max_count = atoi(arg + 12);
+				continue;
+			}
+			if (!strncmp(arg, "--max-age=", 10)) {
+				revs->max_age = atoi(arg + 10);
+				continue;
+			}
+			if (!strncmp(arg, "--min-age=", 10)) {
+				revs->min_age = atoi(arg + 10);
+				continue;
+			}
+			if (!strcmp(arg, "--all")) {
+				handle_all(revs, flags);
+				continue;
+			}
+			if (!strcmp(arg, "--not")) {
+				flags ^= UNINTERESTING;
+				continue;
+			}
+			if (!strcmp(arg, "--default")) {
+				if (++i >= argc)
+					die("bad --default argument");
+				def = argv[i];
+				continue;
+			}
+			if (!strcmp(arg, "--topo-order")) {
+				revs->topo_order = 1;
+				continue;
+			}
+			if (!strcmp(arg, "--date-order")) {
+				revs->lifo = 0;
+				revs->topo_order = 1;
+				continue;
+			}
+			if (!strcmp(arg, "--dense")) {
+				revs->dense = 1;
+				continue;
+			}
+			if (!strcmp(arg, "--sparse")) {
+				revs->dense = 0;
+				continue;
+			}
+			if (!strcmp(arg, "--remove-empty")) {
+				revs->remove_empty_trees = 1;
+				continue;
+			}
+			if (!strcmp(arg, "--objects")) {
+				revs->tag_objects = 1;
+				revs->tree_objects = 1;
+				revs->blob_objects = 1;
+				continue;
+			}
+			if (!strcmp(arg, "--objects-edge")) {
+				revs->tag_objects = 1;
+				revs->tree_objects = 1;
+				revs->blob_objects = 1;
+				revs->edge_hint = 1;
+				continue;
+			}
+			*unrecognized++ = arg;
+			left++;
+			continue;
+		}
+		dotdot = strstr(arg, "..");
+		if (dotdot) {
+			unsigned char from_sha1[20];
+			char *next = dotdot + 2;
+			*dotdot = 0;
+			if (!*next)
+				next = "HEAD";
+			if (!get_sha1(arg, from_sha1) && !get_sha1(next, sha1)) {
+				struct commit *exclude;
+				struct commit *include;
+
+				exclude = get_commit_reference(revs, arg, from_sha1, flags ^ UNINTERESTING);
+				include = get_commit_reference(revs, next, sha1, flags);
+				if (!exclude || !include)
+					die("Invalid revision range %s..%s", arg, next);
+				add_one_commit(exclude, revs);
+				add_one_commit(include, revs);
+				continue;
+			}
+			*dotdot = '.';
+		}
+		local_flags = 0;
+		if (*arg == '^') {
+			local_flags = UNINTERESTING;
+			arg++;
+		}
+		if (get_sha1(arg, sha1) < 0) {
+			struct stat st;
+			int j;
+
+			if (seen_dashdash || local_flags)
+				die("bad revision '%s'", arg);
+
+			/* If we didn't have a "--", all filenames must exist */
+			for (j = i; j < argc; j++) {
+				if (lstat(argv[j], &st) < 0)
+					die("'%s': %s", arg, strerror(errno));
+			}
+			revs->paths = get_pathspec(revs->prefix, argv + i);
+			break;
+		}
+		commit = get_commit_reference(revs, arg, sha1, flags ^ local_flags);
+		add_one_commit(commit, revs);
+	}
+	if (def && !revs->commits) {
+		unsigned char sha1[20];
+		struct commit *commit;
+		if (get_sha1(def, sha1) < 0)
+			die("bad default revision '%s'", def);
+		commit = get_commit_reference(revs, def, sha1, 0);
+		add_one_commit(commit, revs);
+	}
+	*unrecognized = NULL;
+	return left;
+}
diff --git a/revision.h b/revision.h
new file mode 100644
index 0000000..5170ac4
--- /dev/null
+++ b/revision.h
@@ -0,0 +1,48 @@
+#ifndef REVISION_H
+#define REVISION_H
+
+#define SEEN		(1u<<0)
+#define UNINTERESTING   (1u<<1)
+
+struct rev_info {
+	/* Starting list */
+	struct commit_list *commits;
+	struct object_list *pending_objects;
+
+	/* Basic information */
+	const char *prefix;
+	const char **paths;
+
+	/* Traversal flags */
+	unsigned int	dense:1,
+			remove_empty_trees:1,
+			lifo:1,
+			topo_order:1,
+			tag_objects:1,
+			tree_objects:1,
+			blob_objects:1,
+			edge_hint:1;
+
+	/* special limits */
+	int max_count;
+	unsigned long max_age;
+	unsigned long min_age;
+};
+
+/* revision.c */
+extern int setup_revisions(int argc, const char **argv, struct rev_info *revs);
+extern void mark_parents_uninteresting(struct commit *commit);
+extern void mark_tree_uninteresting(struct tree *tree);
+
+struct name_path {
+	struct name_path *up;
+	int elem_len;
+	const char *elem;
+};
+
+extern struct object_list **add_object(struct object *obj,
+				       struct object_list **p,
+				       struct name_path *path,
+				       const char *name);
+
+#endif
-- 
cgit v0.10.2-6-g49f6


From d9cfb964c7a59a39711da12e56563e10aa388331 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sun, 26 Feb 2006 21:19:14 -0800
Subject: rev-list split: minimum fixup.

This fixes "the other end has commit X but since then we tagged
that commit with tag T, and he says he wants T -- what is the
list of objects we need to send him?" question:

	git-rev-list --objects ^X T

We ended up sending everything since the beginning of time X-<.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/rev-list.c b/rev-list.c
index d1c52a6..630626e 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -214,8 +214,6 @@ static struct object_list **process_tree(struct tree *tree,
 	return p;
 }
 
-static struct object_list *pending_objects = NULL;
-
 static void show_commit_list(struct commit_list *list)
 {
 	struct object_list *objects = NULL, **p = &objects, *pending;
@@ -226,7 +224,7 @@ static void show_commit_list(struct commit_list *list)
 		if (process_commit(commit) == STOP)
 			break;
 	}
-	for (pending = pending_objects; pending; pending = pending->next) {
+	for (pending = revs.pending_objects; pending; pending = pending->next) {
 		struct object *obj = pending->item;
 		const char *name = pending->name;
 		if (obj->flags & (UNINTERESTING | SEEN))
@@ -675,7 +673,7 @@ int main(int argc, const char **argv)
 	}
 
 	list = revs.commits;
-	if (list && list->next)
+	if (list)
 		limited = 1;
 
 	if (revs.topo_order)
@@ -689,7 +687,7 @@ int main(int argc, const char **argv)
 		limited = 1;
 		diff_tree_setup_paths(revs.paths);
 	}
-	if (revs.max_age || revs.min_age)
+	if (revs.max_age != -1 || revs.min_age != -1)
 		limited = 1;
 
 	save_commit_buffer = verbose_header;
-- 
cgit v0.10.2-6-g49f6


From d9a83684c473e04c61f0060c4926d20f8183f7b6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Mon, 27 Feb 2006 08:54:36 -0800
Subject: Splitting rev-list into revisions lib, end of beginning.

This makes the rewrite easier to validate in that the revision flag
parsing and walking parts are now all in the rev_info structure.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/rev-list.c b/rev-list.c
index 630626e..2e80930 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -41,7 +41,6 @@ static const char rev_list_usage[] =
 
 struct rev_info revs;
 
-static int unpacked = 0;
 static int bisect_list = 0;
 static int verbose_header = 0;
 static int abbrev = DEFAULT_ABBREV;
@@ -572,7 +571,7 @@ static struct commit_list *limit_list(struct commit_list *list)
 
 		if (revs.max_age != -1 && (commit->date < revs.max_age))
 			obj->flags |= UNINTERESTING;
-		if (unpacked && has_sha1_pack(obj->sha1))
+		if (revs.unpacked && has_sha1_pack(obj->sha1))
 			obj->flags |= UNINTERESTING;
 		add_parents_to_list(commit, &list);
 		if (obj->flags & UNINTERESTING) {
@@ -595,7 +594,7 @@ static struct commit_list *limit_list(struct commit_list *list)
 int main(int argc, const char **argv)
 {
 	struct commit_list *list;
-	int i, limited = 0;
+	int i;
 
 	argc = setup_revisions(argc, argv, &revs);
 
@@ -655,11 +654,6 @@ int main(int argc, const char **argv)
 			bisect_list = 1;
 			continue;
 		}
-		if (!strcmp(arg, "--unpacked")) {
-			unpacked = 1;
-			limited = 1;
-			continue;
-		}
 		if (!strcmp(arg, "--merge-order")) {
 		        merge_order = 1;
 			continue;
@@ -673,34 +667,25 @@ int main(int argc, const char **argv)
 	}
 
 	list = revs.commits;
-	if (list)
-		limited = 1;
-
-	if (revs.topo_order)
-		limited = 1;
 
 	if (!list &&
 	    (!(revs.tag_objects||revs.tree_objects||revs.blob_objects) && !revs.pending_objects))
 		usage(rev_list_usage);
 
-	if (revs.paths) {
-		limited = 1;
+	if (revs.paths)
 		diff_tree_setup_paths(revs.paths);
-	}
-	if (revs.max_age != -1 || revs.min_age != -1)
-		limited = 1;
 
 	save_commit_buffer = verbose_header;
 	track_object_refs = 0;
 
 	if (!merge_order) {		
 		sort_by_date(&list);
-		if (list && !limited && revs.max_count == 1 &&
+		if (list && !revs.limited && revs.max_count == 1 &&
 		    !revs.tag_objects && !revs.tree_objects && !revs.blob_objects) {
 			show_commit(list->item);
 			return 0;
 		}
-	        if (limited)
+	        if (revs.limited)
 			list = limit_list(list);
 		if (revs.topo_order)
 			sort_in_topological_order(&list, revs.lifo);
diff --git a/revision.c b/revision.c
index d61410b..67ff4de 100644
--- a/revision.c
+++ b/revision.c
@@ -143,8 +143,10 @@ static struct commit *get_commit_reference(struct rev_info *revs, const char *na
 		object->flags |= flags;
 		if (parse_commit(commit) < 0)
 			die("unable to parse commit %s", name);
-		if (flags & UNINTERESTING)
+		if (flags & UNINTERESTING) {
 			mark_parents_uninteresting(commit);
+			revs->limited = 1;
+		}
 		return commit;
 	}
 
@@ -255,10 +257,12 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
 			}
 			if (!strncmp(arg, "--max-age=", 10)) {
 				revs->max_age = atoi(arg + 10);
+				revs->limited = 1;
 				continue;
 			}
 			if (!strncmp(arg, "--min-age=", 10)) {
 				revs->min_age = atoi(arg + 10);
+				revs->limited = 1;
 				continue;
 			}
 			if (!strcmp(arg, "--all")) {
@@ -277,11 +281,13 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
 			}
 			if (!strcmp(arg, "--topo-order")) {
 				revs->topo_order = 1;
+				revs->limited = 1;
 				continue;
 			}
 			if (!strcmp(arg, "--date-order")) {
 				revs->lifo = 0;
 				revs->topo_order = 1;
+				revs->limited = 1;
 				continue;
 			}
 			if (!strcmp(arg, "--dense")) {
@@ -309,6 +315,11 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
 				revs->edge_hint = 1;
 				continue;
 			}
+			if (!strcmp(arg, "--unpacked")) {
+				revs->unpacked = 1;
+				revs->limited = 1;
+				continue;
+			}
 			*unrecognized++ = arg;
 			left++;
 			continue;
@@ -365,6 +376,8 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
 		commit = get_commit_reference(revs, def, sha1, 0);
 		add_one_commit(commit, revs);
 	}
+	if (revs->paths)
+		revs->limited = 1;
 	*unrecognized = NULL;
 	return left;
 }
diff --git a/revision.h b/revision.h
index 5170ac4..a22f198 100644
--- a/revision.h
+++ b/revision.h
@@ -21,7 +21,9 @@ struct rev_info {
 			tag_objects:1,
 			tree_objects:1,
 			blob_objects:1,
-			edge_hint:1;
+			edge_hint:1,
+			limited:1,
+			unpacked:1;
 
 	/* special limits */
 	int max_count;
-- 
cgit v0.10.2-6-g49f6


From a4a88b2bab3b6fb0b30f63418701f42388e0fe0a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Tue, 28 Feb 2006 11:24:00 -0800
Subject: git-rev-list libification: rev-list walking

This actually moves the "meat" of the revision walking from rev-list.c
to the new library code in revision.h. It introduces the new functions

	void prepare_revision_walk(struct rev_info *revs);
	struct commit *get_revision(struct rev_info *revs);

to prepare and then walk the revisions that we have.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/rev-list.c b/rev-list.c
index 2e80930..94f22dd 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -8,11 +8,10 @@
 #include "diff.h"
 #include "revision.h"
 
-/* bits #0 and #1 in revision.h */
+/* bits #0-2 in revision.h */
 
-#define COUNTED		(1u << 2)
-#define SHOWN		(1u << 3)
-#define TREECHANGE	(1u << 4)
+#define COUNTED		(1u << 3)
+#define SHOWN		(1u << 4)
 #define TMP_MARK	(1u << 5) /* for isolated cases; clean after use */
 
 static const char rev_list_usage[] =
@@ -213,17 +212,17 @@ static struct object_list **process_tree(struct tree *tree,
 	return p;
 }
 
-static void show_commit_list(struct commit_list *list)
+static void show_commit_list(struct rev_info *revs)
 {
+	struct commit *commit;
 	struct object_list *objects = NULL, **p = &objects, *pending;
-	while (list) {
-		struct commit *commit = pop_most_recent_commit(&list, SEEN);
 
+	while ((commit = get_revision(revs)) != NULL) {
 		p = process_tree(commit->tree, p, NULL, "");
 		if (process_commit(commit) == STOP)
 			break;
 	}
-	for (pending = revs.pending_objects; pending; pending = pending->next) {
+	for (pending = revs->pending_objects; pending; pending = pending->next) {
 		struct object *obj = pending->item;
 		const char *name = pending->name;
 		if (obj->flags & (UNINTERESTING | SEEN))
@@ -259,19 +258,6 @@ static void show_commit_list(struct commit_list *list)
 	}
 }
 
-static int everybody_uninteresting(struct commit_list *orig)
-{
-	struct commit_list *list = orig;
-	while (list) {
-		struct commit *commit = list->item;
-		list = list->next;
-		if (commit->object.flags & UNINTERESTING)
-			continue;
-		return 0;
-	}
-	return 1;
-}
-
 /*
  * This is a truly stupid algorithm, but it's only
  * used for bisection, and we just don't care enough.
@@ -379,224 +365,12 @@ static void mark_edges_uninteresting(struct commit_list *list)
 	}
 }
 
-#define TREE_SAME	0
-#define TREE_NEW	1
-#define TREE_DIFFERENT	2
-static int tree_difference = TREE_SAME;
-
-static void file_add_remove(struct diff_options *options,
-		    int addremove, unsigned mode,
-		    const unsigned char *sha1,
-		    const char *base, const char *path)
-{
-	int diff = TREE_DIFFERENT;
-
-	/*
-	 * Is it an add of a new file? It means that
-	 * the old tree didn't have it at all, so we
-	 * will turn "TREE_SAME" -> "TREE_NEW", but
-	 * leave any "TREE_DIFFERENT" alone (and if
-	 * it already was "TREE_NEW", we'll keep it
-	 * "TREE_NEW" of course).
-	 */
-	if (addremove == '+') {
-		diff = tree_difference;
-		if (diff != TREE_SAME)
-			return;
-		diff = TREE_NEW;
-	}
-	tree_difference = diff;
-}
-
-static void file_change(struct diff_options *options,
-		 unsigned old_mode, unsigned new_mode,
-		 const unsigned char *old_sha1,
-		 const unsigned char *new_sha1,
-		 const char *base, const char *path)
-{
-	tree_difference = TREE_DIFFERENT;
-}
-
-static struct diff_options diff_opt = {
-	.recursive = 1,
-	.add_remove = file_add_remove,
-	.change = file_change,
-};
-
-static int compare_tree(struct tree *t1, struct tree *t2)
-{
-	if (!t1)
-		return TREE_NEW;
-	if (!t2)
-		return TREE_DIFFERENT;
-	tree_difference = TREE_SAME;
-	if (diff_tree_sha1(t1->object.sha1, t2->object.sha1, "", &diff_opt) < 0)
-		return TREE_DIFFERENT;
-	return tree_difference;
-}
-
-static int same_tree_as_empty(struct tree *t1)
-{
-	int retval;
-	void *tree;
-	struct tree_desc empty, real;
-
-	if (!t1)
-		return 0;
-
-	tree = read_object_with_reference(t1->object.sha1, "tree", &real.size, NULL);
-	if (!tree)
-		return 0;
-	real.buf = tree;
-
-	empty.buf = "";
-	empty.size = 0;
-
-	tree_difference = 0;
-	retval = diff_tree(&empty, &real, "", &diff_opt);
-	free(tree);
-
-	return retval >= 0 && !tree_difference;
-}
-
-static void try_to_simplify_commit(struct commit *commit)
-{
-	struct commit_list **pp, *parent;
-
-	if (!commit->tree)
-		return;
-
-	if (!commit->parents) {
-		if (!same_tree_as_empty(commit->tree))
-			commit->object.flags |= TREECHANGE;
-		return;
-	}
-
-	pp = &commit->parents;
-	while ((parent = *pp) != NULL) {
-		struct commit *p = parent->item;
-
-		if (p->object.flags & UNINTERESTING) {
-			pp = &parent->next;
-			continue;
-		}
-
-		parse_commit(p);
-		switch (compare_tree(p->tree, commit->tree)) {
-		case TREE_SAME:
-			parent->next = NULL;
-			commit->parents = parent;
-			return;
-
-		case TREE_NEW:
-			if (revs.remove_empty_trees && same_tree_as_empty(p->tree)) {
-				*pp = parent->next;
-				continue;
-			}
-		/* fallthrough */
-		case TREE_DIFFERENT:
-			pp = &parent->next;
-			continue;
-		}
-		die("bad tree compare for commit %s", sha1_to_hex(commit->object.sha1));
-	}
-	commit->object.flags |= TREECHANGE;
-}
-
-static void add_parents_to_list(struct commit *commit, struct commit_list **list)
-{
-	struct commit_list *parent = commit->parents;
-
-	/*
-	 * If the commit is uninteresting, don't try to
-	 * prune parents - we want the maximal uninteresting
-	 * set.
-	 *
-	 * Normally we haven't parsed the parent
-	 * yet, so we won't have a parent of a parent
-	 * here. However, it may turn out that we've
-	 * reached this commit some other way (where it
-	 * wasn't uninteresting), in which case we need
-	 * to mark its parents recursively too..
-	 */
-	if (commit->object.flags & UNINTERESTING) {
-		while (parent) {
-			struct commit *p = parent->item;
-			parent = parent->next;
-			parse_commit(p);
-			p->object.flags |= UNINTERESTING;
-			if (p->parents)
-				mark_parents_uninteresting(p);
-			if (p->object.flags & SEEN)
-				continue;
-			p->object.flags |= SEEN;
-			insert_by_date(p, list);
-		}
-		return;
-	}
-
-	/*
-	 * Ok, the commit wasn't uninteresting. Try to
-	 * simplify the commit history and find the parent
-	 * that has no differences in the path set if one exists.
-	 */
-	if (revs.paths)
-		try_to_simplify_commit(commit);
-
-	parent = commit->parents;
-	while (parent) {
-		struct commit *p = parent->item;
-
-		parent = parent->next;
-
-		parse_commit(p);
-		if (p->object.flags & SEEN)
-			continue;
-		p->object.flags |= SEEN;
-		insert_by_date(p, list);
-	}
-}
-
-static struct commit_list *limit_list(struct commit_list *list)
-{
-	struct commit_list *newlist = NULL;
-	struct commit_list **p = &newlist;
-	while (list) {
-		struct commit_list *entry = list;
-		struct commit *commit = list->item;
-		struct object *obj = &commit->object;
-
-		list = list->next;
-		free(entry);
-
-		if (revs.max_age != -1 && (commit->date < revs.max_age))
-			obj->flags |= UNINTERESTING;
-		if (revs.unpacked && has_sha1_pack(obj->sha1))
-			obj->flags |= UNINTERESTING;
-		add_parents_to_list(commit, &list);
-		if (obj->flags & UNINTERESTING) {
-			mark_parents_uninteresting(commit);
-			if (everybody_uninteresting(list))
-				break;
-			continue;
-		}
-		if (revs.min_age != -1 && (commit->date > revs.min_age))
-			continue;
-		p = &commit_list_insert(commit, p)->next;
-	}
-	if (revs.tree_objects)
-		mark_edges_uninteresting(newlist);
-	if (bisect_list)
-		newlist = find_bisection(newlist);
-	return newlist;
-}
-
 int main(int argc, const char **argv)
 {
 	struct commit_list *list;
 	int i;
 
-	argc = setup_revisions(argc, argv, &revs);
+	argc = setup_revisions(argc, argv, &revs, NULL);
 
 	for (i = 1 ; i < argc; i++) {
 		const char *arg = argv[i];
@@ -672,24 +446,18 @@ int main(int argc, const char **argv)
 	    (!(revs.tag_objects||revs.tree_objects||revs.blob_objects) && !revs.pending_objects))
 		usage(rev_list_usage);
 
-	if (revs.paths)
-		diff_tree_setup_paths(revs.paths);
+	prepare_revision_walk(&revs);
+	if (revs.tree_objects)
+		mark_edges_uninteresting(revs.commits);
+
+	if (bisect_list)
+		revs.commits = find_bisection(revs.commits);
 
 	save_commit_buffer = verbose_header;
 	track_object_refs = 0;
 
-	if (!merge_order) {		
-		sort_by_date(&list);
-		if (list && !revs.limited && revs.max_count == 1 &&
-		    !revs.tag_objects && !revs.tree_objects && !revs.blob_objects) {
-			show_commit(list->item);
-			return 0;
-		}
-	        if (revs.limited)
-			list = limit_list(list);
-		if (revs.topo_order)
-			sort_in_topological_order(&list, revs.lifo);
-		show_commit_list(list);
+	if (!merge_order) {
+		show_commit_list(&revs);
 	} else {
 #ifndef NO_OPENSSL
 		if (sort_list_in_merge_order(list, &process_commit)) {
diff --git a/revision.c b/revision.c
index 67ff4de..f1ac62d 100644
--- a/revision.c
+++ b/revision.c
@@ -3,6 +3,7 @@
 #include "blob.h"
 #include "tree.h"
 #include "commit.h"
+#include "diff.h"
 #include "refs.h"
 #include "revision.h"
 
@@ -183,6 +184,229 @@ static struct commit *get_commit_reference(struct rev_info *revs, const char *na
 	die("%s is unknown object", name);
 }
 
+static int everybody_uninteresting(struct commit_list *orig)
+{
+	struct commit_list *list = orig;
+	while (list) {
+		struct commit *commit = list->item;
+		list = list->next;
+		if (commit->object.flags & UNINTERESTING)
+			continue;
+		return 0;
+	}
+	return 1;
+}
+
+#define TREE_SAME	0
+#define TREE_NEW	1
+#define TREE_DIFFERENT	2
+static int tree_difference = TREE_SAME;
+
+static void file_add_remove(struct diff_options *options,
+		    int addremove, unsigned mode,
+		    const unsigned char *sha1,
+		    const char *base, const char *path)
+{
+	int diff = TREE_DIFFERENT;
+
+	/*
+	 * Is it an add of a new file? It means that
+	 * the old tree didn't have it at all, so we
+	 * will turn "TREE_SAME" -> "TREE_NEW", but
+	 * leave any "TREE_DIFFERENT" alone (and if
+	 * it already was "TREE_NEW", we'll keep it
+	 * "TREE_NEW" of course).
+	 */
+	if (addremove == '+') {
+		diff = tree_difference;
+		if (diff != TREE_SAME)
+			return;
+		diff = TREE_NEW;
+	}
+	tree_difference = diff;
+}
+
+static void file_change(struct diff_options *options,
+		 unsigned old_mode, unsigned new_mode,
+		 const unsigned char *old_sha1,
+		 const unsigned char *new_sha1,
+		 const char *base, const char *path)
+{
+	tree_difference = TREE_DIFFERENT;
+}
+
+static struct diff_options diff_opt = {
+	.recursive = 1,
+	.add_remove = file_add_remove,
+	.change = file_change,
+};
+
+static int compare_tree(struct tree *t1, struct tree *t2)
+{
+	if (!t1)
+		return TREE_NEW;
+	if (!t2)
+		return TREE_DIFFERENT;
+	tree_difference = TREE_SAME;
+	if (diff_tree_sha1(t1->object.sha1, t2->object.sha1, "", &diff_opt) < 0)
+		return TREE_DIFFERENT;
+	return tree_difference;
+}
+
+static int same_tree_as_empty(struct tree *t1)
+{
+	int retval;
+	void *tree;
+	struct tree_desc empty, real;
+
+	if (!t1)
+		return 0;
+
+	tree = read_object_with_reference(t1->object.sha1, "tree", &real.size, NULL);
+	if (!tree)
+		return 0;
+	real.buf = tree;
+
+	empty.buf = "";
+	empty.size = 0;
+
+	tree_difference = 0;
+	retval = diff_tree(&empty, &real, "", &diff_opt);
+	free(tree);
+
+	return retval >= 0 && !tree_difference;
+}
+
+static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
+{
+	struct commit_list **pp, *parent;
+
+	if (!commit->tree)
+		return;
+
+	if (!commit->parents) {
+		if (!same_tree_as_empty(commit->tree))
+			commit->object.flags |= TREECHANGE;
+		return;
+	}
+
+	pp = &commit->parents;
+	while ((parent = *pp) != NULL) {
+		struct commit *p = parent->item;
+
+		if (p->object.flags & UNINTERESTING) {
+			pp = &parent->next;
+			continue;
+		}
+
+		parse_commit(p);
+		switch (compare_tree(p->tree, commit->tree)) {
+		case TREE_SAME:
+			parent->next = NULL;
+			commit->parents = parent;
+			return;
+
+		case TREE_NEW:
+			if (revs->remove_empty_trees && same_tree_as_empty(p->tree)) {
+				*pp = parent->next;
+				continue;
+			}
+		/* fallthrough */
+		case TREE_DIFFERENT:
+			pp = &parent->next;
+			continue;
+		}
+		die("bad tree compare for commit %s", sha1_to_hex(commit->object.sha1));
+	}
+	commit->object.flags |= TREECHANGE;
+}
+
+static void add_parents_to_list(struct rev_info *revs, struct commit *commit, struct commit_list **list)
+{
+	struct commit_list *parent = commit->parents;
+
+	/*
+	 * If the commit is uninteresting, don't try to
+	 * prune parents - we want the maximal uninteresting
+	 * set.
+	 *
+	 * Normally we haven't parsed the parent
+	 * yet, so we won't have a parent of a parent
+	 * here. However, it may turn out that we've
+	 * reached this commit some other way (where it
+	 * wasn't uninteresting), in which case we need
+	 * to mark its parents recursively too..
+	 */
+	if (commit->object.flags & UNINTERESTING) {
+		while (parent) {
+			struct commit *p = parent->item;
+			parent = parent->next;
+			parse_commit(p);
+			p->object.flags |= UNINTERESTING;
+			if (p->parents)
+				mark_parents_uninteresting(p);
+			if (p->object.flags & SEEN)
+				continue;
+			p->object.flags |= SEEN;
+			insert_by_date(p, list);
+		}
+		return;
+	}
+
+	/*
+	 * Ok, the commit wasn't uninteresting. Try to
+	 * simplify the commit history and find the parent
+	 * that has no differences in the path set if one exists.
+	 */
+	if (revs->paths)
+		try_to_simplify_commit(revs, commit);
+
+	parent = commit->parents;
+	while (parent) {
+		struct commit *p = parent->item;
+
+		parent = parent->next;
+
+		parse_commit(p);
+		if (p->object.flags & SEEN)
+			continue;
+		p->object.flags |= SEEN;
+		insert_by_date(p, list);
+	}
+}
+
+static void limit_list(struct rev_info *revs)
+{
+	struct commit_list *list = revs->commits;
+	struct commit_list *newlist = NULL;
+	struct commit_list **p = &newlist;
+
+	while (list) {
+		struct commit_list *entry = list;
+		struct commit *commit = list->item;
+		struct object *obj = &commit->object;
+
+		list = list->next;
+		free(entry);
+
+		if (revs->max_age != -1 && (commit->date < revs->max_age))
+			obj->flags |= UNINTERESTING;
+		if (revs->unpacked && has_sha1_pack(obj->sha1))
+			obj->flags |= UNINTERESTING;
+		add_parents_to_list(revs, commit, &list);
+		if (obj->flags & UNINTERESTING) {
+			mark_parents_uninteresting(commit);
+			if (everybody_uninteresting(list))
+				break;
+			continue;
+		}
+		if (revs->min_age != -1 && (commit->date > revs->min_age))
+			continue;
+		p = &commit_list_insert(commit, p)->next;
+	}
+	revs->commits = newlist;
+}
+
 static void add_one_commit(struct commit *commit, struct rev_info *revs)
 {
 	if (!commit || (commit->object.flags & SEEN))
@@ -214,10 +438,9 @@ static void handle_all(struct rev_info *revs, unsigned flags)
  *
  * Returns the number of arguments left ("new argc").
  */
-int setup_revisions(int argc, const char **argv, struct rev_info *revs)
+int setup_revisions(int argc, const char **argv, struct rev_info *revs, const char *def)
 {
 	int i, flags, seen_dashdash;
-	const char *def = NULL;
 	const char **unrecognized = argv+1;
 	int left = 1;
 
@@ -381,3 +604,23 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs)
 	*unrecognized = NULL;
 	return left;
 }
+
+void prepare_revision_walk(struct rev_info *revs)
+{
+	if (revs->paths)
+		diff_tree_setup_paths(revs->paths);
+	sort_by_date(&revs->commits);
+	if (revs->limited)
+		limit_list(revs);
+	if (revs->topo_order)
+		sort_in_topological_order(&revs->commits, revs->lifo);
+}
+
+struct commit *get_revision(struct rev_info *revs)
+{
+	if (!revs->commits)
+		return NULL;
+	return pop_most_recent_commit(&revs->commits, SEEN);
+}
+
+
diff --git a/revision.h b/revision.h
index a22f198..0bed3c0 100644
--- a/revision.h
+++ b/revision.h
@@ -3,6 +3,7 @@
 
 #define SEEN		(1u<<0)
 #define UNINTERESTING   (1u<<1)
+#define TREECHANGE	(1u<<2)
 
 struct rev_info {
 	/* Starting list */
@@ -32,7 +33,10 @@ struct rev_info {
 };
 
 /* revision.c */
-extern int setup_revisions(int argc, const char **argv, struct rev_info *revs);
+extern int setup_revisions(int argc, const char **argv, struct rev_info *revs, const char *def);
+extern void prepare_revision_walk(struct rev_info *revs);
+extern struct commit *get_revision(struct rev_info *revs);
+
 extern void mark_parents_uninteresting(struct commit *commit);
 extern void mark_tree_uninteresting(struct tree *tree);
 
-- 
cgit v0.10.2-6-g49f6


From f67b45f862d17b54037b9b59eefc385624d1fa83 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Tue, 28 Feb 2006 11:26:21 -0800
Subject: Introduce trivial new pager.c helper infrastructure

This introduces the new function

	void setup_pager(void);

to set up output to be written through a pager application.

All in preparation for doing the simple scripts in C.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Makefile b/Makefile
index 3575489..0b1a998 100644
--- a/Makefile
+++ b/Makefile
@@ -205,7 +205,7 @@ LIB_OBJS = \
 	quote.o read-cache.o refs.o run-command.o \
 	server-info.o setup.o sha1_file.o sha1_name.o strbuf.o \
 	tag.o tree.o usage.o config.o environment.o ctype.o copy.o \
-	fetch-clone.o revision.o \
+	fetch-clone.o revision.o pager.o \
 	$(DIFF_OBJS)
 
 LIBS = $(LIB_FILE)
diff --git a/cache.h b/cache.h
index 58eec00..3af6b86 100644
--- a/cache.h
+++ b/cache.h
@@ -352,4 +352,7 @@ extern int copy_fd(int ifd, int ofd);
 extern int receive_unpack_pack(int fd[2], const char *me, int quiet);
 extern int receive_keep_pack(int fd[2], const char *me, int quiet);
 
+/* pager.c */
+extern void setup_pager(void);
+
 #endif /* CACHE_H */
diff --git a/pager.c b/pager.c
new file mode 100644
index 0000000..1364e15
--- /dev/null
+++ b/pager.c
@@ -0,0 +1,48 @@
+#include "cache.h"
+
+/*
+ * This is split up from the rest of git so that we might do
+ * something different on Windows, for example.
+ */
+
+static void run_pager(void)
+{
+	const char *prog = getenv("PAGER");
+	if (!prog)
+		prog = "less";
+	setenv("LESS", "-S", 0);
+	execlp(prog, prog, NULL);
+}
+
+void setup_pager(void)
+{
+	pid_t pid;
+	int fd[2];
+
+	if (!isatty(1))
+		return;
+	if (pipe(fd) < 0)
+		return;
+	pid = fork();
+	if (pid < 0) {
+		close(fd[0]);
+		close(fd[1]);
+		return;
+	}
+
+	/* return in the child */
+	if (!pid) {
+		dup2(fd[1], 1);
+		close(fd[0]);
+		close(fd[1]);
+		return;
+	}
+
+	/* The original process turns into the PAGER */
+	dup2(fd[0], 0);
+	close(fd[0]);
+	close(fd[1]);
+
+	run_pager();
+	exit(255);
+}
-- 
cgit v0.10.2-6-g49f6


From 70b006b9712b57741ec1320b15aef2f8b1d6a905 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Tue, 28 Feb 2006 11:30:19 -0800
Subject: Tie it all together: "git log"

This is what the previous diffs all built up to.

We can do "git log" as a trivial small helper function inside git.c,
because the infrastructure is all there for us to use as a library.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Makefile b/Makefile
index 0b1a998..ead13be 100644
--- a/Makefile
+++ b/Makefile
@@ -450,7 +450,7 @@ strip: $(PROGRAMS) git$X
 
 git$X: git.c $(LIB_FILE)
 	$(CC) -DGIT_VERSION='"$(GIT_VERSION)"' \
-		$(CFLAGS) $(COMPAT_CFLAGS) -o $@ $(filter %.c,$^) $(LIB_FILE)
+		$(ALL_CFLAGS) -o $@ $(filter %.c,$^) $(LIB_FILE) $(LIBS)
 
 $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
 	rm -f $@
diff --git a/git.c b/git.c
index 993cd0d..b0da6b1 100644
--- a/git.c
+++ b/git.c
@@ -12,6 +12,10 @@
 #include "git-compat-util.h"
 #include "exec_cmd.h"
 
+#include "cache.h"
+#include "commit.h"
+#include "revision.h"
+
 #ifndef PATH_MAX
 # define PATH_MAX 4096
 #endif
@@ -245,6 +249,25 @@ static int cmd_help(int argc, char **argv, char **envp)
 	return 0;
 }
 
+#define LOGSIZE (65536)
+
+static int cmd_log(int argc, char **argv, char **envp)
+{
+	struct rev_info rev;
+	struct commit *commit;
+	char *buf = xmalloc(LOGSIZE);
+
+	argc = setup_revisions(argc, argv, &rev, "HEAD");
+	prepare_revision_walk(&rev);
+	setup_pager();
+	while ((commit = get_revision(&rev)) != NULL) {
+		pretty_print_commit(CMIT_FMT_DEFAULT, commit, ~0, buf, LOGSIZE, 18);
+		printf("%s\n", buf);
+	}
+	free(buf);
+	return 0;
+}
+
 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
 
 static void handle_internal_command(int argc, char **argv, char **envp)
@@ -256,6 +279,7 @@ static void handle_internal_command(int argc, char **argv, char **envp)
 	} commands[] = {
 		{ "version", cmd_version },
 		{ "help", cmd_help },
+		{ "log", cmd_log },
 	};
 	int i;
 
-- 
cgit v0.10.2-6-g49f6


From 765ac8ec469f110e88376e4fac05d0ed475bcb28 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Tue, 28 Feb 2006 15:07:20 -0800
Subject: Rip out merge-order and make "git log <paths>..." work again.

Well, assuming breaking --merge-order is fine, here's a patch (on top of
the other ones) that makes

	git log <filename>

actually work, as far as I can tell.

I didn't add the logic for --before/--after flags, but that should be
pretty trivial, and is independent of this anyway.

Signed-off-by: Junio C Hamano <junkio@cox.net>

diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt
index 1c6146c..5b306d6 100644
--- a/Documentation/git-rev-list.txt
+++ b/Documentation/git-rev-list.txt
@@ -16,7 +16,7 @@ SYNOPSIS
 	     [ \--no-merges ]
 	     [ \--remove-empty ]
 	     [ \--all ]
-	     [ [ \--merge-order [ \--show-breaks ] ] | [ \--topo-order ] ]
+	     [ \--topo-order ]
 	     [ \--parents ]
 	     [ \--objects [ \--unpacked ] ]
 	     [ \--pretty | \--header ]
@@ -94,57 +94,10 @@ OPTIONS
 	topological order (i.e. descendant commits are shown
 	before their parents).
 
---merge-order::
-	When specified the commit history is decomposed into a unique
-	sequence of minimal, non-linear epochs and maximal, linear epochs.
-	Non-linear epochs are then linearised by sorting them into merge
-	order, which is described below.
-+
-Maximal, linear epochs correspond to periods of sequential development.
-Minimal, non-linear epochs correspond to periods of divergent development
-followed by a converging merge. The theory of epochs is described in more
-detail at
-link:http://blackcubes.dyndns.org/epoch/[http://blackcubes.dyndns.org/epoch/].
-+
-The merge order for a non-linear epoch is defined as a linearisation for which
-the following invariants are true:
-+
-    1. if a commit P is reachable from commit N, commit P sorts after commit N
-       in the linearised list.
-    2. if Pi and Pj are any two parents of a merge M (with i < j), then any
-       commit N, such that N is reachable from Pj but not reachable from Pi,
-       sorts before all commits reachable from Pi.
-+
-Invariant 1 states that later commits appear before earlier commits they are
-derived from.
-+
-Invariant 2 states that commits unique to "later" parents in a merge, appear
-before all commits from "earlier" parents of a merge.
-
---show-breaks::
-	Each item of the list is output with a 2-character prefix consisting
-	of one of: (|), (^), (=) followed by a space.
-+
-Commits marked with (=) represent the boundaries of minimal, non-linear epochs
-and correspond either to the start of a period of divergent development or to
-the end of such a period.
-+
-Commits marked with (|) are direct parents of commits immediately preceding
-the marked commit in the list.
-+
-Commits marked with (^) are not parents of the immediately preceding commit.
-These "breaks" represent necessary discontinuities implied by trying to
-represent an arbitrary DAG in a linear form.
-+
-`--show-breaks` is only valid if `--merge-order` is also specified.
-
-
 Author
 ------
 Written by Linus Torvalds <torvalds@osdl.org>
 
-Original *--merge-order* logic by Jon Seymour <jon.seymour@gmail.com>
-
 Documentation
 --------------
 Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>.
diff --git a/INSTALL b/INSTALL
index 433449f..63af8ec 100644
--- a/INSTALL
+++ b/INSTALL
@@ -40,9 +40,7 @@ Issues of note:
 
 	  If you don't have openssl, you can use one of the SHA1 libraries
 	  that come with git (git includes the one from Mozilla, and has
-	  its own PowerPC-optimized one too - see the Makefile), and you
-	  can avoid the bignum support by excising git-rev-list support
-	  for "--merge-order" (by hand).
+	  its own PowerPC and ARM optimized ones too - see the Makefile).
 
 	- "libcurl" and "curl" executable.  git-http-fetch and
 	  git-fetch use them.  If you do not use http
diff --git a/Makefile b/Makefile
index ead13be..bd156d2 100644
--- a/Makefile
+++ b/Makefile
@@ -6,8 +6,8 @@ all:
 # on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default
 # choice) has very fast version optimized for i586.
 #
-# Define NO_OPENSSL environment variable if you do not have OpenSSL. You will
-# miss out git-rev-list --merge-order. This also implies MOZILLA_SHA1.
+# Define NO_OPENSSL environment variable if you do not have OpenSSL.
+# This also implies MOZILLA_SHA1.
 #
 # Define NO_CURL if you do not have curl installed.  git-http-pull and
 # git-http-push are not built, and you cannot use http:// and https://
@@ -191,7 +191,7 @@ LIB_FILE=libgit.a
 
 LIB_H = \
 	blob.h cache.h commit.h count-delta.h csum-file.h delta.h \
-	diff.h epoch.h object.h pack.h pkt-line.h quote.h refs.h \
+	diff.h object.h pack.h pkt-line.h quote.h refs.h \
 	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h
 
 DIFF_OBJS = \
@@ -324,7 +324,6 @@ ifndef NO_CURL
 endif
 
 ifndef NO_OPENSSL
-	LIB_OBJS += epoch.o
 	OPENSSL_LIBSSL = -lssl
 	ifdef OPENSSLDIR
 		# Again this may be problematic -- gcc does not always want -R.
diff --git a/epoch.c b/epoch.c
deleted file mode 100644
index 0f37492..0000000
--- a/epoch.c
+++ /dev/null
@@ -1,640 +0,0 @@
-/*
- * Copyright (c) 2005, Jon Seymour
- *
- * For more information about epoch theory on which this module is based,
- * refer to http://blackcubes.dyndns.org/epoch/. That web page defines
- * terms such as "epoch" and "minimal, non-linear epoch" and provides rationales
- * for some of the algorithms used here.
- *
- */
-#include <stdlib.h>
-
-/* Provides arbitrary precision integers required to accurately represent
- * fractional mass: */
-#include <openssl/bn.h>
-
-#include "cache.h"
-#include "commit.h"
-#include "revision.h"
-#include "epoch.h"
-
-struct fraction {
-	BIGNUM numerator;
-	BIGNUM denominator;
-};
-
-#define HAS_EXACTLY_ONE_PARENT(n) ((n)->parents && !(n)->parents->next)
-
-static BN_CTX *context = NULL;
-static struct fraction *one = NULL;
-static struct fraction *zero = NULL;
-
-static BN_CTX *get_BN_CTX(void)
-{
-	if (!context) {
-		context = BN_CTX_new();
-	}
-	return context;
-}
-
-static struct fraction *new_zero(void)
-{
-	struct fraction *result = xmalloc(sizeof(*result));
-	BN_init(&result->numerator);
-	BN_init(&result->denominator);
-	BN_zero(&result->numerator);
-	BN_one(&result->denominator);
-	return result;
-}
-
-static void clear_fraction(struct fraction *fraction)
-{
-	BN_clear(&fraction->numerator);
-	BN_clear(&fraction->denominator);
-}
-
-static struct fraction *divide(struct fraction *result, struct fraction *fraction, int divisor)
-{
-	BIGNUM bn_divisor;
-
-	BN_init(&bn_divisor);
-	BN_set_word(&bn_divisor, divisor);
-
-	BN_copy(&result->numerator, &fraction->numerator);
-	BN_mul(&result->denominator, &fraction->denominator, &bn_divisor, get_BN_CTX());
-
-	BN_clear(&bn_divisor);
-	return result;
-}
-
-static struct fraction *init_fraction(struct fraction *fraction)
-{
-	BN_init(&fraction->numerator);
-	BN_init(&fraction->denominator);
-	BN_zero(&fraction->numerator);
-	BN_one(&fraction->denominator);
-	return fraction;
-}
-
-static struct fraction *get_one(void)
-{
-	if (!one) {
-		one = new_zero();
-		BN_one(&one->numerator);
-	}
-	return one;
-}
-
-static struct fraction *get_zero(void)
-{
-	if (!zero) {
-		zero = new_zero();
-	}
-	return zero;
-}
-
-static struct fraction *copy(struct fraction *to, struct fraction *from)
-{
-	BN_copy(&to->numerator, &from->numerator);
-	BN_copy(&to->denominator, &from->denominator);
-	return to;
-}
-
-static struct fraction *add(struct fraction *result, struct fraction *left, struct fraction *right)
-{
-	BIGNUM a, b, gcd;
-
-	BN_init(&a);
-	BN_init(&b);
-	BN_init(&gcd);
-
-	BN_mul(&a, &left->numerator, &right->denominator, get_BN_CTX());
-	BN_mul(&b, &left->denominator, &right->numerator, get_BN_CTX());
-	BN_mul(&result->denominator, &left->denominator, &right->denominator, get_BN_CTX());
-	BN_add(&result->numerator, &a, &b);
-
-	BN_gcd(&gcd, &result->denominator, &result->numerator, get_BN_CTX());
-	BN_div(&result->denominator, NULL, &result->denominator, &gcd, get_BN_CTX());
-	BN_div(&result->numerator, NULL, &result->numerator, &gcd, get_BN_CTX());
-
-	BN_clear(&a);
-	BN_clear(&b);
-	BN_clear(&gcd);
-
-	return result;
-}
-
-static int compare(struct fraction *left, struct fraction *right)
-{
-	BIGNUM a, b;
-	int result;
-
-	BN_init(&a);
-	BN_init(&b);
-
-	BN_mul(&a, &left->numerator, &right->denominator, get_BN_CTX());
-	BN_mul(&b, &left->denominator, &right->numerator, get_BN_CTX());
-
-	result = BN_cmp(&a, &b);
-
-	BN_clear(&a);
-	BN_clear(&b);
-
-	return result;
-}
-
-struct mass_counter {
-	struct fraction seen;
-	struct fraction pending;
-};
-
-static struct mass_counter *new_mass_counter(struct commit *commit, struct fraction *pending)
-{
-	struct mass_counter *mass_counter = xmalloc(sizeof(*mass_counter));
-	memset(mass_counter, 0, sizeof(*mass_counter));
-
-	init_fraction(&mass_counter->seen);
-	init_fraction(&mass_counter->pending);
-
-	copy(&mass_counter->pending, pending);
-	copy(&mass_counter->seen, get_zero());
-
-	if (commit->object.util) {
-		die("multiple attempts to initialize mass counter for %s",
-		    sha1_to_hex(commit->object.sha1));
-	}
-
-	commit->object.util = mass_counter;
-
-	return mass_counter;
-}
-
-static void free_mass_counter(struct mass_counter *counter)
-{
-	clear_fraction(&counter->seen);
-	clear_fraction(&counter->pending);
-	free(counter);
-}
-
-/*
- * Finds the base commit of a list of commits.
- *
- * One property of the commit being searched for is that every commit reachable
- * from the base commit is reachable from the commits in the starting list only
- * via paths that include the base commit.
- *
- * This algorithm uses a conservation of mass approach to find the base commit.
- *
- * We start by injecting one unit of mass into the graph at each
- * of the commits in the starting list. Injecting mass into a commit
- * is achieved by adding to its pending mass counter and, if it is not already
- * enqueued, enqueuing the commit in a list of pending commits, in latest
- * commit date first order.
- *
- * The algorithm then proceeds to visit each commit in the pending queue.
- * Upon each visit, the pending mass is added to the mass already seen for that
- * commit and then divided into N equal portions, where N is the number of
- * parents of the commit being visited. The divided portions are then injected
- * into each of the parents.
- *
- * The algorithm continues until we discover a commit which has seen all the
- * mass originally injected or until we run out of things to do.
- *
- * If we find a commit that has seen all the original mass, we have found
- * the common base of all the commits in the starting list.
- *
- * The algorithm does _not_ depend on accurate timestamps for correct operation.
- * However, reasonably sane (e.g. non-random) timestamps are required in order
- * to prevent an exponential performance characteristic. The occasional
- * timestamp inaccuracy will not dramatically affect performance but may
- * result in more nodes being processed than strictly necessary.
- *
- * This procedure sets *boundary to the address of the base commit. It returns
- * non-zero if, and only if, there was a problem parsing one of the
- * commits discovered during the traversal.
- */
-static int find_base_for_list(struct commit_list *list, struct commit **boundary)
-{
-	int ret = 0;
-	struct commit_list *cleaner = NULL;
-	struct commit_list *pending = NULL;
-	struct fraction injected;
-	init_fraction(&injected);
-	*boundary = NULL;
-
-	for (; list; list = list->next) {
-		struct commit *item = list->item;
-
-		if (!item->object.util) {
-			new_mass_counter(list->item, get_one());
-			add(&injected, &injected, get_one());
-
-			commit_list_insert(list->item, &cleaner);
-			commit_list_insert(list->item, &pending);
-		}
-	}
-
-	while (!*boundary && pending && !ret) {
-		struct commit *latest = pop_commit(&pending);
-		struct mass_counter *latest_node = (struct mass_counter *) latest->object.util;
-		int num_parents;
-
-		if ((ret = parse_commit(latest)))
-			continue;
-		add(&latest_node->seen, &latest_node->seen, &latest_node->pending);
-
-		num_parents = count_parents(latest);
-		if (num_parents) {
-			struct fraction distribution;
-			struct commit_list *parents;
-
-			divide(init_fraction(&distribution), &latest_node->pending, num_parents);
-
-			for (parents = latest->parents; parents; parents = parents->next) {
-				struct commit *parent = parents->item;
-				struct mass_counter *parent_node = (struct mass_counter *) parent->object.util;
-
-				if (!parent_node) {
-					parent_node = new_mass_counter(parent, &distribution);
-					insert_by_date(parent, &pending);
-					commit_list_insert(parent, &cleaner);
-				} else {
-					if (!compare(&parent_node->pending, get_zero()))
-						insert_by_date(parent, &pending);
-					add(&parent_node->pending, &parent_node->pending, &distribution);
-				}
-			}
-
-			clear_fraction(&distribution);
-		}
-
-		if (!compare(&latest_node->seen, &injected))
-			*boundary = latest;
-		copy(&latest_node->pending, get_zero());
-	}
-
-	while (cleaner) {
-		struct commit *next = pop_commit(&cleaner);
-		free_mass_counter((struct mass_counter *) next->object.util);
-		next->object.util = NULL;
-	}
-
-	if (pending)
-		free_commit_list(pending);
-
-	clear_fraction(&injected);
-	return ret;
-}
-
-
-/*
- * Finds the base of an minimal, non-linear epoch, headed at head, by
- * applying the find_base_for_list to a list consisting of the parents
- */
-static int find_base(struct commit *head, struct commit **boundary)
-{
-	int ret = 0;
-	struct commit_list *pending = NULL;
-	struct commit_list *next;
-
-	for (next = head->parents; next; next = next->next) {
-		commit_list_insert(next->item, &pending);
-	}
-	ret = find_base_for_list(pending, boundary);
-	free_commit_list(pending);
-
-	return ret;
-}
-
-/*
- * This procedure traverses to the boundary of the first epoch in the epoch
- * sequence of the epoch headed at head_of_epoch. This is either the end of
- * the maximal linear epoch or the base of a minimal non-linear epoch.
- *
- * The queue of pending nodes is sorted in reverse date order and each node
- * is currently in the queue at most once.
- */
-static int find_next_epoch_boundary(struct commit *head_of_epoch, struct commit **boundary)
-{
-	int ret;
-	struct commit *item = head_of_epoch;
-
-	ret = parse_commit(item);
-	if (ret)
-		return ret;
-
-	if (HAS_EXACTLY_ONE_PARENT(item)) {
-		/*
-		 * We are at the start of a maximimal linear epoch.
-		 * Traverse to the end.
-		 */
-		while (HAS_EXACTLY_ONE_PARENT(item) && !ret) {
-			item = item->parents->item;
-			ret = parse_commit(item);
-		}
-		*boundary = item;
-
-	} else {
-		/*
-		 * Otherwise, we are at the start of a minimal, non-linear
-		 * epoch - find the common base of all parents.
-		 */
-		ret = find_base(item, boundary);
-	}
-
-	return ret;
-}
-
-/*
- * Returns non-zero if parent is known to be a parent of child.
- */
-static int is_parent_of(struct commit *parent, struct commit *child)
-{
-	struct commit_list *parents;
-	for (parents = child->parents; parents; parents = parents->next) {
-		if (!memcmp(parent->object.sha1, parents->item->object.sha1,
-		            sizeof(parents->item->object.sha1)))
-			return 1;
-	}
-	return 0;
-}
-
-/*
- * Pushes an item onto the merge order stack. If the top of the stack is
- * marked as being a possible "break", we check to see whether it actually
- * is a break.
- */
-static void push_onto_merge_order_stack(struct commit_list **stack, struct commit *item)
-{
-	struct commit_list *top = *stack;
-	if (top && (top->item->object.flags & DISCONTINUITY)) {
-		if (is_parent_of(top->item, item)) {
-			top->item->object.flags &= ~DISCONTINUITY;
-		}
-	}
-	commit_list_insert(item, stack);
-}
-
-/*
- * Marks all interesting, visited commits reachable from this commit
- * as uninteresting. We stop recursing when we reach the epoch boundary,
- * an unvisited node or a node that has already been marking uninteresting.
- *
- * This doesn't actually mark all ancestors between the start node and the
- * epoch boundary uninteresting, but does ensure that they will eventually
- * be marked uninteresting when the main sort_first_epoch() traversal
- * eventually reaches them.
- */
-static void mark_ancestors_uninteresting(struct commit *commit)
-{
-	unsigned int flags = commit->object.flags;
-	int visited = flags & VISITED;
-	int boundary = flags & BOUNDARY;
-	int uninteresting = flags & UNINTERESTING;
-	struct commit_list *next;
-
-	commit->object.flags |= UNINTERESTING;
-
-	/*
-	 * We only need to recurse if
-	 *      we are not on the boundary and
-	 *      we have not already been marked uninteresting and
-	 *      we have already been visited.
-	 *
-	 * The main sort_first_epoch traverse will mark unreachable
-	 * all uninteresting, unvisited parents as they are visited
-	 * so there is no need to duplicate that traversal here.
-	 *
-	 * Similarly, if we are already marked uninteresting
-	 * then either all ancestors have already been marked
-	 * uninteresting or will be once the sort_first_epoch
-	 * traverse reaches them.
-	 */
-
-	if (uninteresting || boundary || !visited)
-		return;
-
-	for (next = commit->parents; next; next = next->next)
-		mark_ancestors_uninteresting(next->item);
-}
-
-/*
- * Sorts the nodes of the first epoch of the epoch sequence of the epoch headed at head
- * into merge order.
- */
-static void sort_first_epoch(struct commit *head, struct commit_list **stack)
-{
-	struct commit_list *parents;
-
-	head->object.flags |= VISITED;
-
-	/*
-	 * TODO: By sorting the parents in a different order, we can alter the
-	 * merge order to show contemporaneous changes in parallel branches
-	 * occurring after "local" changes. This is useful for a developer
-	 * when a developer wants to see all changes that were incorporated
-	 * into the same merge as her own changes occur after her own
-	 * changes.
-	 */
-
-	for (parents = head->parents; parents; parents = parents->next) {
-		struct commit *parent = parents->item;
-
-		if (head->object.flags & UNINTERESTING) {
-			/*
-			 * Propagates the uninteresting bit to all parents.
-			 * if we have already visited this parent, then
-			 * the uninteresting bit will be propagated to each
-			 * reachable commit that is still not marked
-			 * uninteresting and won't otherwise be reached.
-			 */
-			mark_ancestors_uninteresting(parent);
-		}
-
-		if (!(parent->object.flags & VISITED)) {
-			if (parent->object.flags & BOUNDARY) {
-				if (*stack) {
-					die("something else is on the stack - %s",
-					    sha1_to_hex((*stack)->item->object.sha1));
-				}
-				push_onto_merge_order_stack(stack, parent);
-				parent->object.flags |= VISITED;
-
-			} else {
-				sort_first_epoch(parent, stack);
-				if (parents) {
-					/*
-					 * This indicates a possible
-					 * discontinuity it may not be be
-					 * actual discontinuity if the head
-					 * of parent N happens to be the tail
-					 * of parent N+1.
-					 *
-					 * The next push onto the stack will
-					 * resolve the question.
-					 */
-					(*stack)->item->object.flags |= DISCONTINUITY;
-				}
-			}
-		}
-	}
-
-	push_onto_merge_order_stack(stack, head);
-}
-
-/*
- * Emit the contents of the stack.
- *
- * The stack is freed and replaced by NULL.
- *
- * Sets the return value to STOP if no further output should be generated.
- */
-static int emit_stack(struct commit_list **stack, emitter_func emitter, int include_last)
-{
-	unsigned int seen = 0;
-	int action = CONTINUE;
-
-	while (*stack && (action != STOP)) {
-		struct commit *next = pop_commit(stack);
-		seen |= next->object.flags;
-		if (*stack || include_last) {
-			if (!*stack) 
-				next->object.flags |= BOUNDARY;
-			action = emitter(next);
-		}
-	}
-
-	if (*stack) {
-		free_commit_list(*stack);
-		*stack = NULL;
-	}
-
-	return (action == STOP || (seen & UNINTERESTING)) ? STOP : CONTINUE;
-}
-
-/*
- * Sorts an arbitrary epoch into merge order by sorting each epoch
- * of its epoch sequence into order.
- *
- * Note: this algorithm currently leaves traces of its execution in the
- * object flags of nodes it discovers. This should probably be fixed.
- */
-static int sort_in_merge_order(struct commit *head_of_epoch, emitter_func emitter)
-{
-	struct commit *next = head_of_epoch;
-	int ret = 0;
-	int action = CONTINUE;
-
-	ret = parse_commit(head_of_epoch);
-
-	next->object.flags |= BOUNDARY;
-
-	while (next && next->parents && !ret && (action != STOP)) {
-		struct commit *base = NULL;
-
-		ret = find_next_epoch_boundary(next, &base);
-		if (ret)
-			return ret;
-		next->object.flags |= BOUNDARY;
-		if (base)
-			base->object.flags |= BOUNDARY;
-
-		if (HAS_EXACTLY_ONE_PARENT(next)) {
-			while (HAS_EXACTLY_ONE_PARENT(next)
-			       && (action != STOP)
-			       && !ret) {
-				if (next->object.flags & UNINTERESTING) {
-					action = STOP;
-				} else {
-					action = emitter(next);
-				}
-				if (action != STOP) {
-					next = next->parents->item;
-					ret = parse_commit(next);
-				}
-			}
-
-		} else {
-			struct commit_list *stack = NULL;
-			sort_first_epoch(next, &stack);
-			action = emit_stack(&stack, emitter, (base == NULL));
-			next = base;
-		}
-	}
-
-	if (next && (action != STOP) && !ret) {
-		emitter(next);
-	}
-
-	return ret;
-}
-
-/*
- * Sorts the nodes reachable from a starting list in merge order, we
- * first find the base for the starting list and then sort all nodes
- * in this subgraph using the sort_first_epoch algorithm. Once we have
- * reached the base we can continue sorting using sort_in_merge_order.
- */
-int sort_list_in_merge_order(struct commit_list *list, emitter_func emitter)
-{
-	struct commit_list *stack = NULL;
-	struct commit *base;
-	int ret = 0;
-	int action = CONTINUE;
-	struct commit_list *reversed = NULL;
-
-	for (; list; list = list->next)
-		commit_list_insert(list->item, &reversed);
-
-	if (!reversed)
-		return ret;
-	else if (!reversed->next) {
-		/*
-		 * If there is only one element in the list, we can sort it
-		 * using sort_in_merge_order.
-		 */
-		base = reversed->item;
-	} else {
-		/*
-		 * Otherwise, we search for the base of the list.
-		 */
-		ret = find_base_for_list(reversed, &base);
-		if (ret)
-			return ret;
-		if (base)
-			base->object.flags |= BOUNDARY;
-
-		while (reversed) {
-			struct commit * next = pop_commit(&reversed);
-
-			if (!(next->object.flags & VISITED) && next!=base) {
-				sort_first_epoch(next, &stack);
-				if (reversed) {
-					/*
-					 * If we have more commits 
-					 * to push, then the first
-					 * push for the next parent may 
-					 * (or may * not) represent a 
-					 * discontinuity with respect
-					 * to the parent currently on 
-					 * the top of the stack.
-					 *
-					 * Mark it for checking here, 
-					 * and check it with the next 
-					 * push. See sort_first_epoch()
-					 * for more details.
-					 */
-					stack->item->object.flags |= DISCONTINUITY;
-				}
-			}
-		}
-
-		action = emit_stack(&stack, emitter, (base==NULL));
-	}
-
-	if (base && (action != STOP)) {
-		ret = sort_in_merge_order(base, emitter);
-	}
-
-	return ret;
-}
diff --git a/epoch.h b/epoch.h
deleted file mode 100644
index 3756009..0000000
--- a/epoch.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef EPOCH_H
-#define EPOCH_H
-
-
-// return codes for emitter_func
-#define STOP     0
-#define CONTINUE 1
-#define DO       2
-typedef int (*emitter_func) (struct commit *); 
-
-int sort_list_in_merge_order(struct commit_list *list, emitter_func emitter);
-
-/* Low bits are used by rev-list */
-#define BOUNDARY        (1u<<11)
-#define VISITED         (1u<<12)
-#define DISCONTINUITY   (1u<<13)
-#define LAST_EPOCH_FLAG (1u<<14)
-
-
-#endif	/* EPOCH_H */
diff --git a/git-archimport.perl b/git-archimport.perl
index 6792624..740bc1f 100755
--- a/git-archimport.perl
+++ b/git-archimport.perl
@@ -928,7 +928,7 @@ sub find_parents {
 
 	# now walk up to the mergepoint collecting what patches we have
 	my $branchtip = git_rev_parse($ps->{branch});
-	my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
+	my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
 	my %have; # collected merges this branch has
 	foreach my $merge (@{$ps->{merges}}) {
 	    $have{$merge} = 1;
@@ -951,7 +951,7 @@ sub find_parents {
 	# see what the remote branch has - these are the merges we 
 	# will want to have in a consecutive series from the mergebase
 	my $otherbranchtip = git_rev_parse($branch);
-	my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
+	my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
 	my @need;
 	foreach my $needps (@needraw) { 	# get the psets
 	    $needps = commitid2pset($needps);
diff --git a/rev-list.c b/rev-list.c
index 94f22dd..6af8d86 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -4,14 +4,12 @@
 #include "commit.h"
 #include "tree.h"
 #include "blob.h"
-#include "epoch.h"
 #include "diff.h"
 #include "revision.h"
 
-/* bits #0-2 in revision.h */
+/* bits #0-3 in revision.h */
 
-#define COUNTED		(1u << 3)
-#define SHOWN		(1u << 4)
+#define COUNTED		(1u << 4)
 #define TMP_MARK	(1u << 5) /* for isolated cases; clean after use */
 
 static const char rev_list_usage[] =
@@ -25,7 +23,6 @@ static const char rev_list_usage[] =
 "    --remove-empty\n"
 "    --all\n"
 "  ordering output:\n"
-"    --merge-order [ --show-breaks ]\n"
 "    --topo-order\n"
 "    --date-order\n"
 "  formatting output:\n"
@@ -47,22 +44,9 @@ static int show_parents = 0;
 static int hdr_termination = 0;
 static const char *commit_prefix = "";
 static enum cmit_fmt commit_format = CMIT_FMT_RAW;
-static int merge_order = 0;
-static int show_breaks = 0;
-static int stop_traversal = 0;
-static int no_merges = 0;
 
 static void show_commit(struct commit *commit)
 {
-	commit->object.flags |= SHOWN;
-	if (show_breaks) {
-		commit_prefix = "| ";
-		if (commit->object.flags & DISCONTINUITY) {
-			commit_prefix = "^ ";     
-		} else if (commit->object.flags & BOUNDARY) {
-			commit_prefix = "= ";
-		} 
-        }        		
 	printf("%s%s", commit_prefix, sha1_to_hex(commit->object.sha1));
 	if (show_parents) {
 		struct commit_list *parents = commit->parents;
@@ -96,73 +80,6 @@ static void show_commit(struct commit *commit)
 	fflush(stdout);
 }
 
-static int rewrite_one(struct commit **pp)
-{
-	for (;;) {
-		struct commit *p = *pp;
-		if (p->object.flags & (TREECHANGE | UNINTERESTING))
-			return 0;
-		if (!p->parents)
-			return -1;
-		*pp = p->parents->item;
-	}
-}
-
-static void rewrite_parents(struct commit *commit)
-{
-	struct commit_list **pp = &commit->parents;
-	while (*pp) {
-		struct commit_list *parent = *pp;
-		if (rewrite_one(&parent->item) < 0) {
-			*pp = parent->next;
-			continue;
-		}
-		pp = &parent->next;
-	}
-}
-
-static int filter_commit(struct commit * commit)
-{
-	if (stop_traversal && (commit->object.flags & BOUNDARY))
-		return STOP;
-	if (commit->object.flags & (UNINTERESTING|SHOWN))
-		return CONTINUE;
-	if (revs.min_age != -1 && (commit->date > revs.min_age))
-		return CONTINUE;
-	if (revs.max_age != -1 && (commit->date < revs.max_age)) {
-		stop_traversal=1;
-		return CONTINUE;
-	}
-	if (no_merges && (commit->parents && commit->parents->next))
-		return CONTINUE;
-	if (revs.paths && revs.dense) {
-		if (!(commit->object.flags & TREECHANGE))
-			return CONTINUE;
-		rewrite_parents(commit);
-	}
-	return DO;
-}
-
-static int process_commit(struct commit * commit)
-{
-	int action=filter_commit(commit);
-
-	if (action == STOP) {
-		return STOP;
-	}
-
-	if (action == CONTINUE) {
-		return CONTINUE;
-	}
-
-	if (revs.max_count != -1 && !revs.max_count--)
-		return STOP;
-
-	show_commit(commit);
-
-	return CONTINUE;
-}
-
 static struct object_list **process_blob(struct blob *blob,
 					 struct object_list **p,
 					 struct name_path *path,
@@ -219,8 +136,7 @@ static void show_commit_list(struct rev_info *revs)
 
 	while ((commit = get_revision(revs)) != NULL) {
 		p = process_tree(commit->tree, p, NULL, "");
-		if (process_commit(commit) == STOP)
-			break;
+		show_commit(commit);
 	}
 	for (pending = revs->pending_objects; pending; pending = pending->next) {
 		struct object *obj = pending->item;
@@ -416,10 +332,6 @@ int main(int argc, const char **argv)
 				commit_prefix = "commit ";
 			continue;
 		}
-		if (!strncmp(arg, "--no-merges", 11)) {
-			no_merges = 1;
-			continue;
-		}
 		if (!strcmp(arg, "--parents")) {
 			show_parents = 1;
 			continue;
@@ -428,14 +340,6 @@ int main(int argc, const char **argv)
 			bisect_list = 1;
 			continue;
 		}
-		if (!strcmp(arg, "--merge-order")) {
-		        merge_order = 1;
-			continue;
-		}
-		if (!strcmp(arg, "--show-breaks")) {
-			show_breaks = 1;
-			continue;
-		}
 		usage(rev_list_usage);
 
 	}
@@ -456,17 +360,7 @@ int main(int argc, const char **argv)
 	save_commit_buffer = verbose_header;
 	track_object_refs = 0;
 
-	if (!merge_order) {
-		show_commit_list(&revs);
-	} else {
-#ifndef NO_OPENSSL
-		if (sort_list_in_merge_order(list, &process_commit)) {
-			die("merge order sort failed\n");
-		}
-#else
-		die("merge order sort unsupported, OpenSSL not linked");
-#endif
-	}
+	show_commit_list(&revs);
 
 	return 0;
 }
diff --git a/rev-parse.c b/rev-parse.c
index 610eacb..f90e999 100644
--- a/rev-parse.c
+++ b/rev-parse.c
@@ -39,14 +39,12 @@ static int is_rev_argument(const char *arg)
 		"--header",
 		"--max-age=",
 		"--max-count=",
-		"--merge-order",
 		"--min-age=",
 		"--no-merges",
 		"--objects",
 		"--objects-edge",
 		"--parents",
 		"--pretty",
-		"--show-breaks",
 		"--sparse",
 		"--topo-order",
 		"--date-order",
diff --git a/revision.c b/revision.c
index f1ac62d..c84f146 100644
--- a/revision.c
+++ b/revision.c
@@ -381,6 +381,9 @@ static void limit_list(struct rev_info *revs)
 	struct commit_list *newlist = NULL;
 	struct commit_list **p = &newlist;
 
+	if (revs->paths)
+		diff_tree_setup_paths(revs->paths);
+
 	while (list) {
 		struct commit_list *entry = list;
 		struct commit *commit = list->item;
@@ -436,12 +439,13 @@ static void handle_all(struct rev_info *revs, unsigned flags)
  * Parse revision information, filling in the "rev_info" structure,
  * and removing the used arguments from the argument list.
  *
- * Returns the number of arguments left ("new argc").
+ * Returns the number of arguments left that weren't recognized
+ * (which are also moved to the head of the argument list)
  */
 int setup_revisions(int argc, const char **argv, struct rev_info *revs, const char *def)
 {
 	int i, flags, seen_dashdash;
-	const char **unrecognized = argv+1;
+	const char **unrecognized = argv + 1;
 	int left = 1;
 
 	memset(revs, 0, sizeof(*revs));
@@ -525,6 +529,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch
 				revs->remove_empty_trees = 1;
 				continue;
 			}
+			if (!strncmp(arg, "--no-merges", 11)) {
+				revs->no_merges = 1;
+				continue;
+			}
 			if (!strcmp(arg, "--objects")) {
 				revs->tag_objects = 1;
 				revs->tree_objects = 1;
@@ -601,14 +609,11 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch
 	}
 	if (revs->paths)
 		revs->limited = 1;
-	*unrecognized = NULL;
 	return left;
 }
 
 void prepare_revision_walk(struct rev_info *revs)
 {
-	if (revs->paths)
-		diff_tree_setup_paths(revs->paths);
 	sort_by_date(&revs->commits);
 	if (revs->limited)
 		limit_list(revs);
@@ -616,11 +621,67 @@ void prepare_revision_walk(struct rev_info *revs)
 		sort_in_topological_order(&revs->commits, revs->lifo);
 }
 
+static int rewrite_one(struct commit **pp)
+{
+	for (;;) {
+		struct commit *p = *pp;
+		if (p->object.flags & (TREECHANGE | UNINTERESTING))
+			return 0;
+		if (!p->parents)
+			return -1;
+		*pp = p->parents->item;
+	}
+}
+
+static void rewrite_parents(struct commit *commit)
+{
+	struct commit_list **pp = &commit->parents;
+	while (*pp) {
+		struct commit_list *parent = *pp;
+		if (rewrite_one(&parent->item) < 0) {
+			*pp = parent->next;
+			continue;
+		}
+		pp = &parent->next;
+	}
+}
+
 struct commit *get_revision(struct rev_info *revs)
 {
-	if (!revs->commits)
+	struct commit_list *list = revs->commits;
+	struct commit *commit;
+
+	if (!list)
 		return NULL;
-	return pop_most_recent_commit(&revs->commits, SEEN);
-}
 
+	/* Check the max_count ... */
+	commit = list->item;
+	switch (revs->max_count) {
+	case -1:
+		break;
+	case 0:
+		return NULL;
+	default:
+		revs->max_count--;
+	}
 
+	do {
+		commit = pop_most_recent_commit(&revs->commits, SEEN);
+		if (commit->object.flags & (UNINTERESTING|SHOWN))
+			continue;
+		if (revs->min_age != -1 && (commit->date > revs->min_age))
+			continue;
+		if (revs->max_age != -1 && (commit->date < revs->max_age))
+			return NULL;
+		if (revs->no_merges && commit->parents && commit->parents->next)
+			continue;
+		if (revs->paths && revs->dense) {
+			if (!(commit->object.flags & TREECHANGE))
+				continue;
+			rewrite_parents(commit);
+		}
+		commit->object.flags |= SHOWN;
+		return commit;
+	} while (revs->commits);
+	return NULL;
+}
diff --git a/revision.h b/revision.h
index 0bed3c0..0043c16 100644
--- a/revision.h
+++ b/revision.h
@@ -4,6 +4,7 @@
 #define SEEN		(1u<<0)
 #define UNINTERESTING   (1u<<1)
 #define TREECHANGE	(1u<<2)
+#define SHOWN		(1u<<3)
 
 struct rev_info {
 	/* Starting list */
@@ -16,6 +17,7 @@ struct rev_info {
 
 	/* Traversal flags */
 	unsigned int	dense:1,
+			no_merges:1,
 			remove_empty_trees:1,
 			lifo:1,
 			topo_order:1,
diff --git a/t/t6001-rev-list-merge-order.sh b/t/t6001-rev-list-merge-order.sh
deleted file mode 100755
index 7724e8a..0000000
--- a/t/t6001-rev-list-merge-order.sh
+++ /dev/null
@@ -1,462 +0,0 @@
-#!/bin/sh
-#
-# Copyright (c) 2005 Jon Seymour
-#
-
-test_description='Tests git-rev-list --merge-order functionality'
-
-. ./test-lib.sh
-. ../t6000lib.sh # t6xxx specific functions
-
-# test-case specific test function
-check_adjacency()
-{
-    read previous
-    echo "= $previous"
-    while read next
-    do
-        if ! (git-cat-file commit $previous | grep "^parent $next" >/dev/null)
-        then
-            echo "^ $next"
-        else
-            echo "| $next"
-        fi
-        previous=$next
-    done
-}
-
-list_duplicates()
-{
-    "$@" | sort | uniq -d
-}
-
-grep_stderr()
-{
-    args=$1
-    shift 1
-    "$@" 2>&1 | grep "$args"
-}
-
-date >path0
-git-update-index --add path0
-save_tag tree git-write-tree
-on_committer_date "1971-08-16 00:00:00" hide_error save_tag root unique_commit root tree
-on_committer_date "1971-08-16 00:00:01" save_tag l0 unique_commit l0 tree -p root
-on_committer_date "1971-08-16 00:00:02" save_tag l1 unique_commit l1 tree -p l0
-on_committer_date "1971-08-16 00:00:03" save_tag l2 unique_commit l2 tree -p l1
-on_committer_date "1971-08-16 00:00:04" save_tag a0 unique_commit a0 tree -p l2
-on_committer_date "1971-08-16 00:00:05" save_tag a1 unique_commit a1 tree -p a0
-on_committer_date "1971-08-16 00:00:06" save_tag b1 unique_commit b1 tree -p a0
-on_committer_date "1971-08-16 00:00:07" save_tag c1 unique_commit c1 tree -p b1
-on_committer_date "1971-08-16 00:00:08" as_author foobar@example.com save_tag b2 unique_commit b2 tree -p b1
-on_committer_date "1971-08-16 00:00:09" save_tag b3 unique_commit b2 tree -p b2
-on_committer_date "1971-08-16 00:00:10" save_tag c2 unique_commit c2 tree -p c1 -p b2
-on_committer_date "1971-08-16 00:00:11" save_tag c3 unique_commit c3 tree -p c2
-on_committer_date "1971-08-16 00:00:12" save_tag a2 unique_commit a2 tree -p a1
-on_committer_date "1971-08-16 00:00:13" save_tag a3 unique_commit a3 tree -p a2
-on_committer_date "1971-08-16 00:00:14" save_tag b4 unique_commit b4 tree -p b3 -p a3
-on_committer_date "1971-08-16 00:00:15" save_tag a4 unique_commit a4 tree -p a3 -p b4 -p c3
-on_committer_date "1971-08-16 00:00:16" save_tag l3 unique_commit l3 tree -p a4
-on_committer_date "1971-08-16 00:00:17" save_tag l4 unique_commit l4 tree -p l3
-on_committer_date "1971-08-16 00:00:18" save_tag l5 unique_commit l5 tree -p l4
-on_committer_date "1971-08-16 00:00:19" save_tag m1 unique_commit m1 tree -p a4 -p c3
-on_committer_date "1971-08-16 00:00:20" save_tag m2 unique_commit m2 tree -p c3 -p a4
-on_committer_date "1971-08-16 00:00:21" hide_error save_tag alt_root unique_commit alt_root tree
-on_committer_date "1971-08-16 00:00:22" save_tag r0 unique_commit r0 tree -p alt_root
-on_committer_date "1971-08-16 00:00:23" save_tag r1 unique_commit r1 tree -p r0
-on_committer_date "1971-08-16 00:00:24" save_tag l5r1 unique_commit l5r1 tree -p l5 -p r1
-on_committer_date "1971-08-16 00:00:25" save_tag r1l5 unique_commit r1l5 tree -p r1 -p l5
-
-
-#
-# note: as of 20/6, it isn't possible to create duplicate parents, so this
-# can't be tested.
-#
-#on_committer_date "1971-08-16 00:00:20" save_tag m3 unique_commit m3 tree -p c3 -p a4 -p c3
-hide_error save_tag e1 as_author e@example.com unique_commit e1 tree
-save_tag e2 as_author e@example.com unique_commit e2 tree -p e1
-save_tag f1 as_author f@example.com unique_commit f1 tree -p e1
-save_tag e3 as_author e@example.com unique_commit e3 tree -p e2
-save_tag f2 as_author f@example.com unique_commit f2 tree -p f1
-save_tag e4 as_author e@example.com unique_commit e4 tree -p e3 -p f2
-save_tag e5 as_author e@example.com unique_commit e5 tree -p e4
-save_tag f3 as_author f@example.com unique_commit f3 tree -p f2
-save_tag f4 as_author f@example.com unique_commit f4 tree -p f3
-save_tag e6 as_author e@example.com unique_commit e6 tree -p e5 -p f4
-save_tag f5 as_author f@example.com unique_commit f5 tree -p f4
-save_tag f6 as_author f@example.com unique_commit f6 tree -p f5 -p e6
-save_tag e7 as_author e@example.com unique_commit e7 tree -p e6
-save_tag e8 as_author e@example.com unique_commit e8 tree -p e7
-save_tag e9 as_author e@example.com unique_commit e9 tree -p e8
-save_tag f7 as_author f@example.com unique_commit f7 tree -p f6
-save_tag f8 as_author f@example.com unique_commit f8 tree -p f7
-save_tag f9 as_author f@example.com unique_commit f9 tree -p f8
-save_tag e10 as_author e@example.com unique_commit e1 tree -p e9 -p f8
-
-hide_error save_tag g0 unique_commit g0 tree
-save_tag g1 unique_commit g1 tree -p g0
-save_tag h1 unique_commit g2 tree -p g0
-save_tag g2 unique_commit g3 tree -p g1 -p h1
-save_tag h2 unique_commit g4 tree -p g2
-save_tag g3 unique_commit g5 tree -p g2
-save_tag g4 unique_commit g6 tree -p g3 -p h2
-
-git-update-ref HEAD $(tag l5)
-
-test_output_expect_success 'rev-list has correct number of entries' 'git-rev-list HEAD | wc -l | tr -d \" \"' <<EOF
-19
-EOF
-
-if git-rev-list --merge-order HEAD 2>&1 | grep 'OpenSSL not linked' >/dev/null
-then
-    test_expect_success 'skipping merge-order test' :
-    test_done
-    exit
-fi
-
-normal_adjacency_count=$(git-rev-list HEAD | check_adjacency | grep -c "\^" | tr -d ' ')
-merge_order_adjacency_count=$(git-rev-list --merge-order HEAD | check_adjacency | grep -c "\^" | tr -d ' ')
-test_expect_success '--merge-order produces as many or fewer discontinuities' '[ $merge_order_adjacency_count -le $normal_adjacency_count ]'
-test_output_expect_success 'simple merge order' 'git-rev-list --merge-order --show-breaks HEAD' <<EOF
-= l5
-| l4
-| l3
-= a4
-| c3
-| c2
-| c1
-^ b4
-| b3
-| b2
-| b1
-^ a3
-| a2
-| a1
-= a0
-| l2
-| l1
-| l0
-= root
-EOF
-
-test_output_expect_success 'two diamonds merge order (g6)' 'git-rev-list --merge-order --show-breaks g4' <<EOF
-= g4
-| h2
-^ g3
-= g2
-| h1
-^ g1
-= g0
-EOF
-
-test_output_expect_success 'multiple heads' 'git-rev-list --merge-order a3 b3 c3' <<EOF
-c3
-c2
-c1
-b3
-b2
-b1
-a3
-a2
-a1
-a0
-l2
-l1
-l0
-root
-EOF
-
-test_output_expect_success 'multiple heads, prune at a1' 'git-rev-list --merge-order a3 b3 c3 ^a1' <<EOF
-c3
-c2
-c1
-b3
-b2
-b1
-a3
-a2
-EOF
-
-test_output_expect_success 'multiple heads, prune at l1' 'git-rev-list --merge-order a3 b3 c3 ^l1' <<EOF
-c3
-c2
-c1
-b3
-b2
-b1
-a3
-a2
-a1
-a0
-l2
-EOF
-
-test_output_expect_success 'cross-epoch, head at l5, prune at l1' 'git-rev-list --merge-order l5 ^l1' <<EOF
-l5
-l4
-l3
-a4
-c3
-c2
-c1
-b4
-b3
-b2
-b1
-a3
-a2
-a1
-a0
-l2
-EOF
-
-test_output_expect_success 'duplicated head arguments' 'git-rev-list --merge-order l5 l5 ^l1' <<EOF
-l5
-l4
-l3
-a4
-c3
-c2
-c1
-b4
-b3
-b2
-b1
-a3
-a2
-a1
-a0
-l2
-EOF
-
-test_output_expect_success 'prune near merge' 'git-rev-list --merge-order a4 ^c3' <<EOF
-a4
-b4
-b3
-a3
-a2
-a1
-EOF
-
-test_output_expect_success "head has no parent" 'git-rev-list --merge-order --show-breaks root' <<EOF
-= root
-EOF
-
-test_output_expect_success "two nodes - one head, one base" 'git-rev-list --merge-order --show-breaks l0' <<EOF
-= l0
-= root
-EOF
-
-test_output_expect_success "three nodes one head, one internal, one base" 'git-rev-list --merge-order --show-breaks l1' <<EOF
-= l1
-| l0
-= root
-EOF
-
-test_output_expect_success "linear prune l2 ^root" 'git-rev-list --merge-order --show-breaks l2 ^root' <<EOF
-^ l2
-| l1
-| l0
-EOF
-
-test_output_expect_success "linear prune l2 ^l0" 'git-rev-list --merge-order --show-breaks l2 ^l0' <<EOF
-^ l2
-| l1
-EOF
-
-test_output_expect_success "linear prune l2 ^l1" 'git-rev-list --merge-order --show-breaks l2 ^l1' <<EOF
-^ l2
-EOF
-
-test_output_expect_success "linear prune l5 ^a4" 'git-rev-list --merge-order --show-breaks l5 ^a4' <<EOF
-^ l5
-| l4
-| l3
-EOF
-
-test_output_expect_success "linear prune l5 ^l3" 'git-rev-list --merge-order --show-breaks l5 ^l3' <<EOF
-^ l5
-| l4
-EOF
-
-test_output_expect_success "linear prune l5 ^l4" 'git-rev-list --merge-order --show-breaks l5 ^l4' <<EOF
-^ l5
-EOF
-
-test_output_expect_success "max-count 10 - merge order" 'git-rev-list --merge-order --show-breaks --max-count=10 l5' <<EOF
-= l5
-| l4
-| l3
-= a4
-| c3
-| c2
-| c1
-^ b4
-| b3
-| b2
-EOF
-
-test_output_expect_success "max-count 10 - non merge order" 'git-rev-list --max-count=10 l5' <<EOF
-l5
-l4
-l3
-a4
-b4
-a3
-a2
-c3
-c2
-b3
-EOF
-
-test_output_expect_success '--max-age=c3, no --merge-order' "git-rev-list --max-age=$(commit_date c3) l5" <<EOF
-l5
-l4
-l3
-a4
-b4
-a3
-a2
-c3
-EOF
-
-test_output_expect_success '--max-age=c3, --merge-order' "git-rev-list --merge-order --max-age=$(commit_date c3) l5" <<EOF
-l5
-l4
-l3
-a4
-c3
-b4
-a3
-a2
-EOF
-
-test_output_expect_success 'one specified head reachable from another a4, c3, --merge-order' "list_duplicates git-rev-list --merge-order a4 c3" <<EOF
-EOF
-
-test_output_expect_success 'one specified head reachable from another c3, a4, --merge-order' "list_duplicates git-rev-list --merge-order c3 a4" <<EOF
-EOF
-
-test_output_expect_success 'one specified head reachable from another a4, c3, no --merge-order' "list_duplicates git-rev-list a4 c3" <<EOF
-EOF
-
-test_output_expect_success 'one specified head reachable from another c3, a4, no --merge-order' "list_duplicates git-rev-list c3 a4" <<EOF
-EOF
-
-test_output_expect_success 'graph with c3 and a4 parents of head' "list_duplicates git-rev-list m1" <<EOF
-EOF
-
-test_output_expect_success 'graph with a4 and c3 parents of head' "list_duplicates git-rev-list m2" <<EOF
-EOF
-
-test_expect_success "head ^head --merge-order" 'git-rev-list --merge-order --show-breaks a3 ^a3' <<EOF
-EOF
-
-#
-# can't test this now - duplicate parents can't be created
-#
-#test_output_expect_success 'duplicate parents' 'git-rev-list --parents --merge-order --show-breaks m3' <<EOF
-#= m3 c3 a4 c3
-#| a4 c3 b4 a3
-#| b4 a3 b3
-#| b3 b2
-#^ a3 a2
-#| a2 a1
-#| a1 a0
-#^ c3 c2
-#| c2 b2 c1
-#| b2 b1
-#^ c1 b1
-#| b1 a0
-#= a0 l2
-#| l2 l1
-#| l1 l0
-#| l0 root
-#= root
-#EOF
-
-test_expect_success "head ^head no --merge-order" 'git-rev-list a3 ^a3' <<EOF
-EOF
-
-test_output_expect_success 'simple merge order (l5r1)' 'git-rev-list --merge-order --show-breaks l5r1' <<EOF
-= l5r1
-| r1
-| r0
-| alt_root
-^ l5
-| l4
-| l3
-| a4
-| c3
-| c2
-| c1
-^ b4
-| b3
-| b2
-| b1
-^ a3
-| a2
-| a1
-| a0
-| l2
-| l1
-| l0
-= root
-EOF
-
-test_output_expect_success 'simple merge order (r1l5)' 'git-rev-list --merge-order --show-breaks r1l5' <<EOF
-= r1l5
-| l5
-| l4
-| l3
-| a4
-| c3
-| c2
-| c1
-^ b4
-| b3
-| b2
-| b1
-^ a3
-| a2
-| a1
-| a0
-| l2
-| l1
-| l0
-| root
-^ r1
-| r0
-= alt_root
-EOF
-
-test_output_expect_success "don't print things unreachable from one branch" "git-rev-list a3 ^b3 --merge-order" <<EOF
-a3
-a2
-a1
-EOF
-
-test_output_expect_success "--merge-order a4 l3" "git-rev-list --merge-order a4 l3" <<EOF
-l3
-a4
-c3
-c2
-c1
-b4
-b3
-b2
-b1
-a3
-a2
-a1
-a0
-l2
-l1
-l0
-root
-EOF
-
-#
-#
-
-test_done
-- 
cgit v0.10.2-6-g49f6