path: root/diff.c
diff options
authorLinus Torvalds <>2007-10-25 18:20:56 (GMT)
committerJunio C Hamano <>2007-10-27 06:18:06 (GMT)
commit644797119d7a3b7a043a51a9cccd8758f8451f91 (patch)
treeed4e6fb50c33a3fe92028747244f7106b0a3a2fe /diff.c
parent9fb88419ba85e641006c80db53620423f37f1c93 (diff)
copy vs rename detection: avoid unnecessary O(n*m) loops
The core rename detection had some rather stupid code to check if a pathname was used by a later modification or rename, which basically walked the whole pathname space for all renames for each rename, in order to tell whether it was a pure rename (no remaining users) or should be considered a copy (other users of the source file remaining). That's really silly, since we can just keep a count of users around, and replace all those complex and expensive loops with just testing that simple counter (but this all depends on the previous commit that shared the diff_filespec data structure by using a separate reference count). Note that the reference count is not the same as the rename count: they behave otherwise rather similarly, but the reference count is tied to the allocation (and decremented at de-allocation, so that when it turns zero we can get rid of the memory), while the rename count is tied to the renames and is decremented when we find a rename (so that when it turns zero we know that it was a rename, not a copy). Signed-off-by: Linus Torvalds <> Signed-off-by: Junio C Hamano <>
Diffstat (limited to 'diff.c')
1 files changed, 17 insertions, 23 deletions
diff --git a/diff.c b/diff.c
index 0b320f6..af85b94 100644
--- a/diff.c
+++ b/diff.c
@@ -2597,9 +2597,9 @@ void diff_debug_filepair(const struct diff_filepair *p, int i)
diff_debug_filespec(p->one, i, "one");
diff_debug_filespec(p->two, i, "two");
- fprintf(stderr, "score %d, status %c stays %d broken %d\n",
+ fprintf(stderr, "score %d, status %c rename_used %d broken %d\n",
p->score, p->status ? p->status : '?',
- p->source_stays, p->broken_pair);
+ p->one->rename_used, p->broken_pair);
void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
@@ -2617,8 +2617,8 @@ void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
static void diff_resolve_rename_copy(void)
- int i, j;
- struct diff_filepair *p, *pp;
+ int i;
+ struct diff_filepair *p;
struct diff_queue_struct *q = &diff_queued_diff;
diff_debug_queue("resolve-rename-copy", q);
@@ -2640,27 +2640,21 @@ static void diff_resolve_rename_copy(void)
* either in-place edit or rename/copy edit.
else if (DIFF_PAIR_RENAME(p)) {
- if (p->source_stays) {
- p->status = DIFF_STATUS_COPIED;
- continue;
- }
- /* See if there is some other filepair that
- * copies from the same source as us. If so
- * we are a copy. Otherwise we are either a
- * copy if the path stays, or a rename if it
- * does not, but we already handled "stays" case.
+ /*
+ * A rename might have re-connected a broken
+ * pair up, causing the pathnames to be the
+ * same again. If so, that's not a rename at
+ * all, just a modification..
+ *
+ * Otherwise, see if this source was used for
+ * multiple renames, in which case we decrement
+ * the count, and call it a copy.
- for (j = i + 1; j < q->nr; j++) {
- pp = q->queue[j];
- if (strcmp(pp->one->path, p->one->path))
- continue; /* not us */
- if (!DIFF_PAIR_RENAME(pp))
- continue; /* not a rename/copy */
- /* pp is a rename/copy from the same source */
+ if (!strcmp(p->one->path, p->two->path))
+ else if (--p->one->rename_used > 0)
- break;
- }
- if (!p->status)
+ else
else if (hashcmp(p->one->sha1, p->two->sha1) ||