From 51af1886c73f12b1e020db1aa03525e2d74bed93 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 3 Feb 2014 16:47:19 +0400 Subject: combine-diff: move show_log_first logic/action out of paths scanning Judging from sample outputs and tests nothing changes in diff -c output, and this change will help later patches, when we'll be refactoring paths scanning into its own function with several variants - the show_log_first logic / code will stay common to all of them. NOTE: only now we have to take care to explicitly not show anything if parents array is empty, as in fact there are some clients in Git code, which calls diff_tree_combined() in such a way. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/combine-diff.c b/combine-diff.c index 24ca7e2..68d2e53 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -1311,6 +1311,20 @@ void diff_tree_combined(const unsigned char *sha1, struct combine_diff_path *p, *paths = NULL; int i, num_paths, needsep, show_log_first, num_parent = parents->nr; + /* nothing to do, if no parents */ + if (!num_parent) + return; + + show_log_first = !!rev->loginfo && !rev->no_commit_id; + needsep = 0; + if (show_log_first) { + show_log(rev); + + if (rev->verbose_header && opt->output_format) + printf("%s%c", diff_line_prefix(opt), + opt->line_termination); + } + diffopts = *opt; copy_pathspec(&diffopts.pathspec, &opt->pathspec); diffopts.output_format = DIFF_FORMAT_NO_OUTPUT; @@ -1319,8 +1333,6 @@ void diff_tree_combined(const unsigned char *sha1, /* tell diff_tree to emit paths in sorted (=tree) order */ diffopts.orderfile = NULL; - show_log_first = !!rev->loginfo && !rev->no_commit_id; - needsep = 0; /* find set of paths that everybody touches */ for (i = 0; i < num_parent; i++) { /* show stat against the first parent even @@ -1336,14 +1348,6 @@ void diff_tree_combined(const unsigned char *sha1, diffcore_std(&diffopts); paths = intersect_paths(paths, i, num_parent); - if (show_log_first && i == 0) { - show_log(rev); - - if (rev->verbose_header && opt->output_format) - printf("%s%c", diff_line_prefix(opt), - opt->line_termination); - } - /* if showing diff, show it in requested order */ if (diffopts.output_format != DIFF_FORMAT_NO_OUTPUT && opt->orderfile) { -- cgit v0.10.2-6-g49f6 From eeb3f32868862609b475122f3e0c2ef7c0dd3e79 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 3 Feb 2014 16:47:20 +0400 Subject: combine-diff: move changed-paths scanning logic into its own function Move code for finding paths for which diff(commit,parent_i) is not-empty for all parents to separate function - at present we have generic (and slow) code for this job, which translates 1 n-parent problem to n 1-parent problems and then intersect results, and will be adding another limited, but faster, paths scanning implementation in the next patch. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/combine-diff.c b/combine-diff.c index 68d2e53..1732dfd 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -1301,6 +1301,51 @@ static const char *path_path(void *obj) return path->path; } + +/* find set of paths that every parent touches */ +static struct combine_diff_path *find_paths(const unsigned char *sha1, + const struct sha1_array *parents, struct diff_options *opt) +{ + struct combine_diff_path *paths = NULL; + int i, num_parent = parents->nr; + + int output_format = opt->output_format; + const char *orderfile = opt->orderfile; + + opt->output_format = DIFF_FORMAT_NO_OUTPUT; + /* tell diff_tree to emit paths in sorted (=tree) order */ + opt->orderfile = NULL; + + for (i = 0; i < num_parent; i++) { + /* + * show stat against the first parent even when doing + * combined diff. + */ + int stat_opt = (output_format & + (DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT)); + if (i == 0 && stat_opt) + opt->output_format = stat_opt; + else + opt->output_format = DIFF_FORMAT_NO_OUTPUT; + diff_tree_sha1(parents->sha1[i], sha1, "", opt); + diffcore_std(opt); + paths = intersect_paths(paths, i, num_parent); + + /* if showing diff, show it in requested order */ + if (opt->output_format != DIFF_FORMAT_NO_OUTPUT && + orderfile) { + diffcore_order(orderfile); + } + + diff_flush(opt); + } + + opt->output_format = output_format; + opt->orderfile = orderfile; + return paths; +} + + void diff_tree_combined(const unsigned char *sha1, const struct sha1_array *parents, int dense, @@ -1308,7 +1353,7 @@ void diff_tree_combined(const unsigned char *sha1, { struct diff_options *opt = &rev->diffopt; struct diff_options diffopts; - struct combine_diff_path *p, *paths = NULL; + struct combine_diff_path *p, *paths; int i, num_paths, needsep, show_log_first, num_parent = parents->nr; /* nothing to do, if no parents */ @@ -1327,35 +1372,16 @@ void diff_tree_combined(const unsigned char *sha1, diffopts = *opt; copy_pathspec(&diffopts.pathspec, &opt->pathspec); - diffopts.output_format = DIFF_FORMAT_NO_OUTPUT; DIFF_OPT_SET(&diffopts, RECURSIVE); DIFF_OPT_CLR(&diffopts, ALLOW_EXTERNAL); - /* tell diff_tree to emit paths in sorted (=tree) order */ - diffopts.orderfile = NULL; - /* find set of paths that everybody touches */ - for (i = 0; i < num_parent; i++) { - /* show stat against the first parent even - * when doing combined diff. - */ - int stat_opt = (opt->output_format & - (DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT)); - if (i == 0 && stat_opt) - diffopts.output_format = stat_opt; - else - diffopts.output_format = DIFF_FORMAT_NO_OUTPUT; - diff_tree_sha1(parents->sha1[i], sha1, "", &diffopts); - diffcore_std(&diffopts); - paths = intersect_paths(paths, i, num_parent); - - /* if showing diff, show it in requested order */ - if (diffopts.output_format != DIFF_FORMAT_NO_OUTPUT && - opt->orderfile) { - diffcore_order(opt->orderfile); - } - - diff_flush(&diffopts); - } + /* find set of paths that everybody touches + * + * NOTE find_paths() also handles --stat, as it computes + * diff(sha1,parent_i) for all i to do the job, specifically + * for parent0. + */ + paths = find_paths(sha1, parents, &diffopts); /* find out number of surviving paths */ for (num_paths = 0, p = paths; p; p = p->next) -- cgit v0.10.2-6-g49f6 From e197c2b650316f62e6dfee4fadcb4198a03b8325 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 3 Feb 2014 16:47:17 +0400 Subject: tree-diff: no need to manually verify that there is no mode change for a path Because if there is, such two tree entries would never be compared as equal - the code in base_name_compare() explicitly compares modes, if there is a change for dir bit, even for equal paths, entries would compare as different. The code I'm removing here is from 2005 April 262e82b4 (Fix diff-tree recursion), which pre-dates base_name_compare() introduction in 958ba6c9 (Introduce "base_name_compare()" helper function) by a month. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index 11c3550..5810b00 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -23,6 +23,11 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, pathlen1 = tree_entry_len(&t1->entry); pathlen2 = tree_entry_len(&t2->entry); + + /* + * NOTE files and directories *always* compare differently, + * even when having the same name. + */ cmp = base_name_compare(path1, pathlen1, mode1, path2, pathlen2, mode2); if (cmp < 0) { show_entry(opt, "-", t1, base); @@ -35,16 +40,6 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, if (!DIFF_OPT_TST(opt, FIND_COPIES_HARDER) && !hashcmp(sha1, sha2) && mode1 == mode2) return 0; - /* - * If the filemode has changed to/from a directory from/to a regular - * file, we need to consider it a remove and an add. - */ - if (S_ISDIR(mode1) != S_ISDIR(mode2)) { - show_entry(opt, "-", t1, base); - show_entry(opt, "+", t2, base); - return 0; - } - strbuf_add(base, path1, pathlen1); if (DIFF_OPT_TST(opt, RECURSIVE) && S_ISDIR(mode1)) { if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) { -- cgit v0.10.2-6-g49f6 From e906612121bc9d436a3a64cd03be0537654e800c Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 3 Feb 2014 16:47:18 +0400 Subject: tree-diff: no need to pass match to skip_uninteresting() It is neither used there as input, nor the output written through it, is used outside. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index 5810b00..a8c2aec 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -109,13 +109,14 @@ static void show_entry(struct diff_options *opt, const char *prefix, } static void skip_uninteresting(struct tree_desc *t, struct strbuf *base, - struct diff_options *opt, - enum interesting *match) + struct diff_options *opt) { + enum interesting match; + while (t->size) { - *match = tree_entry_interesting(&t->entry, base, 0, &opt->pathspec); - if (*match) { - if (*match == all_entries_not_interesting) + match = tree_entry_interesting(&t->entry, base, 0, &opt->pathspec); + if (match) { + if (match == all_entries_not_interesting) t->size = 0; break; } @@ -128,8 +129,6 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, { struct strbuf base; int baselen = strlen(base_str); - enum interesting t1_match = entry_not_interesting; - enum interesting t2_match = entry_not_interesting; /* Enable recursion indefinitely */ opt->pathspec.recursive = DIFF_OPT_TST(opt, RECURSIVE); @@ -141,8 +140,8 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, if (diff_can_quit_early(opt)) break; if (opt->pathspec.nr) { - skip_uninteresting(t1, &base, opt, &t1_match); - skip_uninteresting(t2, &base, opt, &t2_match); + skip_uninteresting(t1, &base, opt); + skip_uninteresting(t2, &base, opt); } if (!t1->size) { if (!t2->size) -- cgit v0.10.2-6-g49f6 From 7e9003c1497d01f2e75a5f3df7303643fb2432c3 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 24 Feb 2014 20:21:37 +0400 Subject: tree-diff: show_tree() is not needed We don't need special code for showing added/removed subtree, because we can do the same via diff_tree_sha1, just passing NULL for absent tree. And compared to show_tree(), which was calling show_entry() for every tree entry, that would lead to the same show_entry() callings: show_tree(t): for e in t.entries: show_entry(e) diff_tree_sha1(NULL, new): /* the same applies to (old, NULL) */ diff_tree(t1=NULL, t2) ... if (!t1->size) show_entry(t2) ... and possible overhead is negligible, since after the patch, timing for `git log --raw --no-abbrev --no-renames` for navy.git and `linux.git v3.10..v3.11` is practically the same. So let's say goodbye to show_tree() - it removes some code, but also, and what is important, consolidates more code for showing/recursing into trees into one place. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index a8c2aec..2ad7788 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -55,25 +55,7 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, return 0; } -/* A whole sub-tree went away or appeared */ -static void show_tree(struct diff_options *opt, const char *prefix, - struct tree_desc *desc, struct strbuf *base) -{ - enum interesting match = entry_not_interesting; - for (; desc->size; update_tree_entry(desc)) { - if (match != all_entries_interesting) { - match = tree_entry_interesting(&desc->entry, base, 0, - &opt->pathspec); - if (match == all_entries_not_interesting) - break; - if (match == entry_not_interesting) - continue; - } - show_entry(opt, prefix, desc, base); - } -} - -/* A file entry went away or appeared */ +/* An entry went away or appeared */ static void show_entry(struct diff_options *opt, const char *prefix, struct tree_desc *desc, struct strbuf *base) { @@ -85,23 +67,12 @@ static void show_entry(struct diff_options *opt, const char *prefix, strbuf_add(base, path, pathlen); if (DIFF_OPT_TST(opt, RECURSIVE) && S_ISDIR(mode)) { - enum object_type type; - struct tree_desc inner; - void *tree; - unsigned long size; - - tree = read_sha1_file(sha1, &type, &size); - if (!tree || type != OBJ_TREE) - die("corrupt tree sha %s", sha1_to_hex(sha1)); - if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) opt->add_remove(opt, *prefix, mode, sha1, 1, base->buf, 0); strbuf_addch(base, '/'); - - init_tree_desc(&inner, tree, size); - show_tree(opt, prefix, &inner, base); - free(tree); + diff_tree_sha1(*prefix == '-' ? sha1 : NULL, + *prefix == '+' ? sha1 : NULL, base->buf, opt); } else opt->add_remove(opt, prefix[0], mode, sha1, 1, base->buf, 0); -- cgit v0.10.2-6-g49f6 From d00e980c224d4b65972dda4474fbd9294bdb185f Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 24 Feb 2014 20:21:38 +0400 Subject: tree-diff: consolidate code for emitting diffs and recursion in one place Currently both compare_tree_entry() and show_entry() invoke opt diff callbacks (opt->add_remove() and opt->change()), and also they both have code which decides whether to recurse into sub-tree, and whether to emit a tree as separate entry if DIFF_OPT_TREE_IN_RECURSIVE is set. I.e. we have code duplication and logic scattered on two places. Let's consolidate it - all diff emiting code and recurion logic moves to show_entry, which is now named as show_path, because it shows diff for a path, based on up to two tree entries, with actual diff emitting code being kept in new helper emit_diff() for clarity. What we have as the result, is that compare_tree_entry is now free from code with logic for diff generation, and also performance is not affected as timings for `git log --raw --no-abbrev --no-renames` for navy.git and `linux.git v3.10..v3.11`, just like in previous patch, stay the same. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index 2ad7788..1af8219 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -6,8 +6,8 @@ #include "diffcore.h" #include "tree.h" -static void show_entry(struct diff_options *opt, const char *prefix, - struct tree_desc *desc, struct strbuf *base); +static void show_path(struct strbuf *base, struct diff_options *opt, + struct tree_desc *t1, struct tree_desc *t2); static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, struct strbuf *base, struct diff_options *opt) @@ -16,7 +16,6 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, const char *path1, *path2; const unsigned char *sha1, *sha2; int cmp, pathlen1, pathlen2; - int old_baselen = base->len; sha1 = tree_entry_extract(t1, &path1, &mode1); sha2 = tree_entry_extract(t2, &path2, &mode2); @@ -30,51 +29,104 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, */ cmp = base_name_compare(path1, pathlen1, mode1, path2, pathlen2, mode2); if (cmp < 0) { - show_entry(opt, "-", t1, base); + show_path(base, opt, t1, /*t2=*/NULL); return -1; } if (cmp > 0) { - show_entry(opt, "+", t2, base); + show_path(base, opt, /*t1=*/NULL, t2); return 1; } if (!DIFF_OPT_TST(opt, FIND_COPIES_HARDER) && !hashcmp(sha1, sha2) && mode1 == mode2) return 0; - strbuf_add(base, path1, pathlen1); - if (DIFF_OPT_TST(opt, RECURSIVE) && S_ISDIR(mode1)) { - if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) { - opt->change(opt, mode1, mode2, - sha1, sha2, 1, 1, base->buf, 0, 0); + show_path(base, opt, t1, t2); + return 0; +} + + +/* convert path, t1/t2 -> opt->diff_*() callbacks */ +static void emit_diff(struct diff_options *opt, struct strbuf *path, + struct tree_desc *t1, struct tree_desc *t2) +{ + unsigned int mode1 = t1 ? t1->entry.mode : 0; + unsigned int mode2 = t2 ? t2->entry.mode : 0; + + if (mode1 && mode2) { + opt->change(opt, mode1, mode2, t1->entry.sha1, t2->entry.sha1, + 1, 1, path->buf, 0, 0); + } + else { + const unsigned char *sha1; + unsigned int mode; + int addremove; + + if (mode2) { + addremove = '+'; + sha1 = t2->entry.sha1; + mode = mode2; + } else { + addremove = '-'; + sha1 = t1->entry.sha1; + mode = mode1; } - strbuf_addch(base, '/'); - diff_tree_sha1(sha1, sha2, base->buf, opt); - } else { - opt->change(opt, mode1, mode2, sha1, sha2, 1, 1, base->buf, 0, 0); + + opt->add_remove(opt, addremove, mode, sha1, 1, path->buf, 0); } - strbuf_setlen(base, old_baselen); - return 0; } -/* An entry went away or appeared */ -static void show_entry(struct diff_options *opt, const char *prefix, - struct tree_desc *desc, struct strbuf *base) + +/* new path should be added to diff + * + * 3 cases on how/when it should be called and behaves: + * + * !t1, t2 -> path added, parent lacks it + * t1, !t2 -> path removed from parent + * t1, t2 -> path modified + */ +static void show_path(struct strbuf *base, struct diff_options *opt, + struct tree_desc *t1, struct tree_desc *t2) { unsigned mode; const char *path; - const unsigned char *sha1 = tree_entry_extract(desc, &path, &mode); - int pathlen = tree_entry_len(&desc->entry); + int pathlen; int old_baselen = base->len; + int isdir, recurse = 0, emitthis = 1; + + /* at least something has to be valid */ + assert(t1 || t2); + + if (t2) { + /* path present in resulting tree */ + tree_entry_extract(t2, &path, &mode); + pathlen = tree_entry_len(&t2->entry); + isdir = S_ISDIR(mode); + } else { + /* + * a path was removed - take path from parent. Also take + * mode from parent, to decide on recursion. + */ + tree_entry_extract(t1, &path, &mode); + pathlen = tree_entry_len(&t1->entry); + + isdir = S_ISDIR(mode); + mode = 0; + } + + if (DIFF_OPT_TST(opt, RECURSIVE) && isdir) { + recurse = 1; + emitthis = DIFF_OPT_TST(opt, TREE_IN_RECURSIVE); + } strbuf_add(base, path, pathlen); - if (DIFF_OPT_TST(opt, RECURSIVE) && S_ISDIR(mode)) { - if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) - opt->add_remove(opt, *prefix, mode, sha1, 1, base->buf, 0); + if (emitthis) + emit_diff(opt, base, t1, t2); + + if (recurse) { strbuf_addch(base, '/'); - diff_tree_sha1(*prefix == '-' ? sha1 : NULL, - *prefix == '+' ? sha1 : NULL, base->buf, opt); - } else - opt->add_remove(opt, prefix[0], mode, sha1, 1, base->buf, 0); + diff_tree_sha1(t1 ? t1->entry.sha1 : NULL, + t2 ? t2->entry.sha1 : NULL, base->buf, opt); + } strbuf_setlen(base, old_baselen); } @@ -117,12 +169,12 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, if (!t1->size) { if (!t2->size) break; - show_entry(opt, "+", t2, &base); + show_path(&base, opt, /*t1=*/NULL, t2); update_tree_entry(t2); continue; } if (!t2->size) { - show_entry(opt, "-", t1, &base); + show_path(&base, opt, t1, /*t2=*/NULL); update_tree_entry(t1); continue; } -- cgit v0.10.2-6-g49f6 From 5dfb2bbd8d2e2e48aa3ad6c6a8f437bbe5d2a7fb Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 24 Feb 2014 20:21:39 +0400 Subject: tree-diff: don't assume compare_tree_entry() returns -1,0,1 It does, but we'll be reworking it in the next patch after it won't, and besides it is better to stick to standard strcmp/memcmp/base_name_compare/etc... convention, where comparison function returns <0, =0, >0 Regarding performance, comparing for <0, =0, >0 should be a little bit faster, than switch, because it is just 1 test-without-immediate instruction and then up to 3 conditional branches, and in switch you have up to 3 tests with immediate and up to 3 conditional branches. No worry, that update_tree_entry(t2) is duplicated for =0 and >0 - it will be good after we'll be adding support for multiparent walker and will stay that way. =0 case goes first, because it happens more often in real diffs - i.e. paths are the same. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index 1af8219..6a677da 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -178,18 +178,24 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, update_tree_entry(t1); continue; } - switch (compare_tree_entry(t1, t2, &base, opt)) { - case -1: + + cmp = compare_tree_entry(t1, t2, &base, opt); + + /* t1 = t2 */ + if (cmp == 0) { update_tree_entry(t1); - continue; - case 0: + update_tree_entry(t2); + } + + /* t1 < t2 */ + else if (cmp < 0) { update_tree_entry(t1); - /* Fallthrough */ - case 1: + } + + /* t1 > t2 */ + else { update_tree_entry(t2); - continue; } - die("git diff-tree: internal error"); } strbuf_release(&base); -- cgit v0.10.2-6-g49f6 From 903bba68abb8153f181d854e9a7075b893f1303f Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 24 Feb 2014 20:21:40 +0400 Subject: tree-diff: move all action-taking code out of compare_tree_entry() - let it do only comparison. This way the code is cleaner and more structured - cmp function only compares, and the driver takes action based on comparison result. There should be no change in performance, as effectively, we just move if series from on place into another, and merge it to was-already-there same switch/if, so the result is maybe a little bit faster. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index 6a677da..ce3ca72 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -9,8 +9,7 @@ static void show_path(struct strbuf *base, struct diff_options *opt, struct tree_desc *t1, struct tree_desc *t2); -static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, - struct strbuf *base, struct diff_options *opt) +static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2) { unsigned mode1, mode2; const char *path1, *path2; @@ -28,19 +27,7 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, * even when having the same name. */ cmp = base_name_compare(path1, pathlen1, mode1, path2, pathlen2, mode2); - if (cmp < 0) { - show_path(base, opt, t1, /*t2=*/NULL); - return -1; - } - if (cmp > 0) { - show_path(base, opt, /*t1=*/NULL, t2); - return 1; - } - if (!DIFF_OPT_TST(opt, FIND_COPIES_HARDER) && !hashcmp(sha1, sha2) && mode1 == mode2) - return 0; - - show_path(base, opt, t1, t2); - return 0; + return cmp; } @@ -160,6 +147,8 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, strbuf_add(&base, base_str, baselen); for (;;) { + int cmp; + if (diff_can_quit_early(opt)) break; if (opt->pathspec.nr) { @@ -179,21 +168,28 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, continue; } - cmp = compare_tree_entry(t1, t2, &base, opt); + cmp = compare_tree_entry(t1, t2); /* t1 = t2 */ if (cmp == 0) { + if (DIFF_OPT_TST(opt, FIND_COPIES_HARDER) || + hashcmp(t1->entry.sha1, t2->entry.sha1) || + (t1->entry.mode != t2->entry.mode)) + show_path(&base, opt, t1, t2); + update_tree_entry(t1); update_tree_entry(t2); } /* t1 < t2 */ else if (cmp < 0) { + show_path(&base, opt, t1, /*t2=*/NULL); update_tree_entry(t1); } /* t1 > t2 */ else { + show_path(&base, opt, /*t1=*/NULL, t2); update_tree_entry(t2); } } -- cgit v0.10.2-6-g49f6 From 9bc0619655dfd183eb5d0d47b4d63a511acd5c64 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 24 Feb 2014 20:21:41 +0400 Subject: tree-diff: rename compare_tree_entry -> tree_entry_pathcmp Since previous commit, this function does not compare entry hashes, and mode are compared fully outside of it. So what it does is compare entry names and DIR bit in modes. Reflect this in its name. Add documentation stating the semantics, and move the note about files/dirs comparison to it. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index ce3ca72..58e790a 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -9,7 +9,14 @@ static void show_path(struct strbuf *base, struct diff_options *opt, struct tree_desc *t1, struct tree_desc *t2); -static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2) +/* + * Compare two tree entries, taking into account only path/S_ISDIR(mode), + * but not their sha1's. + * + * NOTE files and directories *always* compare differently, even when having + * the same name - thanks to base_name_compare(). + */ +static int tree_entry_pathcmp(struct tree_desc *t1, struct tree_desc *t2) { unsigned mode1, mode2; const char *path1, *path2; @@ -22,10 +29,6 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2) pathlen1 = tree_entry_len(&t1->entry); pathlen2 = tree_entry_len(&t2->entry); - /* - * NOTE files and directories *always* compare differently, - * even when having the same name. - */ cmp = base_name_compare(path1, pathlen1, mode1, path2, pathlen2, mode2); return cmp; } @@ -168,7 +171,7 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, continue; } - cmp = compare_tree_entry(t1, t2); + cmp = tree_entry_pathcmp(t1, t2); /* t1 = t2 */ if (cmp == 0) { -- cgit v0.10.2-6-g49f6 From 5acabd84a6656a4f568f0e934cc9e1b344ed39d4 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 24 Feb 2014 20:21:42 +0400 Subject: tree-diff: show_path prototype is not needed anymore We moved all action-taking code below show_path() in recent HEAD~~ (tree-diff: move all action-taking code out of compare_tree_entry). Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index 58e790a..73fa9bea 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -6,9 +6,6 @@ #include "diffcore.h" #include "tree.h" -static void show_path(struct strbuf *base, struct diff_options *opt, - struct tree_desc *t1, struct tree_desc *t2); - /* * Compare two tree entries, taking into account only path/S_ISDIR(mode), * but not their sha1's. -- cgit v0.10.2-6-g49f6 From 1a27a1545230a5a19a3c05ed41efb8530d2fd2cd Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 24 Feb 2014 20:21:43 +0400 Subject: tree-diff: simplify tree_entry_pathcmp Since an earlier "Finally switch over tree descriptors to contain a pre-parsed entry", we can safely access all tree_desc->entry fields directly instead of first "extracting" them through tree_entry_extract. Use it. The code generated stays the same - only it now visually looks cleaner. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index 73fa9bea..f8b2607 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -15,18 +15,13 @@ */ static int tree_entry_pathcmp(struct tree_desc *t1, struct tree_desc *t2) { - unsigned mode1, mode2; - const char *path1, *path2; - const unsigned char *sha1, *sha2; - int cmp, pathlen1, pathlen2; + struct name_entry *e1, *e2; + int cmp; - sha1 = tree_entry_extract(t1, &path1, &mode1); - sha2 = tree_entry_extract(t2, &path2, &mode2); - - pathlen1 = tree_entry_len(&t1->entry); - pathlen2 = tree_entry_len(&t2->entry); - - cmp = base_name_compare(path1, pathlen1, mode1, path2, pathlen2, mode2); + e1 = &t1->entry; + e2 = &t2->entry; + cmp = base_name_compare(e1->path, tree_entry_len(e1), e1->mode, + e2->path, tree_entry_len(e2), e2->mode); return cmp; } -- cgit v0.10.2-6-g49f6 From 6ca844e9f5ca93efd0a1323a9c9aaa882c0043e8 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 24 Feb 2014 20:21:44 +0400 Subject: tree-diff: remove special-case diff-emitting code for empty-tree cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While walking trees, we iterate their entries from lowest to highest in sort order, so empty tree means all entries were already went over. If we artificially assign +infinity value to such tree "entry", it will go after all usual entries, and through the usual driver loop we will be taking the same actions, which were hand-coded for special cases, i.e. t1 empty, t2 non-empty pathcmp(+∞, t2) -> +1 show_path(/*t1=*/NULL, t2); /* = t1 > t2 case in main loop */ t1 non-empty, t2-empty pathcmp(t1, +∞) -> -1 show_path(t1, /*t2=*/NULL); /* = t1 < t2 case in main loop */ In other words when we have t1 and t2, we return a sign that tells the caller to indicate the "earlier" one to be emitted, and by returning the sign that causes the non-empty side to be emitted, we will automatically cause the entries from the remaining side to be emitted, without attempting to touch the empty side at all. We can teach tree_entry_pathcmp() to pretend that an empty tree has an element that sorts after anything else to achieve this. Right now we never go to when compared tree descriptors are both infinity, as this condition is checked in the loop beginning as finishing criteria, but will do so in the future, when there will be several parents iterated simultaneously, and some pair of them would run to the end. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index f8b2607..8e04002 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -12,12 +12,24 @@ * * NOTE files and directories *always* compare differently, even when having * the same name - thanks to base_name_compare(). + * + * NOTE empty (=invalid) descriptor(s) take part in comparison as +infty, + * so that they sort *after* valid tree entries. + * + * Due to this convention, if trees are scanned in sorted order, all + * non-empty descriptors will be processed first. */ static int tree_entry_pathcmp(struct tree_desc *t1, struct tree_desc *t2) { struct name_entry *e1, *e2; int cmp; + /* empty descriptors sort after valid tree entries */ + if (!t1->size) + return t2->size ? 1 : 0; + else if (!t2->size) + return -1; + e1 = &t1->entry; e2 = &t2->entry; cmp = base_name_compare(e1->path, tree_entry_len(e1), e1->mode, @@ -150,18 +162,8 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, skip_uninteresting(t1, &base, opt); skip_uninteresting(t2, &base, opt); } - if (!t1->size) { - if (!t2->size) - break; - show_path(&base, opt, /*t1=*/NULL, t2); - update_tree_entry(t2); - continue; - } - if (!t2->size) { - show_path(&base, opt, t1, /*t2=*/NULL); - update_tree_entry(t1); - continue; - } + if (!t1->size && !t2->size) + break; cmp = tree_entry_pathcmp(t1, t2); -- cgit v0.10.2-6-g49f6 From ad6f3cc7d2eaec5247b39e9dca8e55a0f98123e7 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 24 Feb 2014 20:21:45 +0400 Subject: tree-diff: diff_tree() should now be static We reworked all its users to use the functionality through diff_tree_sha1 variant in recent patches (see "tree-diff: allow diff_tree_sha1 to accept NULL sha1" and what comes next). diff_tree() is now not used outside tree-diff.c - make it static. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/diff.h b/diff.h index e79f3b3..5d7b9f7 100644 --- a/diff.h +++ b/diff.h @@ -189,8 +189,6 @@ const char *diff_line_prefix(struct diff_options *); extern const char mime_boundary_leader[]; -extern int diff_tree(struct tree_desc *t1, struct tree_desc *t2, - const char *base, struct diff_options *opt); extern int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base, struct diff_options *opt); extern int diff_root_tree_sha1(const unsigned char *new, const char *base, diff --git a/tree-diff.c b/tree-diff.c index 8e04002..0e43906 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -141,8 +141,8 @@ static void skip_uninteresting(struct tree_desc *t, struct strbuf *base, } } -int diff_tree(struct tree_desc *t1, struct tree_desc *t2, - const char *base_str, struct diff_options *opt) +static int diff_tree(struct tree_desc *t1, struct tree_desc *t2, + const char *base_str, struct diff_options *opt) { struct strbuf base; int baselen = strlen(base_str); -- cgit v0.10.2-6-g49f6 From 52894e70951518e44c064cef2561aed38202fd36 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Thu, 27 Mar 2014 18:24:38 +0400 Subject: tree-diff: rework diff_tree interface to be sha1 based In the next commit this will allow to reduce intermediate calls, when recursing into subtrees - at that stage we know only subtree sha1, and it is natural for tree walker to start from that phase. For now we do diff_tree show_path diff_tree_sha1 diff_tree ... and the change will allow to reduce it to diff_tree show_path diff_tree Also, it will allow to omit allocating strbuf for each subtree, and just reuse the common strbuf via playing with its len. The above-mentioned improvements go in the next 2 patches. The downside is that try_to_follow_renames(), if active, we cause re-reading of 2 initial trees, which was negligible based on my timings, and which is outweighed cogently by the upsides. NOTE To keep with the current interface and semantics, I needed to rename the function from diff_tree() to diff_tree_sha1(). As diff_tree_sha1() was already used, and the function we are talking here is its more low-level helper, let's use convention for prefixing such helpers with "ll_". So the final renaming is diff_tree() -> ll_diff_tree_sha1() Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index 0e43906..55d8146 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -141,12 +141,17 @@ static void skip_uninteresting(struct tree_desc *t, struct strbuf *base, } } -static int diff_tree(struct tree_desc *t1, struct tree_desc *t2, - const char *base_str, struct diff_options *opt) +static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, + const char *base_str, struct diff_options *opt) { + struct tree_desc t1, t2; + void *t1tree, *t2tree; struct strbuf base; int baselen = strlen(base_str); + t1tree = fill_tree_descriptor(&t1, old); + t2tree = fill_tree_descriptor(&t2, new); + /* Enable recursion indefinitely */ opt->pathspec.recursive = DIFF_OPT_TST(opt, RECURSIVE); @@ -159,39 +164,41 @@ static int diff_tree(struct tree_desc *t1, struct tree_desc *t2, if (diff_can_quit_early(opt)) break; if (opt->pathspec.nr) { - skip_uninteresting(t1, &base, opt); - skip_uninteresting(t2, &base, opt); + skip_uninteresting(&t1, &base, opt); + skip_uninteresting(&t2, &base, opt); } - if (!t1->size && !t2->size) + if (!t1.size && !t2.size) break; - cmp = tree_entry_pathcmp(t1, t2); + cmp = tree_entry_pathcmp(&t1, &t2); /* t1 = t2 */ if (cmp == 0) { if (DIFF_OPT_TST(opt, FIND_COPIES_HARDER) || - hashcmp(t1->entry.sha1, t2->entry.sha1) || - (t1->entry.mode != t2->entry.mode)) - show_path(&base, opt, t1, t2); + hashcmp(t1.entry.sha1, t2.entry.sha1) || + (t1.entry.mode != t2.entry.mode)) + show_path(&base, opt, &t1, &t2); - update_tree_entry(t1); - update_tree_entry(t2); + update_tree_entry(&t1); + update_tree_entry(&t2); } /* t1 < t2 */ else if (cmp < 0) { - show_path(&base, opt, t1, /*t2=*/NULL); - update_tree_entry(t1); + show_path(&base, opt, &t1, /*t2=*/NULL); + update_tree_entry(&t1); } /* t1 > t2 */ else { - show_path(&base, opt, /*t1=*/NULL, t2); - update_tree_entry(t2); + show_path(&base, opt, /*t1=*/NULL, &t2); + update_tree_entry(&t2); } } strbuf_release(&base); + free(t2tree); + free(t1tree); return 0; } @@ -206,7 +213,7 @@ static inline int diff_might_be_rename(void) !DIFF_FILE_VALID(diff_queued_diff.queue[0]->one); } -static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, const char *base, struct diff_options *opt) +static void try_to_follow_renames(const unsigned char *old, const unsigned char *new, const char *base, struct diff_options *opt) { struct diff_options diff_opts; struct diff_queue_struct *q = &diff_queued_diff; @@ -244,7 +251,7 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co diff_opts.break_opt = opt->break_opt; diff_opts.rename_score = opt->rename_score; diff_setup_done(&diff_opts); - diff_tree(t1, t2, base, &diff_opts); + ll_diff_tree_sha1(old, new, base, &diff_opts); diffcore_std(&diff_opts); free_pathspec(&diff_opts.pathspec); @@ -305,23 +312,12 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base, struct diff_options *opt) { - void *tree1, *tree2; - struct tree_desc t1, t2; - unsigned long size1, size2; int retval; - tree1 = fill_tree_descriptor(&t1, old); - tree2 = fill_tree_descriptor(&t2, new); - size1 = t1.size; - size2 = t2.size; - retval = diff_tree(&t1, &t2, base, opt); - if (!*base && DIFF_OPT_TST(opt, FOLLOW_RENAMES) && diff_might_be_rename()) { - init_tree_desc(&t1, tree1, size1); - init_tree_desc(&t2, tree2, size2); - try_to_follow_renames(&t1, &t2, base, opt); - } - free(tree1); - free(tree2); + retval = ll_diff_tree_sha1(old, new, base, opt); + if (!*base && DIFF_OPT_TST(opt, FOLLOW_RENAMES) && diff_might_be_rename()) + try_to_follow_renames(old, new, base, opt); + return retval; } -- cgit v0.10.2-6-g49f6 From b9081a657446ac2c5e2129de183edb41d4b4f4fb Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Thu, 27 Mar 2014 18:21:29 +0400 Subject: tree-diff: no need to call "full" diff_tree_sha1 from show_path() As described in previous commit, when recursing into sub-trees, we can use lower-level tree walker, since its interface is now sha1 based. The change is ok, because diff_tree_sha1() only invokes ll_diff_tree_sha1(), and also, if base is empty, try_to_follow_renames(). But base is not empty here, as we have added a path and '/' before recursing. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index 55d8146..80527c0 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -6,6 +6,10 @@ #include "diffcore.h" #include "tree.h" + +static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, + const char *base_str, struct diff_options *opt); + /* * Compare two tree entries, taking into account only path/S_ISDIR(mode), * but not their sha1's. @@ -118,8 +122,8 @@ static void show_path(struct strbuf *base, struct diff_options *opt, if (recurse) { strbuf_addch(base, '/'); - diff_tree_sha1(t1 ? t1->entry.sha1 : NULL, - t2 ? t2->entry.sha1 : NULL, base->buf, opt); + ll_diff_tree_sha1(t1 ? t1->entry.sha1 : NULL, + t2 ? t2->entry.sha1 : NULL, base->buf, opt); } strbuf_setlen(base, old_baselen); -- cgit v0.10.2-6-g49f6 From 12cd81743dc4645ef909b0c38582f5714c9a8ff7 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Thu, 27 Mar 2014 18:22:07 +0400 Subject: tree-diff: reuse base str(buf) memory on sub-tree recursion Instead of allocating it all the time for every subtree in ll_diff_tree_sha1, let's allocate it once in diff_tree_sha1, and then all callee just use it in stacking style, without memory allocations. This should be faster, and for me this change gives the following slight speedups for git log --raw --no-abbrev --no-renames --format='%H' navy.git linux.git v3.10..v3.11 before 0.618s 1.903s after 0.611s 1.889s speedup 1.1% 0.7% Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/tree-diff.c b/tree-diff.c index 80527c0..278acc8 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -8,7 +8,7 @@ static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, - const char *base_str, struct diff_options *opt); + struct strbuf *base, struct diff_options *opt); /* * Compare two tree entries, taking into account only path/S_ISDIR(mode), @@ -123,7 +123,7 @@ static void show_path(struct strbuf *base, struct diff_options *opt, if (recurse) { strbuf_addch(base, '/'); ll_diff_tree_sha1(t1 ? t1->entry.sha1 : NULL, - t2 ? t2->entry.sha1 : NULL, base->buf, opt); + t2 ? t2->entry.sha1 : NULL, base, opt); } strbuf_setlen(base, old_baselen); @@ -146,12 +146,10 @@ static void skip_uninteresting(struct tree_desc *t, struct strbuf *base, } static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, - const char *base_str, struct diff_options *opt) + struct strbuf *base, struct diff_options *opt) { struct tree_desc t1, t2; void *t1tree, *t2tree; - struct strbuf base; - int baselen = strlen(base_str); t1tree = fill_tree_descriptor(&t1, old); t2tree = fill_tree_descriptor(&t2, new); @@ -159,17 +157,14 @@ static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, /* Enable recursion indefinitely */ opt->pathspec.recursive = DIFF_OPT_TST(opt, RECURSIVE); - strbuf_init(&base, PATH_MAX); - strbuf_add(&base, base_str, baselen); - for (;;) { int cmp; if (diff_can_quit_early(opt)) break; if (opt->pathspec.nr) { - skip_uninteresting(&t1, &base, opt); - skip_uninteresting(&t2, &base, opt); + skip_uninteresting(&t1, base, opt); + skip_uninteresting(&t2, base, opt); } if (!t1.size && !t2.size) break; @@ -181,7 +176,7 @@ static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, if (DIFF_OPT_TST(opt, FIND_COPIES_HARDER) || hashcmp(t1.entry.sha1, t2.entry.sha1) || (t1.entry.mode != t2.entry.mode)) - show_path(&base, opt, &t1, &t2); + show_path(base, opt, &t1, &t2); update_tree_entry(&t1); update_tree_entry(&t2); @@ -189,18 +184,17 @@ static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, /* t1 < t2 */ else if (cmp < 0) { - show_path(&base, opt, &t1, /*t2=*/NULL); + show_path(base, opt, &t1, /*t2=*/NULL); update_tree_entry(&t1); } /* t1 > t2 */ else { - show_path(&base, opt, /*t1=*/NULL, &t2); + show_path(base, opt, /*t1=*/NULL, &t2); update_tree_entry(&t2); } } - strbuf_release(&base); free(t2tree); free(t1tree); return 0; @@ -217,7 +211,7 @@ static inline int diff_might_be_rename(void) !DIFF_FILE_VALID(diff_queued_diff.queue[0]->one); } -static void try_to_follow_renames(const unsigned char *old, const unsigned char *new, const char *base, struct diff_options *opt) +static void try_to_follow_renames(const unsigned char *old, const unsigned char *new, struct strbuf *base, struct diff_options *opt) { struct diff_options diff_opts; struct diff_queue_struct *q = &diff_queued_diff; @@ -314,13 +308,19 @@ static void try_to_follow_renames(const unsigned char *old, const unsigned char q->nr = 1; } -int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base, struct diff_options *opt) +int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base_str, struct diff_options *opt) { + struct strbuf base; int retval; - retval = ll_diff_tree_sha1(old, new, base, opt); - if (!*base && DIFF_OPT_TST(opt, FOLLOW_RENAMES) && diff_might_be_rename()) - try_to_follow_renames(old, new, base, opt); + strbuf_init(&base, PATH_MAX); + strbuf_addstr(&base, base_str); + + retval = ll_diff_tree_sha1(old, new, &base, opt); + if (!*base_str && DIFF_OPT_TST(opt, FOLLOW_RENAMES) && diff_might_be_rename()) + try_to_follow_renames(old, new, &base, opt); + + strbuf_release(&base); return retval; } -- cgit v0.10.2-6-g49f6 From 61f76a3612db199a9eb9090c2605d8fc35ffc41c Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Thu, 27 Mar 2014 18:22:50 +0400 Subject: Portable alloca for Git In the next patch we'll have to use alloca() for performance reasons, but since alloca is non-standardized and is not portable, let's have a trick with compatibility wrappers: 1. at configure time, determine, do we have working alloca() through alloca.h, and define #define HAVE_ALLOCA_H if yes. 2. in code #ifdef HAVE_ALLOCA_H # include # define xalloca(size) (alloca(size)) # define xalloca_free(p) do {} while(0) #else # define xalloca(size) (xmalloc(size)) # define xalloca_free(p) (free(p)) #endif and use it like func() { p = xalloca(size); ... xalloca_free(p); } This way, for systems, where alloca is available, we'll have optimal on-stack allocations with fast executions. On the other hand, on systems, where alloca is not available, this gracefully fallbacks to xmalloc/free. Both autoconf and config.mak.uname configurations were updated. For autoconf, we are not bothering considering cases, when no alloca.h is available, but alloca() works some other way - its simply alloca.h is available and works or not, everything else is deep legacy. For config.mak.uname, I've tried to make my almost-sure guess for where alloca() is available, but since I only have access to Linux it is the only change I can be sure about myself, with relevant to other changed systems people Cc'ed. NOTE SunOS and Windows had explicit -DHAVE_ALLOCA_H in their configurations. I've changed that to now-common HAVE_ALLOCA_H=YesPlease which should be correct. Cc: Brandon Casey Cc: Marius Storm-Olsen Cc: Johannes Sixt Cc: Johannes Schindelin Cc: Ramsay Jones Cc: Gerrit Pape Cc: Petr Salinger Cc: Jonathan Nieder Acked-by: Thomas Schwinge (GNU Hurd changes) Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/Makefile b/Makefile index dddaf4f..0334806 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,8 @@ all:: # Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in # /foo/bar/include and /foo/bar/lib directories. # +# Define HAVE_ALLOCA_H if you have working alloca(3) defined in that header. +# # Define NO_CURL if you do not have libcurl installed. git-http-fetch and # git-http-push are not built, and you cannot use http:// and https:// # transports (neither smart nor dumb). @@ -1099,6 +1101,10 @@ ifdef USE_LIBPCRE EXTLIBS += -lpcre endif +ifdef HAVE_ALLOCA_H + BASIC_CFLAGS += -DHAVE_ALLOCA_H +endif + ifdef NO_CURL BASIC_CFLAGS += -DNO_CURL REMOTE_CURL_PRIMARY = diff --git a/config.mak.uname b/config.mak.uname index 7d31fad..71602ee 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -28,6 +28,7 @@ ifeq ($(uname_S),OSF1) NO_NSEC = YesPlease endif ifeq ($(uname_S),Linux) + HAVE_ALLOCA_H = YesPlease NO_STRLCPY = YesPlease NO_MKSTEMPS = YesPlease HAVE_PATHS_H = YesPlease @@ -35,6 +36,7 @@ ifeq ($(uname_S),Linux) HAVE_DEV_TTY = YesPlease endif ifeq ($(uname_S),GNU/kFreeBSD) + HAVE_ALLOCA_H = YesPlease NO_STRLCPY = YesPlease NO_MKSTEMPS = YesPlease HAVE_PATHS_H = YesPlease @@ -103,6 +105,7 @@ ifeq ($(uname_S),SunOS) NEEDS_NSL = YesPlease SHELL_PATH = /bin/bash SANE_TOOL_PATH = /usr/xpg6/bin:/usr/xpg4/bin + HAVE_ALLOCA_H = YesPlease NO_STRCASESTR = YesPlease NO_MEMMEM = YesPlease NO_MKDTEMP = YesPlease @@ -146,7 +149,7 @@ ifeq ($(uname_S),SunOS) endif INSTALL = /usr/ucb/install TAR = gtar - BASIC_CFLAGS += -D__EXTENSIONS__ -D__sun__ -DHAVE_ALLOCA_H + BASIC_CFLAGS += -D__EXTENSIONS__ -D__sun__ endif ifeq ($(uname_O),Cygwin) ifeq ($(shell expr "$(uname_R)" : '1\.[1-6]\.'),4) @@ -166,6 +169,7 @@ ifeq ($(uname_O),Cygwin) else NO_REGEX = UnfortunatelyYes endif + HAVE_ALLOCA_H = YesPlease NEEDS_LIBICONV = YesPlease NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease @@ -239,6 +243,7 @@ ifeq ($(uname_S),AIX) endif ifeq ($(uname_S),GNU) # GNU/Hurd + HAVE_ALLOCA_H = YesPlease NO_STRLCPY = YesPlease NO_MKSTEMPS = YesPlease HAVE_PATHS_H = YesPlease @@ -316,6 +321,7 @@ endif ifeq ($(uname_S),Windows) GIT_VERSION := $(GIT_VERSION).MSVC pathsep = ; + HAVE_ALLOCA_H = YesPlease NO_PREAD = YesPlease NEEDS_CRYPTO_WITH_SSL = YesPlease NO_LIBGEN_H = YesPlease @@ -363,7 +369,7 @@ ifeq ($(uname_S),Windows) COMPAT_OBJS = compat/msvc.o compat/winansi.o \ compat/win32/pthread.o compat/win32/syslog.o \ compat/win32/dirent.o - COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -DHAVE_ALLOCA_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" + COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -SUBSYSTEM:CONSOLE -NODEFAULTLIB:MSVCRT.lib EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib PTHREAD_LIBS = diff --git a/configure.ac b/configure.ac index 2f43393..0eae704 100644 --- a/configure.ac +++ b/configure.ac @@ -272,6 +272,14 @@ AS_HELP_STRING([], [ARG can be also prefix for libpcre library and hea GIT_CONF_SUBST([LIBPCREDIR]) fi) # +# Define HAVE_ALLOCA_H if you have working alloca(3) defined in that header. +AC_FUNC_ALLOCA +case $ac_cv_working_alloca_h in + yes) HAVE_ALLOCA_H=YesPlease;; + *) HAVE_ALLOCA_H='';; +esac +GIT_CONF_SUBST([HAVE_ALLOCA_H]) +# # Define NO_CURL if you do not have curl installed. git-http-pull and # git-http-push are not built, and you cannot use http:// and https:// # transports. diff --git a/git-compat-util.h b/git-compat-util.h index cbd86c3..63b2b3b 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -526,6 +526,14 @@ extern void release_pack_memory(size_t); typedef void (*try_to_free_t)(size_t); extern try_to_free_t set_try_to_free_routine(try_to_free_t); +#ifdef HAVE_ALLOCA_H +# include +# define xalloca(size) (alloca(size)) +# define xalloca_free(p) do {} while (0) +#else +# define xalloca(size) (xmalloc(size)) +# define xalloca_free(p) (free(p)) +#endif extern char *xstrdup(const char *str); extern void *xmalloc(size_t size); extern void *xmallocz(size_t size); -- cgit v0.10.2-6-g49f6 From 72441af7c4e3bde33cdf7edafcf09c227d5d5296 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 7 Apr 2014 01:46:26 +0400 Subject: tree-diff: rework diff_tree() to generate diffs for multiparent cases as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously diff_tree(), which is now named ll_diff_tree_sha1(), was generating diff_filepair(s) for two trees t1 and t2, and that was usually used for a commit as t1=HEAD~, and t2=HEAD - i.e. to see changes a commit introduces. In Git, however, we have fundamentally built flexibility in that a commit can have many parents - 1 for a plain commit, 2 for a simple merge, but also more than 2 for merging several heads at once. For merges there is a so called combine-diff, which shows diff, a merge introduces by itself, omitting changes done by any parent. That works through first finding paths, that are different to all parents, and then showing generalized diff, with separate columns for +/- for each parent. The code lives in combine-diff.c . There is an impedance mismatch, however, in that a commit could generally have any number of parents, and that while diffing trees, we divide cases for 2-tree diffs and more-than-2-tree diffs. I mean there is no special casing for multiple parents commits in e.g. revision-walker . That impedance mismatch *hurts* *performance* *badly* for generating combined diffs - in "combine-diff: optimize combine_diff_path sets intersection" I've already removed some slowness from it, but from the timings provided there, it could be seen, that combined diffs still cost more than an order of magnitude more cpu time, compared to diff for usual commits, and that would only be an optimistic estimate, if we take into account that for e.g. linux.git there is only one merge for several dozens of plain commits. That slowness comes from the fact that currently, while generating combined diff, a lot of time is spent computing diff(commit,commit^2) just to only then intersect that huge diff to almost small set of files from diff(commit,commit^1). That's because at present, to compute combine-diff, for first finding paths, that "every parent touches", we use the following combine-diff property/definition: D(A,P1...Pn) = D(A,P1) ^ ... ^ D(A,Pn) (w.r.t. paths) where D(A,P1...Pn) is combined diff between commit A, and parents Pi and D(A,Pi) is usual two-tree diff Pi..A So if any of that D(A,Pi) is huge, tracting 1 n-parent combine-diff as n 1-parent diffs and intersecting results will be slow. And usually, for linux.git and other topic-based workflows, that D(A,P2) is huge, because, if merge-base of A and P2, is several dozens of merges (from A, via first parent) below, that D(A,P2) will be diffing sum of merges from several subsystems to 1 subsystem. The solution is to avoid computing n 1-parent diffs, and to find changed-to-all-parents paths via scanning A's and all Pi's trees simultaneously, at each step comparing their entries, and based on that comparison, populate paths result, and deduce we could *skip* *recursing* into subdirectories, if at least for 1 parent, sha1 of that dir tree is the same as in A. That would save us from doing significant amount of needless work. Such approach is very similar to what diff_tree() does, only there we deal with scanning only 2 trees simultaneously, and for n+1 tree, the logic is a bit more complex: D(T,P1...Pn) calculation scheme ------------------------------- D(T,P1...Pn) = D(T,P1) ^ ... ^ D(T,Pn) (regarding resulting paths set) D(T,Pj) - diff between T..Pj D(T,P1...Pn) - combined diff from T to parents P1,...,Pn We start from all trees, which are sorted, and compare their entries in lock-step: T P1 Pn - - - |t| |p1| |pn| |-| |--| ... |--| imin = argmin(p1...pn) | | | | | | |-| |--| |--| |.| |. | |. | . . . . . . at any time there could be 3 cases: 1) t < p[imin]; 2) t > p[imin]; 3) t = p[imin]. Schematic deduction of what every case means, and what to do, follows: 1) t < p[imin] -> ∀j t ∉ Pj -> "+t" ∈ D(T,Pj) -> D += "+t"; t↓ 2) t > p[imin] 2.1) ∃j: pj > p[imin] -> "-p[imin]" ∉ D(T,Pj) -> D += ø; ∀ pi=p[imin] pi↓ 2.2) ∀i pi = p[imin] -> pi ∉ T -> "-pi" ∈ D(T,Pi) -> D += "-p[imin]"; ∀i pi↓ 3) t = p[imin] 3.1) ∃j: pj > p[imin] -> "+t" ∈ D(T,Pj) -> only pi=p[imin] remains to investigate 3.2) pi = p[imin] -> investigate δ(t,pi) | | v 3.1+3.2) looking at δ(t,pi) ∀i: pi=p[imin] - if all != ø -> ⎧δ(t,pi) - if pi=p[imin] -> D += ⎨ ⎩"+t" - if pi>p[imin] in any case t↓ ∀ pi=p[imin] pi↓ ~ For comparison, here is how diff_tree() works: D(A,B) calculation scheme ------------------------- A B - - |a| |b| a < b -> a ∉ B -> D(A,B) += +a a↓ |-| |-| a > b -> b ∉ A -> D(A,B) += -b b↓ | | | | a = b -> investigate δ(a,b) a↓ b↓ |-| |-| |.| |.| . . . . ~~~~~~~~ This patch generalizes diff tree-walker to work with arbitrary number of parents as described above - i.e. now there is a resulting tree t, and some parents trees tp[i] i=[0..nparent). The generalization builds on the fact that usual diff D(A,B) is by definition the same as combined diff D(A,[B]), so if we could rework the code for common case and make it be not slower for nparent=1 case, usual diff(t1,t2) generation will not be slower, and multiparent diff tree-walker would greatly benefit generating combine-diff. What we do is as follows: 1) diff tree-walker ll_diff_tree_sha1() is internally reworked to be a paths generator (new name diff_tree_paths()), with each generated path being `struct combine_diff_path` with info for path, new sha1,mode and for every parent which sha1,mode it was in it. 2) From that info, we can still generate usual diff queue with struct diff_filepairs, via "exporting" generated combine_diff_path, if we know we run for nparent=1 case. (see emit_diff() which is now named emit_diff_first_parent_only()) 3) In order for diff_can_quit_early(), which checks DIFF_OPT_TST(opt, HAS_CHANGES)) to work, that exporting have to be happening not in bulk, but incrementally, one diff path at a time. For such consumers, there is a new callback in diff_options introduced: ->pathchange(opt, struct combine_diff_path *) which, if set to !NULL, is called for every generated path. (see new compat ll_diff_tree_sha1() wrapper around new paths generator for setup) 4) The paths generation itself, is reworked from previous ll_diff_tree_sha1() code according to "D(A,P1...Pn) calculation scheme" provided above: On the start we allocate [nparent] arrays in place what was earlier just for one parent tree. then we just generalize loops, and comparison according to the algorithm. Some notes(*): 1) alloca(), for small arrays, is used for "runs not slower for nparent=1 case than before" goal - if we change it to xmalloc()/free() the timings get ~1% worse. For alloca() we use just-introduced xalloca/xalloca_free compatibility wrappers, so it should not be a portability problem. 2) For every parent tree, we need to keep a tag, whether entry from that parent equals to entry from minimal parent. For performance reasons I'm keeping that tag in entry's mode field in unused bit - see S_IFXMIN_NEQ. Not doing so, we'd need to alloca another [nparent] array, which hurts performance. 3) For emitted paths, memory could be reused, if we know the path was processed via callback and will not be needed later. We use efficient hand-made realloc-style path_appendnew(), that saves us from ~1-1.5% of potential additional slowdown. 4) goto(s) are used in several places, as the code executes a little bit faster with lowered register pressure. Also - we should now check for FIND_COPIES_HARDER not only when two entries names are the same, and their hashes are equal, but also for a case, when a path was removed from some of all parents having it. The reason is, if we don't, that path won't be emitted at all (see "a > xi" case), and we'll just skip it, and FIND_COPIES_HARDER wants all paths - with diff or without - to be emitted, to be later analyzed for being copies sources. The new check is only necessary for nparent >1, as for nparent=1 case xmin_eqtotal always =1 =nparent, and a path is always added to diff as removal. ~~~~~~~~ Timings for # without -c, i.e. testing only nparent=1 case `git log --raw --no-abbrev --no-renames` before and after the patch are as follows: navy.git linux.git v3.10..v3.11 before 0.611s 1.889s after 0.619s 1.907s slowdown 1.3% 0.9% This timings show we did no harm to usual diff(tree1,tree2) generation. From the table we can see that we actually did ~1% slowdown, but I think I've "earned" that 1% in the previous patch ("tree-diff: reuse base str(buf) memory on sub-tree recursion", HEAD~~) so for nparent=1 case, net timings stays approximately the same. The output also stayed the same. (*) If we revert 1)-4) to more usual techniques, for nparent=1 case, we'll get ~2-2.5% of additional slowdown, which I've tried to avoid, as "do no harm for nparent=1 case" rule. For linux.git, combined diff will run an order of magnitude faster and appropriate timings will be provided in the next commit, as we'll be taking advantage of the new diff tree-walker for combined-diff generation there. P.S. and combined diff is not some exotic/for-play-only stuff - for example for a program I write to represent Git archives as readonly filesystem, there is initial scan with `git log --reverse --raw --no-abbrev --no-renames -c` to extract log of what was created/changed when, as a result building a map {} sha1 -> in which commit (and date) a content was added that `-c` means also show combined diff for merges, and without them, if a merge is non-trivial (merges changes from two parents with both having separate changes to a file), or an evil one, the map will not be full, i.e. some valid sha1 would be absent from it. That case was my initial motivation for combined diffs speedup. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/cache.h b/cache.h index dc040fb..e7f5a0c 100644 --- a/cache.h +++ b/cache.h @@ -75,6 +75,21 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long); #define S_ISGITLINK(m) (((m) & S_IFMT) == S_IFGITLINK) /* + * Some mode bits are also used internally for computations. + * + * They *must* not overlap with any valid modes, and they *must* not be emitted + * to outside world - i.e. appear on disk or network. In other words, it's just + * temporary fields, which we internally use, but they have to stay in-house. + * + * ( such approach is valid, as standard S_IF* fits into 16 bits, and in Git + * codebase mode is `unsigned int` which is assumed to be at least 32 bits ) + */ + +/* used internally in tree-diff */ +#define S_DIFFTREE_IFXMIN_NEQ 0x80000000 + + +/* * Intensive research over the course of many years has shown that * port 9418 is totally unused by anything else. Or * diff --git a/diff.c b/diff.c index 8e4a6a9..cda4aa8 100644 --- a/diff.c +++ b/diff.c @@ -3216,6 +3216,7 @@ void diff_setup(struct diff_options *options) options->context = diff_context_default; DIFF_OPT_SET(options, RENAME_EMPTY); + /* pathchange left =NULL by default */ options->change = diff_change; options->add_remove = diff_addremove; options->use_color = diff_use_color_default; diff --git a/diff.h b/diff.h index 5d7b9f7..0abd735 100644 --- a/diff.h +++ b/diff.h @@ -15,6 +15,10 @@ struct diff_filespec; struct userdiff_driver; struct sha1_array; struct commit; +struct combine_diff_path; + +typedef int (*pathchange_fn_t)(struct diff_options *options, + struct combine_diff_path *path); typedef void (*change_fn_t)(struct diff_options *options, unsigned old_mode, unsigned new_mode, @@ -157,6 +161,7 @@ struct diff_options { int close_file; struct pathspec pathspec; + pathchange_fn_t pathchange; change_fn_t change; add_remove_fn_t add_remove; diff_format_fn_t format_callback; @@ -189,6 +194,10 @@ const char *diff_line_prefix(struct diff_options *); extern const char mime_boundary_leader[]; +extern struct combine_diff_path *diff_tree_paths( + struct combine_diff_path *p, const unsigned char *sha1, + const unsigned char **parent_sha1, int nparent, + struct strbuf *base, struct diff_options *opt); extern int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base, struct diff_options *opt); extern int diff_root_tree_sha1(const unsigned char *new, const char *base, diff --git a/tree-diff.c b/tree-diff.c index 278acc8..e7b378c 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -6,7 +6,19 @@ #include "diffcore.h" #include "tree.h" +/* + * internal mode marker, saying a tree entry != entry of tp[imin] + * (see ll_diff_tree_paths for what it means there) + * + * we will update/use/emit entry for diff only with it unset. + */ +#define S_IFXMIN_NEQ S_DIFFTREE_IFXMIN_NEQ + +static struct combine_diff_path *ll_diff_tree_paths( + struct combine_diff_path *p, const unsigned char *sha1, + const unsigned char **parents_sha1, int nparent, + struct strbuf *base, struct diff_options *opt); static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, struct strbuf *base, struct diff_options *opt); @@ -42,71 +54,151 @@ static int tree_entry_pathcmp(struct tree_desc *t1, struct tree_desc *t2) } -/* convert path, t1/t2 -> opt->diff_*() callbacks */ -static void emit_diff(struct diff_options *opt, struct strbuf *path, - struct tree_desc *t1, struct tree_desc *t2) +/* + * convert path -> opt->diff_*() callbacks + * + * emits diff to first parent only, and tells diff tree-walker that we are done + * with p and it can be freed. + */ +static int emit_diff_first_parent_only(struct diff_options *opt, struct combine_diff_path *p) { - unsigned int mode1 = t1 ? t1->entry.mode : 0; - unsigned int mode2 = t2 ? t2->entry.mode : 0; - - if (mode1 && mode2) { - opt->change(opt, mode1, mode2, t1->entry.sha1, t2->entry.sha1, - 1, 1, path->buf, 0, 0); + struct combine_diff_parent *p0 = &p->parent[0]; + if (p->mode && p0->mode) { + opt->change(opt, p0->mode, p->mode, p0->sha1, p->sha1, + 1, 1, p->path, 0, 0); } else { const unsigned char *sha1; unsigned int mode; int addremove; - if (mode2) { + if (p->mode) { addremove = '+'; - sha1 = t2->entry.sha1; - mode = mode2; + sha1 = p->sha1; + mode = p->mode; } else { addremove = '-'; - sha1 = t1->entry.sha1; - mode = mode1; + sha1 = p0->sha1; + mode = p0->mode; } - opt->add_remove(opt, addremove, mode, sha1, 1, path->buf, 0); + opt->add_remove(opt, addremove, mode, sha1, 1, p->path, 0); } + + return 0; /* we are done with p */ } -/* new path should be added to diff +/* + * Make a new combine_diff_path from path/mode/sha1 + * and append it to paths list tail. + * + * Memory for created elements could be reused: + * + * - if last->next == NULL, the memory is allocated; + * + * - if last->next != NULL, it is assumed that p=last->next was returned + * earlier by this function, and p->next was *not* modified. + * The memory is then reused from p. + * + * so for clients, + * + * - if you do need to keep the element + * + * p = path_appendnew(p, ...); + * process(p); + * p->next = NULL; + * + * - if you don't need to keep the element after processing + * + * pprev = p; + * p = path_appendnew(p, ...); + * process(p); + * p = pprev; + * ; don't forget to free tail->next in the end + * + * p->parent[] remains uninitialized. + */ +static struct combine_diff_path *path_appendnew(struct combine_diff_path *last, + int nparent, const struct strbuf *base, const char *path, int pathlen, + unsigned mode, const unsigned char *sha1) +{ + struct combine_diff_path *p; + int len = base->len + pathlen; + int alloclen = combine_diff_path_size(nparent, len); + + /* if last->next is !NULL - it is a pre-allocated memory, we can reuse */ + p = last->next; + if (p && (alloclen > (intptr_t)p->next)) { + free(p); + p = NULL; + } + + if (!p) { + p = xmalloc(alloclen); + + /* + * until we go to it next round, .next holds how many bytes we + * allocated (for faster realloc - we don't need copying old data). + */ + p->next = (struct combine_diff_path *)(intptr_t)alloclen; + } + + last->next = p; + + p->path = (char *)&(p->parent[nparent]); + memcpy(p->path, base->buf, base->len); + memcpy(p->path + base->len, path, pathlen); + p->path[len] = 0; + p->mode = mode; + hashcpy(p->sha1, sha1 ? sha1 : null_sha1); + + return p; +} + +/* + * new path should be added to combine diff * * 3 cases on how/when it should be called and behaves: * - * !t1, t2 -> path added, parent lacks it - * t1, !t2 -> path removed from parent - * t1, t2 -> path modified + * t, !tp -> path added, all parents lack it + * !t, tp -> path removed from all parents + * t, tp -> path modified/added + * (M for tp[i]=tp[imin], A otherwise) */ -static void show_path(struct strbuf *base, struct diff_options *opt, - struct tree_desc *t1, struct tree_desc *t2) +static struct combine_diff_path *emit_path(struct combine_diff_path *p, + struct strbuf *base, struct diff_options *opt, int nparent, + struct tree_desc *t, struct tree_desc *tp, + int imin) { unsigned mode; const char *path; + const unsigned char *sha1; int pathlen; int old_baselen = base->len; - int isdir, recurse = 0, emitthis = 1; + int i, isdir, recurse = 0, emitthis = 1; /* at least something has to be valid */ - assert(t1 || t2); + assert(t || tp); - if (t2) { + if (t) { /* path present in resulting tree */ - tree_entry_extract(t2, &path, &mode); - pathlen = tree_entry_len(&t2->entry); + sha1 = tree_entry_extract(t, &path, &mode); + pathlen = tree_entry_len(&t->entry); isdir = S_ISDIR(mode); } else { /* - * a path was removed - take path from parent. Also take - * mode from parent, to decide on recursion. + * a path was removed - take path from imin parent. Also take + * mode from that parent, to decide on recursion(1). + * + * 1) all modes for tp[i]=tp[imin] should be the same wrt + * S_ISDIR, thanks to base_name_compare(). */ - tree_entry_extract(t1, &path, &mode); - pathlen = tree_entry_len(&t1->entry); + tree_entry_extract(&tp[imin], &path, &mode); + pathlen = tree_entry_len(&tp[imin].entry); isdir = S_ISDIR(mode); + sha1 = NULL; mode = 0; } @@ -115,18 +207,81 @@ static void show_path(struct strbuf *base, struct diff_options *opt, emitthis = DIFF_OPT_TST(opt, TREE_IN_RECURSIVE); } - strbuf_add(base, path, pathlen); + if (emitthis) { + int keep; + struct combine_diff_path *pprev = p; + p = path_appendnew(p, nparent, base, path, pathlen, mode, sha1); + + for (i = 0; i < nparent; ++i) { + /* + * tp[i] is valid, if present and if tp[i]==tp[imin] - + * otherwise, we should ignore it. + */ + int tpi_valid = tp && !(tp[i].entry.mode & S_IFXMIN_NEQ); + + const unsigned char *sha1_i; + unsigned mode_i; + + p->parent[i].status = + !t ? DIFF_STATUS_DELETED : + tpi_valid ? + DIFF_STATUS_MODIFIED : + DIFF_STATUS_ADDED; + + if (tpi_valid) { + sha1_i = tp[i].entry.sha1; + mode_i = tp[i].entry.mode; + } + else { + sha1_i = NULL; + mode_i = 0; + } + + p->parent[i].mode = mode_i; + hashcpy(p->parent[i].sha1, sha1_i ? sha1_i : null_sha1); + } - if (emitthis) - emit_diff(opt, base, t1, t2); + keep = 1; + if (opt->pathchange) + keep = opt->pathchange(opt, p); + + /* + * If a path was filtered or consumed - we don't need to add it + * to the list and can reuse its memory, leaving it as + * pre-allocated element on the tail. + * + * On the other hand, if path needs to be kept, we need to + * correct its .next to NULL, as it was pre-initialized to how + * much memory was allocated. + * + * see path_appendnew() for details. + */ + if (!keep) + p = pprev; + else + p->next = NULL; + } if (recurse) { + const unsigned char **parents_sha1; + + parents_sha1 = xalloca(nparent * sizeof(parents_sha1[0])); + for (i = 0; i < nparent; ++i) { + /* same rule as in emitthis */ + int tpi_valid = tp && !(tp[i].entry.mode & S_IFXMIN_NEQ); + + parents_sha1[i] = tpi_valid ? tp[i].entry.sha1 + : NULL; + } + + strbuf_add(base, path, pathlen); strbuf_addch(base, '/'); - ll_diff_tree_sha1(t1 ? t1->entry.sha1 : NULL, - t2 ? t2->entry.sha1 : NULL, base, opt); + p = ll_diff_tree_paths(p, sha1, parents_sha1, nparent, base, opt); + xalloca_free(parents_sha1); } strbuf_setlen(base, old_baselen); + return p; } static void skip_uninteresting(struct tree_desc *t, struct strbuf *base, @@ -145,59 +300,260 @@ static void skip_uninteresting(struct tree_desc *t, struct strbuf *base, } } -static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, - struct strbuf *base, struct diff_options *opt) + +/* + * generate paths for combined diff D(sha1,parents_sha1[]) + * + * Resulting paths are appended to combine_diff_path linked list, and also, are + * emitted on the go via opt->pathchange() callback, so it is possible to + * process the result as batch or incrementally. + * + * The paths are generated scanning new tree and all parents trees + * simultaneously, similarly to what diff_tree() was doing for 2 trees. + * The theory behind such scan is as follows: + * + * + * D(T,P1...Pn) calculation scheme + * ------------------------------- + * + * D(T,P1...Pn) = D(T,P1) ^ ... ^ D(T,Pn) (regarding resulting paths set) + * + * D(T,Pj) - diff between T..Pj + * D(T,P1...Pn) - combined diff from T to parents P1,...,Pn + * + * + * We start from all trees, which are sorted, and compare their entries in + * lock-step: + * + * T P1 Pn + * - - - + * |t| |p1| |pn| + * |-| |--| ... |--| imin = argmin(p1...pn) + * | | | | | | + * |-| |--| |--| + * |.| |. | |. | + * . . . + * . . . + * + * at any time there could be 3 cases: + * + * 1) t < p[imin]; + * 2) t > p[imin]; + * 3) t = p[imin]. + * + * Schematic deduction of what every case means, and what to do, follows: + * + * 1) t < p[imin] -> ∀j t ∉ Pj -> "+t" ∈ D(T,Pj) -> D += "+t"; t↓ + * + * 2) t > p[imin] + * + * 2.1) ∃j: pj > p[imin] -> "-p[imin]" ∉ D(T,Pj) -> D += ø; ∀ pi=p[imin] pi↓ + * 2.2) ∀i pi = p[imin] -> pi ∉ T -> "-pi" ∈ D(T,Pi) -> D += "-p[imin]"; ∀i pi↓ + * + * 3) t = p[imin] + * + * 3.1) ∃j: pj > p[imin] -> "+t" ∈ D(T,Pj) -> only pi=p[imin] remains to investigate + * 3.2) pi = p[imin] -> investigate δ(t,pi) + * | + * | + * v + * + * 3.1+3.2) looking at δ(t,pi) ∀i: pi=p[imin] - if all != ø -> + * + * ⎧δ(t,pi) - if pi=p[imin] + * -> D += ⎨ + * ⎩"+t" - if pi>p[imin] + * + * + * in any case t↓ ∀ pi=p[imin] pi↓ + * + * + * ~~~~~~~~ + * + * NOTE + * + * Usual diff D(A,B) is by definition the same as combined diff D(A,[B]), + * so this diff paths generator can, and is used, for plain diffs + * generation too. + * + * Please keep attention to the common D(A,[B]) case when working on the + * code, in order not to slow it down. + * + * NOTE + * nparent must be > 0. + */ + + +/* ∀ pi=p[imin] pi↓ */ +static inline void update_tp_entries(struct tree_desc *tp, int nparent) { - struct tree_desc t1, t2; - void *t1tree, *t2tree; + int i; + for (i = 0; i < nparent; ++i) + if (!(tp[i].entry.mode & S_IFXMIN_NEQ)) + update_tree_entry(&tp[i]); +} - t1tree = fill_tree_descriptor(&t1, old); - t2tree = fill_tree_descriptor(&t2, new); +static struct combine_diff_path *ll_diff_tree_paths( + struct combine_diff_path *p, const unsigned char *sha1, + const unsigned char **parents_sha1, int nparent, + struct strbuf *base, struct diff_options *opt) +{ + struct tree_desc t, *tp; + void *ttree, **tptree; + int i; + + tp = xalloca(nparent * sizeof(tp[0])); + tptree = xalloca(nparent * sizeof(tptree[0])); + + /* + * load parents first, as they are probably already cached. + * + * ( log_tree_diff() parses commit->parent before calling here via + * diff_tree_sha1(parent, commit) ) + */ + for (i = 0; i < nparent; ++i) + tptree[i] = fill_tree_descriptor(&tp[i], parents_sha1[i]); + ttree = fill_tree_descriptor(&t, sha1); /* Enable recursion indefinitely */ opt->pathspec.recursive = DIFF_OPT_TST(opt, RECURSIVE); for (;;) { - int cmp; + int imin, cmp; if (diff_can_quit_early(opt)) break; + if (opt->pathspec.nr) { - skip_uninteresting(&t1, base, opt); - skip_uninteresting(&t2, base, opt); + skip_uninteresting(&t, base, opt); + for (i = 0; i < nparent; i++) + skip_uninteresting(&tp[i], base, opt); + } + + /* comparing is finished when all trees are done */ + if (!t.size) { + int done = 1; + for (i = 0; i < nparent; ++i) + if (tp[i].size) { + done = 0; + break; + } + if (done) + break; + } + + /* + * lookup imin = argmin(p1...pn), + * mark entries whether they =p[imin] along the way + */ + imin = 0; + tp[0].entry.mode &= ~S_IFXMIN_NEQ; + + for (i = 1; i < nparent; ++i) { + cmp = tree_entry_pathcmp(&tp[i], &tp[imin]); + if (cmp < 0) { + imin = i; + tp[i].entry.mode &= ~S_IFXMIN_NEQ; + } + else if (cmp == 0) { + tp[i].entry.mode &= ~S_IFXMIN_NEQ; + } + else { + tp[i].entry.mode |= S_IFXMIN_NEQ; + } } - if (!t1.size && !t2.size) - break; - cmp = tree_entry_pathcmp(&t1, &t2); + /* fixup markings for entries before imin */ + for (i = 0; i < imin; ++i) + tp[i].entry.mode |= S_IFXMIN_NEQ; /* pi > p[imin] */ - /* t1 = t2 */ - if (cmp == 0) { - if (DIFF_OPT_TST(opt, FIND_COPIES_HARDER) || - hashcmp(t1.entry.sha1, t2.entry.sha1) || - (t1.entry.mode != t2.entry.mode)) - show_path(base, opt, &t1, &t2); - update_tree_entry(&t1); - update_tree_entry(&t2); + + /* compare t vs p[imin] */ + cmp = tree_entry_pathcmp(&t, &tp[imin]); + + /* t = p[imin] */ + if (cmp == 0) { + /* are either pi > p[imin] or diff(t,pi) != ø ? */ + if (!DIFF_OPT_TST(opt, FIND_COPIES_HARDER)) { + for (i = 0; i < nparent; ++i) { + /* p[i] > p[imin] */ + if (tp[i].entry.mode & S_IFXMIN_NEQ) + continue; + + /* diff(t,pi) != ø */ + if (hashcmp(t.entry.sha1, tp[i].entry.sha1) || + (t.entry.mode != tp[i].entry.mode)) + continue; + + goto skip_emit_t_tp; + } + } + + /* D += {δ(t,pi) if pi=p[imin]; "+a" if pi > p[imin]} */ + p = emit_path(p, base, opt, nparent, + &t, tp, imin); + + skip_emit_t_tp: + /* t↓, ∀ pi=p[imin] pi↓ */ + update_tree_entry(&t); + update_tp_entries(tp, nparent); } - /* t1 < t2 */ + /* t < p[imin] */ else if (cmp < 0) { - show_path(base, opt, &t1, /*t2=*/NULL); - update_tree_entry(&t1); + /* D += "+t" */ + p = emit_path(p, base, opt, nparent, + &t, /*tp=*/NULL, -1); + + /* t↓ */ + update_tree_entry(&t); } - /* t1 > t2 */ + /* t > p[imin] */ else { - show_path(base, opt, /*t1=*/NULL, &t2); - update_tree_entry(&t2); + /* ∀i pi=p[imin] -> D += "-p[imin]" */ + if (!DIFF_OPT_TST(opt, FIND_COPIES_HARDER)) { + for (i = 0; i < nparent; ++i) + if (tp[i].entry.mode & S_IFXMIN_NEQ) + goto skip_emit_tp; + } + + p = emit_path(p, base, opt, nparent, + /*t=*/NULL, tp, imin); + + skip_emit_tp: + /* ∀ pi=p[imin] pi↓ */ + update_tp_entries(tp, nparent); } } - free(t2tree); - free(t1tree); - return 0; + free(ttree); + for (i = nparent-1; i >= 0; i--) + free(tptree[i]); + xalloca_free(tptree); + xalloca_free(tp); + + return p; +} + +struct combine_diff_path *diff_tree_paths( + struct combine_diff_path *p, const unsigned char *sha1, + const unsigned char **parents_sha1, int nparent, + struct strbuf *base, struct diff_options *opt) +{ + p = ll_diff_tree_paths(p, sha1, parents_sha1, nparent, base, opt); + + /* + * free pre-allocated last element, if any + * (see path_appendnew() for details about why) + */ + if (p->next) { + free(p->next); + p->next = NULL; + } + + return p; } /* @@ -308,6 +664,26 @@ static void try_to_follow_renames(const unsigned char *old, const unsigned char q->nr = 1; } +static int ll_diff_tree_sha1(const unsigned char *old, const unsigned char *new, + struct strbuf *base, struct diff_options *opt) +{ + struct combine_diff_path phead, *p; + pathchange_fn_t pathchange_old = opt->pathchange; + + phead.next = NULL; + opt->pathchange = emit_diff_first_parent_only; + diff_tree_paths(&phead, new, &old, 1, base, opt); + + for (p = phead.next; p;) { + struct combine_diff_path *pprev = p; + p = p->next; + free(pprev); + } + + opt->pathchange = pathchange_old; + return 0; +} + int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base_str, struct diff_options *opt) { struct strbuf base; -- cgit v0.10.2-6-g49f6 From 7195fbfaf5a539b8e8358097e02b63991e78a565 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 24 Feb 2014 20:21:51 +0400 Subject: combine-diff: speed it up, by using multiparent diff tree-walker directly As was recently shown in "combine-diff: optimize combine_diff_path sets intersection", combine-diff runs very slowly. In that commit we optimized paths sets intersection, but that accounted only for ~ 25% of the slowness, and as my tracing showed, for linux.git v3.10..v3.11, for merges a lot of time is spent computing diff(commit,commit^2) just to only then intersect that huge diff to almost small set of files from diff(commit,commit^1). In previous commit, we described the problem in more details, and reworked the diff tree-walker to be general one - i.e. to work in multiple parent case too. Now is the time to take advantage of it for finding paths for combine diff. The implementation is straightforward - if we know, we can get generated diff paths directly, and at present that means no diff filtering or rename/copy detection was requested(*), we can call multiparent tree-walker directly and get ready paths. (*) because e.g. at present, all diffcore transformations work on diff_filepair queues, but in the future, that limitation can be lifted, if filters would operate directly on combine_diff_paths. Timings for `git log --raw --no-abbrev --no-renames` without `-c` ("git log") and with `-c` ("git log -c") and with `-c --merges` ("git log -c --merges") before and after the patch are as follows: linux.git v3.10..v3.11 log log -c log -c --merges before 1.9s 16.4s 15.2s after 1.9s 2.4s 1.1s The result stayed the same. Signed-off-by: Kirill Smelkov Signed-off-by: Junio C Hamano diff --git a/combine-diff.c b/combine-diff.c index 1732dfd..12764fb 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -1303,7 +1303,7 @@ static const char *path_path(void *obj) /* find set of paths that every parent touches */ -static struct combine_diff_path *find_paths(const unsigned char *sha1, +static struct combine_diff_path *find_paths_generic(const unsigned char *sha1, const struct sha1_array *parents, struct diff_options *opt) { struct combine_diff_path *paths = NULL; @@ -1316,6 +1316,7 @@ static struct combine_diff_path *find_paths(const unsigned char *sha1, /* tell diff_tree to emit paths in sorted (=tree) order */ opt->orderfile = NULL; + /* D(A,P1...Pn) = D(A,P1) ^ ... ^ D(A,Pn) (wrt paths) */ for (i = 0; i < num_parent; i++) { /* * show stat against the first parent even when doing @@ -1346,6 +1347,35 @@ static struct combine_diff_path *find_paths(const unsigned char *sha1, } +/* + * find set of paths that everybody touches, assuming diff is run without + * rename/copy detection, etc, comparing all trees simultaneously (= faster). + */ +static struct combine_diff_path *find_paths_multitree( + const unsigned char *sha1, const struct sha1_array *parents, + struct diff_options *opt) +{ + int i, nparent = parents->nr; + const unsigned char **parents_sha1; + struct combine_diff_path paths_head; + struct strbuf base; + + parents_sha1 = xmalloc(nparent * sizeof(parents_sha1[0])); + for (i = 0; i < nparent; i++) + parents_sha1[i] = parents->sha1[i]; + + /* fake list head, so worker can assume it is non-NULL */ + paths_head.next = NULL; + + strbuf_init(&base, PATH_MAX); + diff_tree_paths(&paths_head, sha1, parents_sha1, nparent, &base, opt); + + strbuf_release(&base); + free(parents_sha1); + return paths_head.next; +} + + void diff_tree_combined(const unsigned char *sha1, const struct sha1_array *parents, int dense, @@ -1355,6 +1385,7 @@ void diff_tree_combined(const unsigned char *sha1, struct diff_options diffopts; struct combine_diff_path *p, *paths; int i, num_paths, needsep, show_log_first, num_parent = parents->nr; + int need_generic_pathscan; /* nothing to do, if no parents */ if (!num_parent) @@ -1377,11 +1408,58 @@ void diff_tree_combined(const unsigned char *sha1, /* find set of paths that everybody touches * - * NOTE find_paths() also handles --stat, as it computes - * diff(sha1,parent_i) for all i to do the job, specifically - * for parent0. + * NOTE + * + * Diffcore transformations are bound to diff_filespec and logic + * comparing two entries - i.e. they do not apply directly to combine + * diff. + * + * If some of such transformations is requested - we launch generic + * path scanning, which works significantly slower compared to + * simultaneous all-trees-in-one-go scan in find_paths_multitree(). + * + * TODO some of the filters could be ported to work on + * combine_diff_paths - i.e. all functionality that skips paths, so in + * theory, we could end up having only multitree path scanning. + * + * NOTE please keep this semantically in sync with diffcore_std() */ - paths = find_paths(sha1, parents, &diffopts); + need_generic_pathscan = opt->skip_stat_unmatch || + DIFF_OPT_TST(opt, FOLLOW_RENAMES) || + opt->break_opt != -1 || + opt->detect_rename || + opt->pickaxe || + opt->filter; + + + if (need_generic_pathscan) { + /* + * NOTE generic case also handles --stat, as it computes + * diff(sha1,parent_i) for all i to do the job, specifically + * for parent0. + */ + paths = find_paths_generic(sha1, parents, &diffopts); + } + else { + int stat_opt; + paths = find_paths_multitree(sha1, parents, &diffopts); + + /* + * show stat against the first parent even + * when doing combined diff. + */ + stat_opt = (opt->output_format & + (DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT)); + if (stat_opt) { + diffopts.output_format = stat_opt; + + diff_tree_sha1(parents->sha1[0], sha1, "", &diffopts); + diffcore_std(&diffopts); + if (opt->orderfile) + diffcore_order(opt->orderfile); + diff_flush(&diffopts); + } + } /* find out number of surviving paths */ for (num_paths = 0, p = paths; p; p = p->next) diff --git a/diff.c b/diff.c index cda4aa8..f2fff46 100644 --- a/diff.c +++ b/diff.c @@ -4764,6 +4764,7 @@ void diffcore_fix_diff_index(struct diff_options *options) void diffcore_std(struct diff_options *options) { + /* NOTE please keep the following in sync with diff_tree_combined() */ if (options->skip_stat_unmatch) diffcore_skip_stat_unmatch(options); if (!options->found_follow) { -- cgit v0.10.2-6-g49f6 From 22f4c27e68f448d5fce316a73ea3f7bab6aa1268 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 9 Apr 2014 16:48:27 +0400 Subject: mingw: activate alloca Both MSVC and MINGW have alloca(3) definitions in malloc.h, so by moving win32-compat alloca.h from compat/vcbuild/include/ to compat/win32/ , which is included by both MSVC and MINGW CFLAGS, we can make alloca() work on both those Windows environments. In MINGW, malloc.h has explicit check for GNUC and if it is so, defines alloca to __builtin_alloca, so it looks like we don't need to add any code to here-shipped alloca.h to get optimum performance. Compile-tested on Windows in MSysGit. Signed-off-by: Kirill Smelkov Acked-by: Erik Faye-Lund Signed-off-by: Junio C Hamano diff --git a/compat/vcbuild/include/alloca.h b/compat/vcbuild/include/alloca.h deleted file mode 100644 index c0d7985..0000000 --- a/compat/vcbuild/include/alloca.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/compat/win32/alloca.h b/compat/win32/alloca.h new file mode 100644 index 0000000..c0d7985 --- /dev/null +++ b/compat/win32/alloca.h @@ -0,0 +1 @@ +#include diff --git a/config.mak.uname b/config.mak.uname index 71602ee..9967de6 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -480,6 +480,7 @@ ifeq ($(uname_S),NONSTOP_KERNEL) endif ifneq (,$(findstring MINGW,$(uname_S))) pathsep = ; + HAVE_ALLOCA_H = YesPlease NO_PREAD = YesPlease NEEDS_CRYPTO_WITH_SSL = YesPlease NO_LIBGEN_H = YesPlease -- cgit v0.10.2-6-g49f6