From c46c406ae1ee30f64a13083edfa5683d2685fd61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= <pclouds@gmail.com>
Date: Sat, 18 Aug 2018 16:41:22 +0200
Subject: trace.h: support nested performance tracing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Performance measurements are listed right now as a flat list, which
is fine when we measure big blocks. But when we start adding more and
more measurements, some of them are just parts of a bigger
measurement, and a flat list gives the wrong impression that they are
all executed at the same level instead of nested.

Add trace_performance_enter() and trace_performance_leave() to allow
indenting these nested measurements. For now it does not help much,
because the only nested thing is (lazy) name hash initialization
(e.g. called in diff-index from "git status"). It will help more
later, as I am going to add more tracing that is actually nested.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/diff-lib.c b/diff-lib.c
index 732f684..d5bbb7e 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -518,11 +518,11 @@ static int diff_cache(struct rev_info *revs,
 int run_diff_index(struct rev_info *revs, int cached)
 {
 	struct object_array_entry *ent;
-	uint64_t start = getnanotime();
 
 	if (revs->pending.nr != 1)
 		BUG("run_diff_index must be passed exactly one tree");
 
+	trace_performance_enter();
 	ent = revs->pending.objects;
 	if (diff_cache(revs, &ent->item->oid, ent->name, cached))
 		exit(128);
@@ -531,7 +531,7 @@ int run_diff_index(struct rev_info *revs, int cached)
 	diffcore_fix_diff_index(&revs->diffopt);
 	diffcore_std(&revs->diffopt);
 	diff_flush(&revs->diffopt);
-	trace_performance_since(start, "diff-index");
+	trace_performance_leave("diff-index");
 	return 0;
 }
 
diff --git a/dir.c b/dir.c
index 32f5f72..18b57b9 100644
--- a/dir.c
+++ b/dir.c
@@ -2263,10 +2263,13 @@ int read_directory(struct dir_struct *dir, struct index_state *istate,
 		   const char *path, int len, const struct pathspec *pathspec)
 {
 	struct untracked_cache_dir *untracked;
-	uint64_t start = getnanotime();
 
-	if (has_symlink_leading_path(path, len))
+	trace_performance_enter();
+
+	if (has_symlink_leading_path(path, len)) {
+		trace_performance_leave("read directory %.*s", len, path);
 		return dir->nr;
+	}
 
 	untracked = validate_untracked_cache(dir, len, pathspec);
 	if (!untracked)
@@ -2302,7 +2305,7 @@ int read_directory(struct dir_struct *dir, struct index_state *istate,
 		dir->nr = i;
 	}
 
-	trace_performance_since(start, "read directory %.*s", len, path);
+	trace_performance_leave("read directory %.*s", len, path);
 	if (dir->untracked) {
 		static int force_untracked_cache = -1;
 		static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS);
diff --git a/name-hash.c b/name-hash.c
index 1638498..1fcda73 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -578,10 +578,10 @@ static void threaded_lazy_init_name_hash(
 
 static void lazy_init_name_hash(struct index_state *istate)
 {
-	uint64_t start = getnanotime();
 
 	if (istate->name_hash_initialized)
 		return;
+	trace_performance_enter();
 	hashmap_init(&istate->name_hash, cache_entry_cmp, NULL, istate->cache_nr);
 	hashmap_init(&istate->dir_hash, dir_entry_cmp, NULL, istate->cache_nr);
 
@@ -602,7 +602,7 @@ static void lazy_init_name_hash(struct index_state *istate)
 	}
 	istate->name_hash_initialized = 1;
-	trace_performance_since(start, "initialize name hash");
+	trace_performance_leave("initialize name hash");
 }
 
 /*
diff --git a/preload-index.c b/preload-index.c
index 4d08d44..d7f7919 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -78,7 +78,6 @@ static void preload_index(struct index_state *index,
 {
 	int threads, i, work, offset;
 	struct thread_data data[MAX_PARALLEL];
-	uint64_t start = getnanotime();
 
 	if (!core_preload_index)
 		return;
@@ -88,6 +87,7 @@ static void preload_index(struct index_state *index,
 		threads = 2;
 	if (threads < 2)
 		return;
+	trace_performance_enter();
 	if (threads > MAX_PARALLEL)
 		threads = MAX_PARALLEL;
 	offset = 0;
@@ -109,7 +109,7 @@ static void preload_index(struct index_state *index,
 		if (pthread_join(p->pthread, NULL))
 			die("unable to join threaded lstat");
 	}
-	trace_performance_since(start, "preload index");
+	trace_performance_leave("preload index");
 }
 
 #endif
diff --git a/read-cache.c b/read-cache.c
index c5fabc8..1c9c88c 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1476,8 +1476,8 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 	const char *typechange_fmt;
 	const char *added_fmt;
 	const char *unmerged_fmt;
-	uint64_t start = getnanotime();
 
+	trace_performance_enter();
 	modified_fmt = (in_porcelain ? "M\t%s\n" : "%s: needs update\n");
 	deleted_fmt = (in_porcelain ? "D\t%s\n" : "%s: needs update\n");
 	typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n");
@@ -1547,7 +1547,7 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 		replace_index_entry(istate, i, new_entry);
 	}
-	trace_performance_since(start, "refresh index");
+	trace_performance_leave("refresh index");
 	return has_errors;
 }
 
@@ -2002,7 +2002,6 @@ static void freshen_shared_index(const char *shared_index, int warn)
 int read_index_from(struct index_state *istate, const char *path,
 		    const char *gitdir)
 {
-	uint64_t start = getnanotime();
 	struct split_index *split_index;
 	int ret;
 	char *base_oid_hex;
@@ -2012,8 +2011,9 @@ int read_index_from(struct index_state *istate, const char *path,
 	if (istate->initialized)
 		return istate->cache_nr;
 
+	trace_performance_enter();
 	ret = do_read_index(istate, path, 0);
-	trace_performance_since(start, "read cache %s", path);
+	trace_performance_leave("read cache %s", path);
 
 	split_index = istate->split_index;
 	if (!split_index || is_null_oid(&split_index->base_oid)) {
@@ -2021,6 +2021,7 @@ int read_index_from(struct index_state *istate, const char *path,
 		return ret;
 	}
 
+	trace_performance_enter();
 	if (split_index->base)
 		discard_index(split_index->base);
 	else
@@ -2037,8 +2038,8 @@ int read_index_from(struct index_state *istate, const char *path,
 	freshen_shared_index(base_path, 0);
 	merge_base_index(istate);
 	post_read_index_from(istate);
-	trace_performance_since(start, "read cache %s", base_path);
 	free(base_path);
+	trace_performance_leave("read cache %s", base_path);
 	return ret;
 }
 
diff --git a/trace.c b/trace.c
index fc623e9..fa4a2e7 100644
--- a/trace.c
+++ b/trace.c
@@ -176,10 +176,30 @@ void trace_strbuf_fl(const char *file, int line, struct trace_key *key,
 	strbuf_release(&buf);
 }
 
+static uint64_t perf_start_times[10];
+static int perf_indent;
+
+uint64_t trace_performance_enter(void)
+{
+	uint64_t now;
+
+	if (!trace_want(&trace_perf_key))
+		return 0;
+
+	now = getnanotime();
+	perf_start_times[perf_indent] = now;
+	if (perf_indent + 1 < ARRAY_SIZE(perf_start_times))
+		perf_indent++;
+	else
+		BUG("Too deep indentation");
+	return now;
+}
+
 static void trace_performance_vprintf_fl(const char *file, int line,
 					 uint64_t nanos, const char *format,
 					 va_list ap)
 {
+	static const char space[] = "          ";
 	struct strbuf buf = STRBUF_INIT;
 
 	if (!prepare_trace_line(file, line, &trace_perf_key, &buf))
@@ -188,7 +208,10 @@ static void trace_performance_vprintf_fl(const char *file, int line,
 	strbuf_addf(&buf, "performance: %.9f s", (double) nanos / 1000000000);
 
 	if (format && *format) {
-		strbuf_addstr(&buf, ": ");
+		if (perf_indent >= strlen(space))
+			BUG("Too deep indentation");
+
+		strbuf_addf(&buf, ":%.*s ", perf_indent, space);
 		strbuf_vaddf(&buf, format, ap);
 	}
 
@@ -244,6 +267,24 @@ void trace_performance_since(uint64_t start, const char *format, ...)
 	va_end(ap);
 }
 
+void trace_performance_leave(const char *format, ...)
+{
+	va_list ap;
+	uint64_t since;
+
+	if (perf_indent)
+		perf_indent--;
+
+	if (!format) /* Allow callers to leave without tracing anything */
+		return;
+
+	since = perf_start_times[perf_indent];
+	va_start(ap, format);
+	trace_performance_vprintf_fl(NULL, 0, getnanotime() - since,
+				     format, ap);
+	va_end(ap);
+}
+
 #else
 
 void trace_printf_key_fl(const char *file, int line, struct trace_key *key,
@@ -273,6 +314,24 @@ void trace_performance_fl(const char *file, int line, uint64_t nanos,
 	va_end(ap);
 }
 
+void trace_performance_leave_fl(const char *file, int line,
+				uint64_t nanos, const char *format, ...)
+{
+	va_list ap;
+	uint64_t since;
+
+	if (perf_indent)
+		perf_indent--;
+
+	if (!format) /* Allow callers to leave without tracing anything */
+		return;
+
+	since = perf_start_times[perf_indent];
+	va_start(ap, format);
+	trace_performance_vprintf_fl(file, line, nanos - since, format, ap);
+	va_end(ap);
+}
+
 #endif /* HAVE_VARIADIC_MACROS */
 
 
@@ -411,13 +470,11 @@ uint64_t getnanotime(void)
 	}
 }
 
-static uint64_t command_start_time;
 static struct strbuf command_line = STRBUF_INIT;
 
 static void print_command_performance_atexit(void)
 {
-	trace_performance_since(command_start_time, "git command:%s",
-				command_line.buf);
+	trace_performance_leave("git command:%s", command_line.buf);
 }
 
 void trace_command_performance(const char **argv)
@@ -425,10 +482,10 @@ void trace_command_performance(const char **argv)
 	if (!trace_want(&trace_perf_key))
 		return;
 
-	if (!command_start_time)
+	if (!command_line.len)
 		atexit(print_command_performance_atexit);
 
 	strbuf_reset(&command_line);
 	sq_quote_argv_pretty(&command_line, argv);
-	command_start_time = getnanotime();
+	trace_performance_enter();
 }
diff --git a/trace.h b/trace.h
index 2b6a1bc..171b256 100644
--- a/trace.h
+++ b/trace.h
@@ -23,6 +23,7 @@ extern void trace_disable(struct trace_key *key);
 extern uint64_t getnanotime(void);
 extern void trace_command_performance(const char **argv);
 extern void trace_verbatim(struct trace_key *key, const void *buf, unsigned len);
+uint64_t trace_performance_enter(void);
 
 #ifndef HAVE_VARIADIC_MACROS
 
@@ -45,6 +46,9 @@ extern void trace_performance(uint64_t nanos, const char *format, ...);
 __attribute__((format (printf, 2, 3)))
 extern void trace_performance_since(uint64_t start, const char *format, ...);
 
+__attribute__((format (printf, 1, 2)))
+void trace_performance_leave(const char *format, ...);
+
 #else
 
 /*
@@ -118,6 +122,14 @@ extern void trace_performance_since(uint64_t start, const char *format, ...);
 			     __VA_ARGS__);			\
 	} while (0)
 
+#define trace_performance_leave(...)				\
+	do {							\
+		if (trace_pass_fl(&trace_perf_key))		\
+			trace_performance_leave_fl(TRACE_CONTEXT, __LINE__, \
+						   getnanotime(), \
+						   __VA_ARGS__); \
+	} while (0)
+
 /* backend functions, use non-*fl macros instead */
 __attribute__((format (printf, 4, 5)))
 extern void trace_printf_key_fl(const char *file, int line, struct trace_key *key,
@@ -130,6 +142,9 @@ extern void trace_strbuf_fl(const char *file, int line, struct trace_key *key,
 __attribute__((format (printf, 4, 5)))
 extern void trace_performance_fl(const char *file, int line, uint64_t nanos,
 				 const char *fmt, ...);
+__attribute__((format (printf, 4, 5)))
+extern void trace_performance_leave_fl(const char *file, int line,
+				       uint64_t nanos, const char *fmt, ...);
 
 static inline int trace_pass_fl(struct trace_key *key)
 {
 	return key->fd || !key->initialized;
--
cgit v0.10.2-6-g49f6
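The calling convention this patch establishes is easiest to see with a small
usage sketch (hypothetical functions, not part of the patch; timings made up):

	static void read_phase(void)
	{
		trace_performance_enter();
		/* ... the nested, inner region ... */
		trace_performance_leave("read phase");
	}

	static void whole_operation(void)
	{
		trace_performance_enter();
		read_phase();
		/* ... more work ... */
		trace_performance_leave("whole operation");
	}

With GIT_TRACE_PERFORMANCE enabled, the inner region is reported first (a
region prints when it is left) and one level deeper, roughly like this
(timestamp and file:line prefixes trimmed):

	performance: 0.000123456 s:  read phase
	performance: 0.000345678 s: whole operation

Note that trace_performance_leave(NULL) pops a nesting level without printing
anything; that is what the "Allow callers to leave without tracing anything"
comment in the code is about.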
From 0d1ed5963da5d7410ad86eeaa64b3710e4aaa0c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= <pclouds@gmail.com>
Date: Sat, 18 Aug 2018 16:41:23 +0200
Subject: unpack-trees: add performance tracing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We're going to optimize unpack_trees() a bit in the following
patches. Let's add some tracing so we can measure how long it takes
before and after. This is the baseline ("git checkout -" on
webkit.git, 275k files in the worktree):

    performance: 0.056651714 s: read cache .git/index
    performance: 0.183101080 s: preload index
    performance: 0.008584433 s: refresh index
    performance: 0.633767589 s: traverse_trees
    performance: 0.340265448 s: check_updates
    performance: 0.381884638 s: cache_tree_update
    performance: 1.401562947 s: unpack_trees
    performance: 0.338687914 s: write index, changed mask = 2e
    performance: 0.411927922 s: traverse_trees
    performance: 0.000023335 s: check_updates
    performance: 0.423697246 s: unpack_trees
    performance: 0.423708360 s: diff-index
    performance: 2.559524127 s: git command: git checkout -

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/cache-tree.c b/cache-tree.c
index 181d591..caafbff 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -433,7 +433,9 @@ int cache_tree_update(struct index_state *istate, int flags)
 	if (i)
 		return i;
 
+	trace_performance_enter();
 	i = update_one(it, cache, entries, "", 0, &skip, flags);
+	trace_performance_leave("cache_tree_update");
 	if (i < 0)
 		return i;
 	istate->cache_changed |= CACHE_TREE_CHANGED;
diff --git a/unpack-trees.c b/unpack-trees.c
index f9efee08..6d9f692 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -345,6 +345,7 @@ static int check_updates(struct unpack_trees_options *o)
 	struct checkout state = CHECKOUT_INIT;
 	int i;
 
+	trace_performance_enter();
 	state.force = 1;
 	state.quiet = 1;
 	state.refresh_cache = 1;
@@ -414,6 +415,7 @@ static int check_updates(struct unpack_trees_options *o)
 	errs |= finish_delayed_checkout(&state);
 	if (o->update)
 		git_attr_set_direction(GIT_ATTR_CHECKIN, NULL);
+	trace_performance_leave("check_updates");
 	return errs != 0;
 }
 
@@ -1285,6 +1287,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 	if (len > MAX_UNPACK_TREES)
 		die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES);
 
+	trace_performance_enter();
 	memset(&el, 0, sizeof(el));
 	if (!core_apply_sparse_checkout || !o->update)
 		o->skip_sparse_checkout = 1;
@@ -1357,7 +1360,10 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 			}
 		}
 
-		if (traverse_trees(len, t, &info) < 0)
+		trace_performance_enter();
+		ret = traverse_trees(len, t, &info);
+		trace_performance_leave("traverse_trees");
+		if (ret < 0)
 			goto return_failed;
 	}
 
@@ -1449,6 +1455,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 	o->src_index = NULL;
 
 done:
+	trace_performance_leave("unpack_trees");
 	clear_exclude_list(&el);
 	return ret;
--
cgit v0.10.2-6-g49f6
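To reproduce a listing like the one above on any repository, enable the
performance trace key, e.g.:

	GIT_TRACE_PERFORMANCE=1 git checkout -

The measurements go to stderr (the variable also accepts an absolute path to
log to a file instead), and with the nesting support from the first patch the
nested regions show up indented.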
From b4da37380b7774248086f42bcd59397a44e1ac79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3%A1i=20Ng=E1=BB=8Dc=20Duy?= <pclouds@gmail.com>
Date: Sat, 18 Aug 2018 16:41:24 +0200
Subject: unpack-trees: optimize walking same trees with cache-tree
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to merge one or many trees with the index, unpack-trees code
walks multiple trees in parallel with the index and performs an n-way
merge. If we find out at the start of a directory that all trees are
the same (by comparing OIDs) and a cache-tree happens to be available
for that directory as well, we can avoid walking the trees because we
already know what these trees contain: it's flattened in what's
called "the index".

The upside is of course a lot less I/O, since we can potentially skip
lots of trees (think subtrees). We also save CPU because we don't
have to inflate and apply the deltas. The downside is of course more
fragile code, since the logic in some functions is now duplicated
elsewhere.

"checkout -" with this patch on webkit.git (275k files):

    baseline      new
  --------------------------------------------------------------------
  0.056651714   0.080394752   s: read cache .git/index
  0.183101080   0.216010838   s: preload index
  0.008584433   0.008534301   s: refresh index
  0.633767589   0.251992198   s: traverse_trees
  0.340265448   0.377031383   s: check_updates
  0.381884638   0.372768105   s: cache_tree_update
  1.401562947   1.045887251   s: unpack_trees
  0.338687914   0.314983512   s: write index, changed mask = 2e
  0.411927922   0.062572653   s: traverse_trees
  0.000023335   0.000022544   s: check_updates
  0.423697246   0.073795585   s: unpack_trees
  0.423708360   0.073807557   s: diff-index
  2.559524127   1.938191592   s: git command: git checkout -

Another measurement, from Ben running "git checkout" with over 500k
trees (on the whole series):

    baseline      new
  ----------------------------------------------------------------------
  0.535510167   0.556558733   s: read cache .git/index
  0.3057373     0.3147105     s: initialize name hash
  0.0184082     0.023558433   s: preload index
  0.086910967   0.089085967   s: refresh index
  7.889590767   2.191554433   s: unpack trees
  0.120760833   0.131941267   s: update worktree after a merge
  2.2583504     2.572663167   s: repair cache-tree
  0.8916137     0.959495233   s: write index, changed mask = 28
  3.405199233   0.2710663     s: unpack trees
  0.000999667   0.0021554     s: update worktree after a merge
  3.4063306     0.273318333   s: diff-index
  16.9524923    9.462943133   s: git command: git.exe checkout

This command calls unpack_trees() twice: the first time for a 2-way
merge and the second for a 1-way merge. Both times, "unpack trees"
time is reduced to one third. Overall time reduction is not that
impressive of course, because index operations take a big chunk. And
there's that repair cache-tree line.

PS. A note about cache-tree invalidation and the use of it in this
code. We do invalidate cache-tree in the _source_ index when we add
new entries to the (temporary) "result" index. But we also use the
cache-tree from the source index in this optimization. Does this mean
we end up having no cache-tree in the source index to activate this
optimization? The answer is twisted: the order of finding a good
cache-tree and invalidating it matters. In this case we check for a
good cache-tree first in all_trees_same_as_cache_tree(), then we
start to merge things and potentially invalidate that same cache-tree
in the process. Since cache-tree invalidation happens after the
optimization kicks in, we're still good. But we may lose that
cache-tree at the very first call_unpack_fn() call in
traverse_by_cache_tree().

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
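One detail of the fast-path test in the diff below is worth unpacking: with n
trees being walked, each bit of dirmask records whether the current entry is
a directory in the corresponding tree, so only a full mask can qualify. A toy
standalone demonstration of just that bit test (values made up for
illustration):

	#include <stdio.h>

	int main(void)
	{
		int n = 3;				/* trees being merged */
		unsigned long full = (1UL << n) - 1;	/* 0x7: a directory in all three */

		printf("dirmask 0x7 qualifies: %d\n", 0x7 == full);	/* 1 */
		printf("dirmask 0x5 qualifies: %d\n", 0x5 == full);	/* 0: not a directory everywhere */
		return 0;
	}

The remaining conditions -- identical OIDs across all n trees at this path,
and cache_tree_matches_traversal() finding a valid cache-tree entry covering
it -- are checked in all_trees_same_as_cache_tree() in the diff below.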
diff --git a/unpack-trees.c b/unpack-trees.c
index 6d9f692..8376663 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -635,6 +635,102 @@ static inline int are_same_oid(struct name_entry *name_j, struct name_entry *nam
 	return name_j->oid && name_k->oid && !oidcmp(name_j->oid, name_k->oid);
 }
 
+static int all_trees_same_as_cache_tree(int n, unsigned long dirmask,
+					struct name_entry *names,
+					struct traverse_info *info)
+{
+	struct unpack_trees_options *o = info->data;
+	int i;
+
+	if (!o->merge || dirmask != ((1 << n) - 1))
+		return 0;
+
+	for (i = 1; i < n; i++)
+		if (!are_same_oid(names, names + i))
+			return 0;
+
+	return cache_tree_matches_traversal(o->src_index->cache_tree, names, info);
+}
+
+static int index_pos_by_traverse_info(struct name_entry *names,
+				      struct traverse_info *info)
+{
+	struct unpack_trees_options *o = info->data;
+	int len = traverse_path_len(info, names);
+	char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */);
+	int pos;
+
+	make_traverse_path(name, info, names);
+	name[len++] = '/';
+	name[len] = '\0';
+	pos = index_name_pos(o->src_index, name, len);
+	if (pos >= 0)
+		BUG("This is a directory and should not exist in index");
+	pos = -pos - 1;
+	if (!starts_with(o->src_index->cache[pos]->name, name) ||
+	    (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name)))
+		BUG("pos must point at the first entry in this directory");
+	free(name);
+	return pos;
+}
+
+/*
+ * Fast path if we detect that all trees are the same as cache-tree at this
+ * path. We'll walk these trees recursively using cache-tree/index instead of
+ * ODB since already know what these trees contain.
+ */
+static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names,
+				  struct name_entry *names,
+				  struct traverse_info *info)
+{
+	struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
+	struct unpack_trees_options *o = info->data;
+	int i, d;
+
+	if (!o->merge)
+		BUG("We need cache-tree to do this optimization");
+
+	/*
+	 * Do what unpack_callback() and unpack_nondirectories() normally
+	 * do. But we walk all paths in an iterative loop instead.
+	 *
+	 * D/F conflicts and higher stage entries are not a concern
+	 * because cache-tree would be invalidated and we would never
+	 * get here in the first place.
+	 */
+	for (i = 0; i < nr_entries; i++) {
+		struct cache_entry *tree_ce;
+		int len, rc;
+
+		src[0] = o->src_index->cache[pos + i];
+
+		len = ce_namelen(src[0]);
+		tree_ce = xcalloc(1, cache_entry_size(len));
+
+		tree_ce->ce_mode = src[0]->ce_mode;
+		tree_ce->ce_flags = create_ce_flags(0);
+		tree_ce->ce_namelen = len;
+		oidcpy(&tree_ce->oid, &src[0]->oid);
+		memcpy(tree_ce->name, src[0]->name, len + 1);
+
+		for (d = 1; d <= nr_names; d++)
+			src[d] = tree_ce;
+
+		rc = call_unpack_fn((const struct cache_entry * const *)src, o);
+		free(tree_ce);
+		if (rc < 0)
+			return rc;
+
+		mark_ce_used(src[0], o);
+	}
+	if (o->debug_unpack)
+		printf("Unpacked %d entries from %s to %s using cache-tree\n",
+		       nr_entries,
+		       o->src_index->cache[pos]->name,
+		       o->src_index->cache[pos + nr_entries - 1]->name);
+	return 0;
+}
+
 static int traverse_trees_recursive(int n, unsigned long dirmask,
 				    unsigned long df_conflicts,
 				    struct name_entry *names,
@@ -646,6 +742,27 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
 	void *buf[MAX_UNPACK_TREES];
 	struct traverse_info newinfo;
 	struct name_entry *p;
+	int nr_entries;
+
+	nr_entries = all_trees_same_as_cache_tree(n, dirmask, names, info);
+	if (nr_entries > 0) {
+		struct unpack_trees_options *o = info->data;
+		int pos = index_pos_by_traverse_info(names, info);
+
+		if (!o->merge || df_conflicts)
+			BUG("Wrong condition to get here buddy");
+
+		/*
+		 * All entries up to 'pos' must have been processed
+		 * (i.e. marked CE_UNPACKED) at this point. But to be safe,
+		 * save and restore cache_bottom anyway to not miss
+		 * unprocessed entries before 'pos'.
+		 */
+		bottom = o->cache_bottom;
+		ret = traverse_by_cache_tree(pos, nr_entries, n, names, info);
+		o->cache_bottom = bottom;
+		return ret;
+	}
 
 	p = names;
 	while (!p->mode)
@@ -812,6 +929,11 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info,
 	return ce;
 }
 
+/*
+ * Note that traverse_by_cache_tree() duplicates some logic in this function
+ * without actually calling it. If you change the logic here you may need to
+ * check and change there as well.
+ */
 static int unpack_nondirectories(int n, unsigned long mask,
 				 unsigned long dirmask,
 				 struct cache_entry **src,
@@ -1004,6 +1126,11 @@ static void debug_unpack_callback(int n,
 		debug_name_entry(i, names + i);
 }
 
+/*
+ * Note that traverse_by_cache_tree() duplicates some logic in this function
+ * without actually calling it. If you change the logic here you may need to
+ * check and change there as well.
+ */
 static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
 {
 	struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
--
cgit v0.10.2-6-g49f6

From f1e11c6510846321574e0ad9443843d7ba3eb758 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= <pclouds@gmail.com>
Date: Sat, 18 Aug 2018 16:41:25 +0200
Subject: unpack-trees: reduce malloc in cache-tree walk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a micro-optimization that probably only shines on repos with a
deep directory structure. Instead of allocating and freeing a new
cache_entry in every iteration, we reuse the last one and only update
the parts that differ in each iteration.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
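The pattern adopted in the diff below is the usual
hoist-the-buffer-out-of-the-loop trick: keep one allocation across
iterations, grow it geometrically when the next item does not fit, and free
it once at the end. A generic sketch of the same idea (plain C, not git
code):

	#include <stdlib.h>
	#include <string.h>

	static void process_all(const char **names, int nr)
	{
		char *buf = NULL;
		size_t cap = 0;
		int i;

		for (i = 0; i < nr; i++) {
			size_t need = strlen(names[i]) + 1;

			if (need > cap) {
				cap = need * 2;	/* overshoot, like new_ce_len <<= 1 below */
				buf = realloc(buf, cap);
				if (!buf)
					abort();
			}
			memcpy(buf, names[i], need);
			/* ... work with buf ... */
		}
		free(buf);
	}

The doubling keeps the number of reallocations small even when entry sizes
keep creeping upward; the patch additionally redoes the memset and the
constant fields (ce_flags, the src[] aliases) only when the buffer is
actually re-grown.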
diff --git a/unpack-trees.c b/unpack-trees.c
index 8376663..dbef6e1 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -685,6 +685,8 @@ static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names,
 {
 	struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
 	struct unpack_trees_options *o = info->data;
+	struct cache_entry *tree_ce = NULL;
+	int ce_len = 0;
 	int i, d;
 
 	if (!o->merge)
@@ -699,30 +701,39 @@ static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names,
 	 * get here in the first place.
 	 */
 	for (i = 0; i < nr_entries; i++) {
-		struct cache_entry *tree_ce;
-		int len, rc;
+		int new_ce_len, len, rc;
 
 		src[0] = o->src_index->cache[pos + i];
 
 		len = ce_namelen(src[0]);
-		tree_ce = xcalloc(1, cache_entry_size(len));
+		new_ce_len = cache_entry_size(len);
+
+		if (new_ce_len > ce_len) {
+			new_ce_len <<= 1;
+			tree_ce = xrealloc(tree_ce, new_ce_len);
+			memset(tree_ce, 0, new_ce_len);
+			ce_len = new_ce_len;
+
+			tree_ce->ce_flags = create_ce_flags(0);
+
+			for (d = 1; d <= nr_names; d++)
+				src[d] = tree_ce;
+		}
 
 		tree_ce->ce_mode = src[0]->ce_mode;
-		tree_ce->ce_flags = create_ce_flags(0);
 		tree_ce->ce_namelen = len;
 		oidcpy(&tree_ce->oid, &src[0]->oid);
 		memcpy(tree_ce->name, src[0]->name, len + 1);
 
-		for (d = 1; d <= nr_names; d++)
-			src[d] = tree_ce;
-
 		rc = call_unpack_fn((const struct cache_entry * const *)src, o);
-		free(tree_ce);
-		if (rc < 0)
+		if (rc < 0) {
+			free(tree_ce);
 			return rc;
+		}
 
 		mark_ce_used(src[0], o);
 	}
+	free(tree_ce);
 	if (o->debug_unpack)
 		printf("Unpacked %d entries from %s to %s using cache-tree\n",
 		       nr_entries,
--
cgit v0.10.2-6-g49f6

From 836ef2b69f3a8668c35a537715cf3bbc08fdcf39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= <pclouds@gmail.com>
Date: Sat, 18 Aug 2018 16:41:26 +0200
Subject: unpack-trees: reuse (still valid) cache-tree from src_index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We do an n-way merge by walking the source index and n trees at the
same time and adding merge results to a new temporary index called
o->result. The merge result for any given path could be either

- keep_entry(): the same old index entry in o->src_index is reused
- merged_entry(): either a new entry is added, or an existing one updated
- deleted_entry(): one entry from o->src_index is removed

For some reason [1] we keep making sure that the source index's
cache-tree would still be valid if it were used by o->result: for all
those merged/deleted entries, we invalidate the same path in
o->src_index, so only cache-trees covering the "keep_entry" parts
remain good. Because of this, the cache-tree from o->src_index can be
perfectly reused in o->result. And in fact we already rely on this
logic to reuse the untracked cache in edf3b90553 (unpack-trees:
preserve index extensions - 2017-05-08).

Move the cache-tree to o->result before doing cache_tree_update() to
reduce hashing cost, since cache_tree_update() has risen to be one of
the most expensive parts of unpack_trees() after the last few patches.
This does help reduce unpack_trees() time significantly (on
webkit.git):

    before        after
  --------------------------------------------------------------------
  0.080394752   0.051258167   s: read cache .git/index
  0.216010838   0.212106298   s: preload index
  0.008534301   0.280521764   s: refresh index
  0.251992198   0.218160442   s: traverse_trees
  0.377031383   0.374948191   s: check_updates
  0.372768105   0.037040114   s: cache_tree_update
  1.045887251   0.672031609   s: unpack_trees
  0.314983512   0.317456290   s: write index, changed mask = 2e
  0.062572653   0.038382654   s: traverse_trees
  0.000022544   0.000042731   s: check_updates
  0.073795585   0.050930053   s: unpack_trees
  0.073807557   0.051099735   s: diff-index
  1.938191592   1.614241153   s: git command: git checkout -

[1] I'm pretty sure the reason is an oversight in 34110cd4e3 (Make
    'unpack_trees()' have a separate source and destination index -
    2008-03-06). That patch aims to _not_ update the source index at
    all. The invalidation should have been done on o->result in that
    patch. But there was no cache-tree on o->result back then either,
    so it would have been pointless to do so.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/read-cache.c b/read-cache.c
index 1c9c88c..5ce40f3 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2940,6 +2940,8 @@ void move_index_extensions(struct index_state *dst, struct index_state *src)
 {
 	dst->untracked = src->untracked;
 	src->untracked = NULL;
+	dst->cache_tree = src->cache_tree;
+	src->cache_tree = NULL;
 }
 
 struct cache_entry *dup_cache_entry(const struct cache_entry *ce,
diff --git a/unpack-trees.c b/unpack-trees.c
index dbef6e1..aa80b65 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1576,6 +1576,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 	ret = check_updates(o) ? (-2) : 0;
 	if (o->dst_index) {
+		move_index_extensions(&o->result, o->src_index);
 		if (!ret) {
 			if (!o->result.cache_tree)
 				o->result.cache_tree = cache_tree();
@@ -1584,7 +1585,6 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 					      WRITE_TREE_SILENT |
 					      WRITE_TREE_REPAIR);
 		}
-		move_index_extensions(&o->result, o->src_index);
 		discard_index(o->dst_index);
 		*o->dst_index = o->result;
 	} else {
--
cgit v0.10.2-6-g49f6

From 5697ca9aa562c1f0b624b4f273685351734162e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3%A1i=20Ng=E1=BB=8Dc=20Duy?= <pclouds@gmail.com>
Date: Sat, 18 Aug 2018 16:41:27 +0200
Subject: unpack-trees: add missing cache invalidation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Any changes to the output index should be (confusingly) marked in the
source index with invalidate_ce_path(). This is used to make sure we
still have valid untracked cache and cache-tree extensions in the end.

We do a pretty good job of invalidating, except in two places.

verify_clean_subdirectory() is part of verify_absent() and
verify_absent_sparse(). The former is usually called by merged_entry()
or directly in threeway_merge(). The latter is obviously used by
sparse checkout. In these three call sites, only merged_entry()
follows up with invalidate_ce_path(). The other two don't, but they
should not trigger this ce removal because this is about D/F
conflicts [1]. But let's be safe and call invalidate_ce_path() here
as well.

The second place is keep_entry(), which is also used by
threeway_merge() to keep higher-stage entries. In order to reuse the
cache-tree we need to invalidate these paths as well.
It was not a problem in the past because, whenever a higher-stage
entry was present, cache-tree would not be created [2]. Now that we
salvage cache-tree even when higher-stage entries are present, we need
more invalidation.

[1] c81935348b (Fix switching to a branch with D/F when current branch
    has file D. - 2007-03-15)

[2] This is probably too strict. We should be able to create and save
    cache-tree for the directories that do not have conflict entries
    in cache_tree_update(). And this becomes more important when
    cache-tree plays a bigger role in terms of performance.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/unpack-trees.c b/unpack-trees.c
index aa80b65..bc43922 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1774,6 +1774,7 @@ static int verify_clean_subdirectory(const struct cache_entry *ce,
 		if (verify_uptodate(ce2, o))
 			return -1;
 		add_entry(o, ce2, CE_REMOVE, 0);
+		invalidate_ce_path(ce, o);
 		mark_ce_used(ce2, o);
 	}
 	cnt++;
@@ -2033,6 +2034,8 @@ static int keep_entry(const struct cache_entry *ce,
 		      struct unpack_trees_options *o)
 {
 	add_entry(o, ce, 0, 0);
+	if (ce_stage(ce))
+		invalidate_ce_path(ce, o);
 	return 1;
 }
--
cgit v0.10.2-6-g49f6

From 4592e6080ff0f9eb0218162be0e40b2d6abc979a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3%A1i=20Ng=E1=BB=8Dc=20Duy?= <pclouds@gmail.com>
Date: Sat, 18 Aug 2018 16:41:28 +0200
Subject: cache-tree: verify valid cache-tree in the test suite
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes sure that cache-tree is consistent with the index. The main
purpose is to catch potential problems when the index is saved in
unpack_trees(), but the check in write_locked_index() will also help
spot missing invalidation in other code.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/cache-tree.c b/cache-tree.c
index caafbff..c3c2064 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -4,6 +4,7 @@
 #include "tree-walk.h"
 #include "cache-tree.h"
 #include "object-store.h"
+#include "replace-object.h"
 
 #ifndef DEBUG
 #define DEBUG 0
@@ -732,3 +733,80 @@ int update_main_cache_tree(int flags)
 		the_index.cache_tree = cache_tree();
 	return cache_tree_update(&the_index, flags);
 }
+
+static void verify_one(struct index_state *istate,
+		       struct cache_tree *it,
+		       struct strbuf *path)
+{
+	int i, pos, len = path->len;
+	struct strbuf tree_buf = STRBUF_INIT;
+	struct object_id new_oid;
+
+	for (i = 0; i < it->subtree_nr; i++) {
+		strbuf_addf(path, "%s/", it->down[i]->name);
+		verify_one(istate, it->down[i]->cache_tree, path);
+		strbuf_setlen(path, len);
+	}
+
+	if (it->entry_count < 0 ||
+	    /* no verification on tests (t7003) that replace trees */
+	    lookup_replace_object(the_repository, &it->oid) != &it->oid)
+		return;
+
+	if (path->len) {
+		pos = index_name_pos(istate, path->buf, path->len);
+		pos = -pos - 1;
+	} else {
+		pos = 0;
+	}
+
+	i = 0;
+	while (i < it->entry_count) {
+		struct cache_entry *ce = istate->cache[pos + i];
+		const char *slash;
+		struct cache_tree_sub *sub = NULL;
+		const struct object_id *oid;
+		const char *name;
+		unsigned mode;
+		int entlen;
+
+		if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE))
+			BUG("%s with flags 0x%x should not be in cache-tree",
+			    ce->name, ce->ce_flags);
+		name = ce->name + path->len;
+		slash = strchr(name, '/');
+		if (slash) {
+			entlen = slash - name;
+			sub = find_subtree(it, ce->name + path->len, entlen, 0);
+			if (!sub || sub->cache_tree->entry_count < 0)
+				BUG("bad subtree '%.*s'", entlen, name);
+			oid = &sub->cache_tree->oid;
+			mode = S_IFDIR;
+			i += sub->cache_tree->entry_count;
+		} else {
+			oid = &ce->oid;
+			mode = ce->ce_mode;
+			entlen = ce_namelen(ce) - path->len;
+			i++;
+		}
+		strbuf_addf(&tree_buf, "%o %.*s%c", mode, entlen, name, '\0');
+		strbuf_add(&tree_buf, oid->hash, the_hash_algo->rawsz);
+	}
+	hash_object_file(tree_buf.buf, tree_buf.len, tree_type, &new_oid);
+	if (oidcmp(&new_oid, &it->oid))
+		BUG("cache-tree for path %.*s does not match. "
+		    "Expected %s got %s", len, path->buf,
+		    oid_to_hex(&new_oid), oid_to_hex(&it->oid));
+	strbuf_setlen(path, len);
+	strbuf_release(&tree_buf);
+}
+
+void cache_tree_verify(struct index_state *istate)
+{
+	struct strbuf path = STRBUF_INIT;
+
+	if (!istate->cache_tree)
+		return;
+	verify_one(istate, istate->cache_tree, &path);
+	strbuf_release(&path);
+}
diff --git a/cache-tree.h b/cache-tree.h
index 9799e89..c1fde53 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -32,6 +32,7 @@ struct cache_tree *cache_tree_read(const char *buffer, unsigned long size);
 
 int cache_tree_fully_valid(struct cache_tree *);
 int cache_tree_update(struct index_state *, int);
+void cache_tree_verify(struct index_state *);
 
 int update_main_cache_tree(int);
diff --git a/read-cache.c b/read-cache.c
index 5ce40f3..41f313b 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2744,6 +2744,9 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
 	int new_shared_index, ret;
 	struct split_index *si = istate->split_index;
 
+	if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0))
+		cache_tree_verify(istate);
+
 	if ((flags & SKIP_IF_UNCHANGED) && !istate->cache_changed) {
 		if (flags & COMMIT_LOCK)
 			rollback_lock_file(lock);
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 78f7097..5b50f6e 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1083,6 +1083,12 @@ else
 	test_set_prereq C_LOCALE_OUTPUT
 fi
 
+if test -z "$GIT_TEST_CHECK_CACHE_TREE"
+then
+	GIT_TEST_CHECK_CACHE_TREE=true
+	export GIT_TEST_CHECK_CACHE_TREE
+fi
+
 test_lazy_prereq PIPE '
 	# test whether the filesystem supports FIFOs
 	test_have_prereq !MINGW,!CYGWIN &&
diff --git a/unpack-trees.c b/unpack-trees.c
index bc43922..3394540 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1578,6 +1578,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 	if (o->dst_index) {
 		move_index_extensions(&o->result, o->src_index);
 		if (!ret) {
+			if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0))
+				cache_tree_verify(&o->result);
 			if (!o->result.cache_tree)
 				o->result.cache_tree = cache_tree();
 			if (!cache_tree_fully_valid(o->result.cache_tree))
--
cgit v0.10.2-6-g49f6
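For readers unfamiliar with the object format that verify_one() above
reconstructs: a tree object's payload is a flat sequence of entries, each
encoded as an octal mode, a space, the entry name, a NUL, then the raw
(binary, not hex) hash. A standalone sketch of one entry's encoding
(illustration only; the real code above builds the whole payload with
strbuf_addf() and strbuf_add()):

	#include <stdio.h>
	#include <string.h>

	/* Append one tree entry at 'out'; returns the number of bytes written. */
	static size_t encode_tree_entry(char *out, unsigned mode, const char *name,
					const unsigned char *hash, size_t rawsz)
	{
		/* "%o %s" plus the terminating NUL that sprintf() adds */
		size_t len = sprintf(out, "%o %s", mode, name) + 1;

		memcpy(out + len, hash, rawsz);
		return len + rawsz;
	}

hash_object_file() then hashes that payload behind a "tree" header, so a
mismatch against it->oid points at a cache-tree record that should have been
invalidated but was not -- exactly what this check is meant to flag.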
From 5f4436a7211de7cd7552f6cf3bbb35147db1a070 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3%A1i=20Ng=E1=BB=8Dc=20Duy?= <pclouds@gmail.com>
Date: Sat, 25 Aug 2018 15:02:09 +0200
Subject: Document update for nd/unpack-trees-with-cache-tree
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix an incorrect comment in the new code added in b4da37380b
(unpack-trees: optimize walking same trees with cache-tree -
2018-08-18) and document the new test variable that 4592e6080f
(cache-tree: verify valid cache-tree in the test suite - 2018-08-18)
enabled by default in test-lib.sh.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/t/README b/t/README
index 8373a27..0e7cc23 100644
--- a/t/README
+++ b/t/README
@@ -315,6 +315,10 @@ packs on demand. This normally only happens when the object size is over
 2GB. This variable forces the code path on any object larger than
 <n> bytes.
 
+GIT_TEST_CHECK_CACHE_TREE=<boolean> checks that cache-tree
+records are valid when the index is written out or after a merge. This
+is mostly to catch missing invalidation. Default is true.
+
 Naming Tests
 ------------
 
diff --git a/unpack-trees.c b/unpack-trees.c
index 3394540..515c374 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -676,8 +676,8 @@ static int index_pos_by_traverse_info(struct name_entry *names,
 
 /*
  * Fast path if we detect that all trees are the same as cache-tree at this
- * path. We'll walk these trees recursively using cache-tree/index instead of
- * ODB since already know what these trees contain.
+ * path. We'll walk these trees in an iterative loop using cache-tree/index
+ * instead of ODB since we already know what these trees contain.
  */
 static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names,
 				  struct name_entry *names,
--
cgit v0.10.2-6-g49f6