diff options
Diffstat (limited to 'fsmonitor.c')
-rw-r--r-- | fsmonitor.c | 514 |
1 files changed, 443 insertions, 71 deletions
diff --git a/fsmonitor.c b/fsmonitor.c index ab9bfc6..2b17d60 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -1,10 +1,14 @@ -#include "cache.h" +#include "git-compat-util.h" #include "config.h" #include "dir.h" +#include "environment.h" #include "ewah/ewok.h" #include "fsmonitor.h" +#include "fsmonitor-ipc.h" +#include "name-hash.h" #include "run-command.h" #include "strbuf.h" +#include "trace2.h" #define INDEX_EXTENSION_VERSION1 (1) #define INDEX_EXTENSION_VERSION2 (2) @@ -148,15 +152,18 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate) /* * Call the query-fsmonitor hook passing the last update token of the saved results. */ -static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result) +static int query_fsmonitor_hook(struct repository *r, + int version, + const char *last_update, + struct strbuf *query_result) { struct child_process cp = CHILD_PROCESS_INIT; int result; - if (!core_fsmonitor) + if (fsm_settings__get_mode(r) != FSMONITOR_MODE_HOOK) return -1; - strvec_push(&cp.args, core_fsmonitor); + strvec_push(&cp.args, fsm_settings__get_hook_path(r)); strvec_pushf(&cp.args, "%d", version); strvec_pushf(&cp.args, "%s", last_update); cp.use_shell = 1; @@ -168,69 +175,333 @@ static int query_fsmonitor(int version, const char *last_update, struct strbuf * if (result) trace2_data_intmax("fsm_hook", NULL, "query/failed", result); - else { + else trace2_data_intmax("fsm_hook", NULL, "query/response-length", query_result->len); - if (fsmonitor_is_trivial_response(query_result)) - trace2_data_intmax("fsm_hook", NULL, - "query/trivial-response", 1); - } - trace2_region_leave("fsm_hook", "query", NULL); return result; } -int fsmonitor_is_trivial_response(const struct strbuf *query_result) +/* + * Invalidate the FSM bit on this CE. This is like mark_fsmonitor_invalid() + * but we've already handled the untracked-cache, so let's not repeat that + * work. This also lets us have a different trace message so that we can + * see everything that was done as part of the refresh-callback. + */ +static void invalidate_ce_fsm(struct cache_entry *ce) +{ + if (ce->ce_flags & CE_FSMONITOR_VALID) { + trace_printf_key(&trace_fsmonitor, + "fsmonitor_refresh_callback INV: '%s'", + ce->name); + ce->ce_flags &= ~CE_FSMONITOR_VALID; + } +} + +static size_t handle_path_with_trailing_slash( + struct index_state *istate, const char *name, int pos); + +/* + * Use the name-hash to do a case-insensitive cache-entry lookup with + * the pathname and invalidate the cache-entry. + * + * Returns the number of cache-entries that we invalidated. + */ +static size_t handle_using_name_hash_icase( + struct index_state *istate, const char *name) { - static char trivial_response[3] = { '\0', '/', '\0' }; + struct cache_entry *ce = NULL; - return query_result->len >= 3 && - !memcmp(trivial_response, - &query_result->buf[query_result->len - 3], 3); + ce = index_file_exists(istate, name, strlen(name), 1); + if (!ce) + return 0; + + /* + * A case-insensitive search in the name-hash using the + * observed pathname found a cache-entry, so the observed path + * is case-incorrect. Invalidate the cache-entry and use the + * correct spelling from the cache-entry to invalidate the + * untracked-cache. Since we now have sparse-directories in + * the index, the observed pathname may represent a regular + * file or a sparse-index directory. + * + * Note that we should not have seen FSEvents for a + * sparse-index directory, but we handle it just in case. + * + * Either way, we know that there are not any cache-entries for + * children inside the cone of the directory, so we don't need to + * do the usual scan. + */ + trace_printf_key(&trace_fsmonitor, + "fsmonitor_refresh_callback MAP: '%s' '%s'", + name, ce->name); + + /* + * NEEDSWORK: We used the name-hash to find the correct + * case-spelling of the pathname in the cache-entry[], so + * technically this is a tracked file or a sparse-directory. + * It should not have any entries in the untracked-cache, so + * we should not need to use the case-corrected spelling to + * invalidate the the untracked-cache. So we may not need to + * do this. For now, I'm going to be conservative and always + * do it; we can revisit this later. + */ + untracked_cache_invalidate_trimmed_path(istate, ce->name, 0); + + invalidate_ce_fsm(ce); + return 1; } -static void fsmonitor_refresh_callback(struct index_state *istate, char *name) +/* + * Use the dir-name-hash to find the correct-case spelling of the + * directory. Use the canonical spelling to invalidate all of the + * cache-entries within the matching cone. + * + * Returns the number of cache-entries that we invalidated. + */ +static size_t handle_using_dir_name_hash_icase( + struct index_state *istate, const char *name) { - int i, len = strlen(name); - if (name[len - 1] == '/') { + struct strbuf canonical_path = STRBUF_INIT; + int pos; + size_t len = strlen(name); + size_t nr_in_cone; + + if (name[len - 1] == '/') + len--; + + if (!index_dir_find(istate, name, len, &canonical_path)) + return 0; /* name is untracked */ + if (!memcmp(name, canonical_path.buf, canonical_path.len)) { + strbuf_release(&canonical_path); /* - * TODO We should binary search to find the first path with - * TODO this directory prefix. Then linearly update entries - * TODO while the prefix matches. Taking care to search without - * TODO the trailing slash -- because '/' sorts after a few - * TODO interesting special chars, like '.' and ' '. + * NEEDSWORK: Our caller already tried an exact match + * and failed to find one. They called us to do an + * ICASE match, so we should never get an exact match, + * so we could promote this to a BUG() here if we + * wanted to. It doesn't hurt anything to just return + * 0 and go on because we should never get here. Or we + * could just get rid of the memcmp() and this "if" + * clause completely. */ + BUG("handle_using_dir_name_hash_icase(%s) did not exact match", + name); + } - /* Mark all entries for the folder invalid */ - for (i = 0; i < istate->cache_nr; i++) { - if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID && - starts_with(istate->cache[i]->name, name)) - istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID; - } - /* Need to remove the / from the path for the untracked cache */ - name[len - 1] = '\0'; + trace_printf_key(&trace_fsmonitor, + "fsmonitor_refresh_callback MAP: '%s' '%s'", + name, canonical_path.buf); + + /* + * The dir-name-hash only tells us the corrected spelling of + * the prefix. We have to use this canonical path to do a + * lookup in the cache-entry array so that we repeat the + * original search using the case-corrected spelling. + */ + strbuf_addch(&canonical_path, '/'); + pos = index_name_pos(istate, canonical_path.buf, + canonical_path.len); + nr_in_cone = handle_path_with_trailing_slash( + istate, canonical_path.buf, pos); + strbuf_release(&canonical_path); + return nr_in_cone; +} + +/* + * The daemon sent an observed pathname without a trailing slash. + * (This is the normal case.) We do not know if it is a tracked or + * untracked file, a sparse-directory, or a populated directory (on a + * platform such as Windows where FSEvents are not qualified). + * + * The pathname contains the observed case reported by the FS. We + * do not know it is case-correct or -incorrect. + * + * Assume it is case-correct and try an exact match. + * + * Return the number of cache-entries that we invalidated. + */ +static size_t handle_path_without_trailing_slash( + struct index_state *istate, const char *name, int pos) +{ + /* + * Mark the untracked cache dirty for this path (regardless of + * whether or not we find an exact match for it in the index). + * Since the path is unqualified (no trailing slash hint in the + * FSEvent), it may refer to a file or directory. So we should + * not assume one or the other and should always let the untracked + * cache decide what needs to invalidated. + */ + untracked_cache_invalidate_trimmed_path(istate, name, 0); + + if (pos >= 0) { + /* + * An exact match on a tracked file. We assume that we + * do not need to scan forward for a sparse-directory + * cache-entry with the same pathname, nor for a cone + * at that directory. (That is, assume no D/F conflicts.) + */ + invalidate_ce_fsm(istate->cache[pos]); + return 1; } else { - int pos = index_name_pos(istate, name, strlen(name)); + size_t nr_in_cone; + struct strbuf work_path = STRBUF_INIT; - if (pos >= 0) { - struct cache_entry *ce = istate->cache[pos]; - ce->ce_flags &= ~CE_FSMONITOR_VALID; - } + /* + * The negative "pos" gives us the suggested insertion + * point for the pathname (without the trailing slash). + * We need to see if there is a directory with that + * prefix, but there can be lots of pathnames between + * "foo" and "foo/" like "foo-" or "foo-bar", so we + * don't want to do our own scan. + */ + strbuf_add(&work_path, name, strlen(name)); + strbuf_addch(&work_path, '/'); + pos = index_name_pos(istate, work_path.buf, work_path.len); + nr_in_cone = handle_path_with_trailing_slash( + istate, work_path.buf, pos); + strbuf_release(&work_path); + return nr_in_cone; } +} + +/* + * The daemon can decorate directory events, such as a move or rename, + * by adding a trailing slash to the observed name. Use this to + * explicitly invalidate the entire cone under that directory. + * + * The daemon can only reliably do that if the OS FSEvent contains + * sufficient information in the event. + * + * macOS FSEvents have enough information. + * + * Other platforms may or may not be able to do it (and it might + * depend on the type of event (for example, a daemon could lstat() an + * observed pathname after a rename, but not after a delete)). + * + * If we find an exact match in the index for a path with a trailing + * slash, it means that we matched a sparse-index directory in a + * cone-mode sparse-checkout (since that's the only time we have + * directories in the index). We should never see this in practice + * (because sparse directories should not be present and therefore + * not generating FS events). Either way, we can treat them in the + * same way and just invalidate the cache-entry and the untracked + * cache (and in this case, the forward cache-entry scan won't find + * anything and it doesn't hurt to let it run). + * + * Return the number of cache-entries that we invalidated. We will + * use this later to determine if we need to attempt a second + * case-insensitive search on case-insensitive file systems. That is, + * if the search using the observed-case in the FSEvent yields any + * results, we assume the prefix is case-correct. If there are no + * matches, we still don't know if the observed path is simply + * untracked or case-incorrect. + */ +static size_t handle_path_with_trailing_slash( + struct index_state *istate, const char *name, int pos) +{ + int i; + size_t nr_in_cone = 0; + + /* + * Mark the untracked cache dirty for this directory path + * (regardless of whether or not we find an exact match for it + * in the index or find it to be proper prefix of one or more + * files in the index), since the FSEvent is hinting that + * there may be changes on or within the directory. + */ + untracked_cache_invalidate_trimmed_path(istate, name, 0); + + if (pos < 0) + pos = -pos - 1; + + /* Mark all entries for the folder invalid */ + for (i = pos; i < istate->cache_nr; i++) { + if (!starts_with(istate->cache[i]->name, name)) + break; + invalidate_ce_fsm(istate->cache[i]); + nr_in_cone++; + } + + return nr_in_cone; +} + +static void fsmonitor_refresh_callback(struct index_state *istate, char *name) +{ + int len = strlen(name); + int pos = index_name_pos(istate, name, len); + size_t nr_in_cone; + + trace_printf_key(&trace_fsmonitor, + "fsmonitor_refresh_callback '%s' (pos %d)", + name, pos); + + if (name[len - 1] == '/') + nr_in_cone = handle_path_with_trailing_slash(istate, name, pos); + else + nr_in_cone = handle_path_without_trailing_slash(istate, name, pos); /* - * Mark the untracked cache dirty even if it wasn't found in the index - * as it could be a new untracked file. + * If we did not find an exact match for this pathname or any + * cache-entries with this directory prefix and we're on a + * case-insensitive file system, try again using the name-hash + * and dir-name-hash. */ - trace_printf_key(&trace_fsmonitor, "fsmonitor_refresh_callback '%s'", name); - untracked_cache_invalidate_path(istate, name, 0); + if (!nr_in_cone && ignore_case) { + nr_in_cone = handle_using_name_hash_icase(istate, name); + if (!nr_in_cone) + nr_in_cone = handle_using_dir_name_hash_icase( + istate, name); + } + + if (nr_in_cone) + trace_printf_key(&trace_fsmonitor, + "fsmonitor_refresh_callback CNT: %d", + (int)nr_in_cone); } +/* + * The number of pathnames that we need to receive from FSMonitor + * before we force the index to be updated. + * + * Note that any pathname within the set of received paths MAY cause + * cache-entry or istate flag bits to be updated and thus cause the + * index to be updated on disk. + * + * However, the response may contain many paths (such as ignored + * paths) that will not update any flag bits. And thus not force the + * index to be updated. (This is fine and normal.) It also means + * that the token will not be updated in the FSMonitor index + * extension. So the next Git command will find the same token in the + * index, make the same token-relative request, and receive the same + * response (plus any newly changed paths). If this response is large + * (and continues to grow), performance could be impacted. + * + * For example, if the user runs a build and it writes 100K object + * files but doesn't modify any source files, the index would not need + * to be updated. The FSMonitor response (after the build and + * relative to a pre-build token) might be 5MB. Each subsequent Git + * command will receive that same 100K/5MB response until something + * causes the index to be updated. And `refresh_fsmonitor()` will + * have to iterate over those 100K paths each time. + * + * Performance could be improved if we optionally force update the + * index after a very large response and get an updated token into + * the FSMonitor index extension. This should allow subsequent + * commands to get smaller and more current responses. + * + * The value chosen here does not need to be precise. The index + * will be updated automatically the first time the user touches + * a tracked file and causes a command like `git status` to + * update an mtime to be updated and/or set a flag bit. + */ +static int fsmonitor_force_update_threshold = 100; + void refresh_fsmonitor(struct index_state *istate) { + static int warn_once = 0; struct strbuf query_result = STRBUF_INIT; int query_success = 0, hook_version = -1; size_t bol = 0; /* beginning of line */ @@ -238,17 +509,70 @@ void refresh_fsmonitor(struct index_state *istate) struct strbuf last_update_token = STRBUF_INIT; char *buf; unsigned int i; + int is_trivial = 0; + struct repository *r = istate->repo; + enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(r); + enum fsmonitor_reason reason = fsm_settings__get_reason(r); + + if (!warn_once && reason > FSMONITOR_REASON_OK) { + char *msg = fsm_settings__get_incompatible_msg(r, reason); + warn_once = 1; + warning("%s", msg); + free(msg); + } - if (!core_fsmonitor || istate->fsmonitor_has_run_once) + if (fsm_mode <= FSMONITOR_MODE_DISABLED || + istate->fsmonitor_has_run_once) return; - hook_version = fsmonitor_hook_version(); - istate->fsmonitor_has_run_once = 1; trace_printf_key(&trace_fsmonitor, "refresh fsmonitor"); + + if (fsm_mode == FSMONITOR_MODE_IPC) { + query_success = !fsmonitor_ipc__send_query( + istate->fsmonitor_last_update ? + istate->fsmonitor_last_update : "builtin:fake", + &query_result); + if (query_success) { + /* + * The response contains a series of nul terminated + * strings. The first is the new token. + * + * Use `char *buf` as an interlude to trick the CI + * static analysis to let us use `strbuf_addstr()` + * here (and only copy the token) rather than + * `strbuf_addbuf()`. + */ + buf = query_result.buf; + strbuf_addstr(&last_update_token, buf); + bol = last_update_token.len + 1; + is_trivial = query_result.buf[bol] == '/'; + if (is_trivial) + trace2_data_intmax("fsm_client", NULL, + "query/trivial-response", 1); + } else { + /* + * The builtin daemon is not available on this + * platform -OR- we failed to get a response. + * + * Generate a fake token (rather than a V1 + * timestamp) for the index extension. (If + * they switch back to the hook API, we don't + * want ambiguous state.) + */ + strbuf_addstr(&last_update_token, "builtin:fake"); + } + + goto apply_results; + } + + assert(fsm_mode == FSMONITOR_MODE_HOOK); + + hook_version = fsmonitor_hook_version(); + /* - * This could be racy so save the date/time now and query_fsmonitor + * This could be racy so save the date/time now and query_fsmonitor_hook * should be inclusive to ensure we don't miss potential changes. */ last_update = getnanotime(); @@ -256,13 +580,14 @@ void refresh_fsmonitor(struct index_state *istate) strbuf_addf(&last_update_token, "%"PRIu64"", last_update); /* - * If we have a last update token, call query_fsmonitor for the set of + * If we have a last update token, call query_fsmonitor_hook for the set of * changes since that token, else assume everything is possibly dirty * and check it all. */ if (istate->fsmonitor_last_update) { if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) { - query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION2, + query_success = !query_fsmonitor_hook( + r, HOOK_INTERFACE_VERSION2, istate->fsmonitor_last_update, &query_result); if (query_success) { @@ -283,6 +608,7 @@ void refresh_fsmonitor(struct index_state *istate) query_success = 0; } else { bol = last_update_token.len + 1; + is_trivial = query_result.buf[bol] == '/'; } } else if (hook_version < 0) { hook_version = HOOK_INTERFACE_VERSION1; @@ -292,37 +618,83 @@ void refresh_fsmonitor(struct index_state *istate) } if (hook_version == HOOK_INTERFACE_VERSION1) { - query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION1, + query_success = !query_fsmonitor_hook( + r, HOOK_INTERFACE_VERSION1, istate->fsmonitor_last_update, &query_result); + if (query_success) + is_trivial = query_result.buf[0] == '/'; } - trace_performance_since(last_update, "fsmonitor process '%s'", core_fsmonitor); - trace_printf_key(&trace_fsmonitor, "fsmonitor process '%s' returned %s", - core_fsmonitor, query_success ? "success" : "failure"); + if (is_trivial) + trace2_data_intmax("fsm_hook", NULL, + "query/trivial-response", 1); + + trace_performance_since(last_update, "fsmonitor process '%s'", + fsm_settings__get_hook_path(r)); + trace_printf_key(&trace_fsmonitor, + "fsmonitor process '%s' returned %s", + fsm_settings__get_hook_path(r), + query_success ? "success" : "failure"); } - /* a fsmonitor process can return '/' to indicate all entries are invalid */ - if (query_success && query_result.buf[bol] != '/') { - /* Mark all entries returned by the monitor as dirty */ +apply_results: + /* + * The response from FSMonitor (excluding the header token) is + * either: + * + * [a] a (possibly empty) list of NUL delimited relative + * pathnames of changed paths. This list can contain + * files and directories. Directories have a trailing + * slash. + * + * [b] a single '/' to indicate the provider had no + * information and that we should consider everything + * invalid. We call this a trivial response. + */ + trace2_region_enter("fsmonitor", "apply_results", istate->repo); + + if (query_success && !is_trivial) { + /* + * Mark all pathnames returned by the monitor as dirty. + * + * This updates both the cache-entries and the untracked-cache. + */ + int count = 0; + buf = query_result.buf; for (i = bol; i < query_result.len; i++) { if (buf[i] != '\0') continue; fsmonitor_refresh_callback(istate, buf + bol); bol = i + 1; + count++; } - if (bol < query_result.len) + if (bol < query_result.len) { fsmonitor_refresh_callback(istate, buf + bol); + count++; + } /* Now mark the untracked cache for fsmonitor usage */ if (istate->untracked) istate->untracked->use_fsmonitor = 1; - } else { - /* We only want to run the post index changed hook if we've actually changed entries, so keep track - * if we actually changed entries or not */ + if (count > fsmonitor_force_update_threshold) + istate->cache_changed |= FSMONITOR_CHANGED; + + trace2_data_intmax("fsmonitor", istate->repo, "apply_count", + count); + + } else { + /* + * We failed to get a response or received a trivial response, + * so invalidate everything. + * + * We only want to run the post index changed hook if + * we've actually changed entries, so keep track if we + * actually changed entries or not. + */ int is_cache_changed = 0; - /* Mark all entries invalid */ + for (i = 0; i < istate->cache_nr; i++) { if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID) { is_cache_changed = 1; @@ -330,13 +702,18 @@ void refresh_fsmonitor(struct index_state *istate) } } - /* If we're going to check every file, ensure we save the results */ + /* + * If we're going to check every file, ensure we save + * the results. + */ if (is_cache_changed) istate->cache_changed |= FSMONITOR_CHANGED; if (istate->untracked) istate->untracked->use_fsmonitor = 0; } + trace2_region_leave("fsmonitor", "apply_results", istate->repo); + strbuf_release(&query_result); /* Now that we've updated istate, save the last_update_token */ @@ -411,12 +788,15 @@ void remove_fsmonitor(struct index_state *istate) void tweak_fsmonitor(struct index_state *istate) { unsigned int i; - int fsmonitor_enabled = git_config_get_fsmonitor(); + int fsmonitor_enabled = (fsm_settings__get_mode(istate->repo) + > FSMONITOR_MODE_DISABLED); if (istate->fsmonitor_dirty) { if (fsmonitor_enabled) { /* Mark all entries valid */ for (i = 0; i < istate->cache_nr; i++) { + if (S_ISGITLINK(istate->cache[i]->ce_mode)) + continue; istate->cache[i]->ce_flags |= CE_FSMONITOR_VALID; } @@ -431,16 +811,8 @@ void tweak_fsmonitor(struct index_state *istate) istate->fsmonitor_dirty = NULL; } - switch (fsmonitor_enabled) { - case -1: /* keep: do nothing */ - break; - case 0: /* false */ - remove_fsmonitor(istate); - break; - case 1: /* true */ + if (fsmonitor_enabled) add_fsmonitor(istate); - break; - default: /* unknown value: do nothing */ - break; - } + else + remove_fsmonitor(istate); } |