summaryrefslogtreecommitdiff
path: root/fsmonitor.c
diff options
context:
space:
mode:
Diffstat (limited to 'fsmonitor.c')
-rw-r--r--fsmonitor.c514
1 files changed, 443 insertions, 71 deletions
diff --git a/fsmonitor.c b/fsmonitor.c
index ab9bfc6..2b17d60 100644
--- a/fsmonitor.c
+++ b/fsmonitor.c
@@ -1,10 +1,14 @@
-#include "cache.h"
+#include "git-compat-util.h"
#include "config.h"
#include "dir.h"
+#include "environment.h"
#include "ewah/ewok.h"
#include "fsmonitor.h"
+#include "fsmonitor-ipc.h"
+#include "name-hash.h"
#include "run-command.h"
#include "strbuf.h"
+#include "trace2.h"
#define INDEX_EXTENSION_VERSION1 (1)
#define INDEX_EXTENSION_VERSION2 (2)
@@ -148,15 +152,18 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
/*
* Call the query-fsmonitor hook passing the last update token of the saved results.
*/
-static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result)
+static int query_fsmonitor_hook(struct repository *r,
+ int version,
+ const char *last_update,
+ struct strbuf *query_result)
{
struct child_process cp = CHILD_PROCESS_INIT;
int result;
- if (!core_fsmonitor)
+ if (fsm_settings__get_mode(r) != FSMONITOR_MODE_HOOK)
return -1;
- strvec_push(&cp.args, core_fsmonitor);
+ strvec_push(&cp.args, fsm_settings__get_hook_path(r));
strvec_pushf(&cp.args, "%d", version);
strvec_pushf(&cp.args, "%s", last_update);
cp.use_shell = 1;
@@ -168,69 +175,333 @@ static int query_fsmonitor(int version, const char *last_update, struct strbuf *
if (result)
trace2_data_intmax("fsm_hook", NULL, "query/failed", result);
- else {
+ else
trace2_data_intmax("fsm_hook", NULL, "query/response-length",
query_result->len);
- if (fsmonitor_is_trivial_response(query_result))
- trace2_data_intmax("fsm_hook", NULL,
- "query/trivial-response", 1);
- }
-
trace2_region_leave("fsm_hook", "query", NULL);
return result;
}
-int fsmonitor_is_trivial_response(const struct strbuf *query_result)
+/*
+ * Invalidate the FSM bit on this CE. This is like mark_fsmonitor_invalid()
+ * but we've already handled the untracked-cache, so let's not repeat that
+ * work. This also lets us have a different trace message so that we can
+ * see everything that was done as part of the refresh-callback.
+ */
+static void invalidate_ce_fsm(struct cache_entry *ce)
+{
+ if (ce->ce_flags & CE_FSMONITOR_VALID) {
+ trace_printf_key(&trace_fsmonitor,
+ "fsmonitor_refresh_callback INV: '%s'",
+ ce->name);
+ ce->ce_flags &= ~CE_FSMONITOR_VALID;
+ }
+}
+
+static size_t handle_path_with_trailing_slash(
+ struct index_state *istate, const char *name, int pos);
+
+/*
+ * Use the name-hash to do a case-insensitive cache-entry lookup with
+ * the pathname and invalidate the cache-entry.
+ *
+ * Returns the number of cache-entries that we invalidated.
+ */
+static size_t handle_using_name_hash_icase(
+ struct index_state *istate, const char *name)
{
- static char trivial_response[3] = { '\0', '/', '\0' };
+ struct cache_entry *ce = NULL;
- return query_result->len >= 3 &&
- !memcmp(trivial_response,
- &query_result->buf[query_result->len - 3], 3);
+ ce = index_file_exists(istate, name, strlen(name), 1);
+ if (!ce)
+ return 0;
+
+ /*
+ * A case-insensitive search in the name-hash using the
+ * observed pathname found a cache-entry, so the observed path
+ * is case-incorrect. Invalidate the cache-entry and use the
+ * correct spelling from the cache-entry to invalidate the
+ * untracked-cache. Since we now have sparse-directories in
+ * the index, the observed pathname may represent a regular
+ * file or a sparse-index directory.
+ *
+ * Note that we should not have seen FSEvents for a
+ * sparse-index directory, but we handle it just in case.
+ *
+ * Either way, we know that there are not any cache-entries for
+ * children inside the cone of the directory, so we don't need to
+ * do the usual scan.
+ */
+ trace_printf_key(&trace_fsmonitor,
+ "fsmonitor_refresh_callback MAP: '%s' '%s'",
+ name, ce->name);
+
+ /*
+ * NEEDSWORK: We used the name-hash to find the correct
+ * case-spelling of the pathname in the cache-entry[], so
+ * technically this is a tracked file or a sparse-directory.
+ * It should not have any entries in the untracked-cache, so
+ * we should not need to use the case-corrected spelling to
+ * invalidate the untracked-cache. So we may not need to
+ * do this. For now, I'm going to be conservative and always
+ * do it; we can revisit this later.
+ */
+ untracked_cache_invalidate_trimmed_path(istate, ce->name, 0);
+
+ invalidate_ce_fsm(ce);
+ return 1;
}
-static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
+/*
+ * Use the dir-name-hash to find the correct-case spelling of the
+ * directory. Use the canonical spelling to invalidate all of the
+ * cache-entries within the matching cone.
+ *
+ * Returns the number of cache-entries that we invalidated.
+ */
+static size_t handle_using_dir_name_hash_icase(
+ struct index_state *istate, const char *name)
{
- int i, len = strlen(name);
- if (name[len - 1] == '/') {
+ struct strbuf canonical_path = STRBUF_INIT;
+ int pos;
+ size_t len = strlen(name);
+ size_t nr_in_cone;
+
+ if (name[len - 1] == '/')
+ len--;
+
+ if (!index_dir_find(istate, name, len, &canonical_path))
+ return 0; /* name is untracked */
+ if (!memcmp(name, canonical_path.buf, canonical_path.len)) {
+ strbuf_release(&canonical_path);
/*
- * TODO We should binary search to find the first path with
- * TODO this directory prefix. Then linearly update entries
- * TODO while the prefix matches. Taking care to search without
- * TODO the trailing slash -- because '/' sorts after a few
- * TODO interesting special chars, like '.' and ' '.
+ * NEEDSWORK: Our caller already tried an exact match
+ * and failed to find one. They called us to do an
+ * ICASE match, so we should never get an exact match
+ * here, and we treat it as a BUG() if we do. It would
+ * not hurt anything to just return 0 and go on instead,
+ * because we should never get here. Or we could just
+ * get rid of the memcmp() and this "if" clause
+ * completely.
*/
+ BUG("handle_using_dir_name_hash_icase(%s) did not exact match",
+ name);
+ }
- /* Mark all entries for the folder invalid */
- for (i = 0; i < istate->cache_nr; i++) {
- if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID &&
- starts_with(istate->cache[i]->name, name))
- istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
- }
- /* Need to remove the / from the path for the untracked cache */
- name[len - 1] = '\0';
+ trace_printf_key(&trace_fsmonitor,
+ "fsmonitor_refresh_callback MAP: '%s' '%s'",
+ name, canonical_path.buf);
+
+ /*
+ * The dir-name-hash only tells us the corrected spelling of
+ * the prefix. We have to use this canonical path to do a
+ * lookup in the cache-entry array so that we repeat the
+ * original search using the case-corrected spelling.
+ */
+ strbuf_addch(&canonical_path, '/');
+ pos = index_name_pos(istate, canonical_path.buf,
+ canonical_path.len);
+ nr_in_cone = handle_path_with_trailing_slash(
+ istate, canonical_path.buf, pos);
+ strbuf_release(&canonical_path);
+ return nr_in_cone;
+}
+
+/*
+ * The daemon sent an observed pathname without a trailing slash.
+ * (This is the normal case.) We do not know if it is a tracked or
+ * untracked file, a sparse-directory, or a populated directory (on a
+ * platform such as Windows where FSEvents are not qualified).
+ *
+ * The pathname contains the observed case reported by the FS. We
+ * do not know whether it is case-correct or case-incorrect.
+ *
+ * Assume it is case-correct and try an exact match.
+ *
+ * Return the number of cache-entries that we invalidated.
+ */
+static size_t handle_path_without_trailing_slash(
+ struct index_state *istate, const char *name, int pos)
+{
+ /*
+ * Mark the untracked cache dirty for this path (regardless of
+ * whether or not we find an exact match for it in the index).
+ * Since the path is unqualified (no trailing slash hint in the
+ * FSEvent), it may refer to a file or directory. So we should
+ * not assume one or the other and should always let the untracked
+ * cache decide what needs to be invalidated.
+ */
+ untracked_cache_invalidate_trimmed_path(istate, name, 0);
+
+ if (pos >= 0) {
+ /*
+ * An exact match on a tracked file. We assume that we
+ * do not need to scan forward for a sparse-directory
+ * cache-entry with the same pathname, nor for a cone
+ * at that directory. (That is, assume no D/F conflicts.)
+ */
+ invalidate_ce_fsm(istate->cache[pos]);
+ return 1;
} else {
- int pos = index_name_pos(istate, name, strlen(name));
+ size_t nr_in_cone;
+ struct strbuf work_path = STRBUF_INIT;
- if (pos >= 0) {
- struct cache_entry *ce = istate->cache[pos];
- ce->ce_flags &= ~CE_FSMONITOR_VALID;
- }
+ /*
+ * The negative "pos" gives us the suggested insertion
+ * point for the pathname (without the trailing slash).
+ * We need to see if there is a directory with that
+ * prefix, but there can be lots of pathnames between
+ * "foo" and "foo/" like "foo-" or "foo-bar", so we
+ * don't want to do our own scan.
+ */
+ strbuf_add(&work_path, name, strlen(name));
+ strbuf_addch(&work_path, '/');
+ pos = index_name_pos(istate, work_path.buf, work_path.len);
+ nr_in_cone = handle_path_with_trailing_slash(
+ istate, work_path.buf, pos);
+ strbuf_release(&work_path);
+ return nr_in_cone;
}
+}
+
+/*
+ * The daemon can decorate directory events, such as a move or rename,
+ * by adding a trailing slash to the observed name. Use this to
+ * explicitly invalidate the entire cone under that directory.
+ *
+ * The daemon can only reliably do that if the OS FSEvent contains
+ * sufficient information in the event.
+ *
+ * macOS FSEvents have enough information.
+ *
+ * Other platforms may or may not be able to do it (and it might
+ * depend on the type of event (for example, a daemon could lstat() an
+ * observed pathname after a rename, but not after a delete)).
+ *
+ * If we find an exact match in the index for a path with a trailing
+ * slash, it means that we matched a sparse-index directory in a
+ * cone-mode sparse-checkout (since that's the only time we have
+ * directories in the index). We should never see this in practice
+ * (because sparse directories should not be present and therefore
+ * not generating FS events). Either way, we can treat them in the
+ * same way and just invalidate the cache-entry and the untracked
+ * cache (and in this case, the forward cache-entry scan won't find
+ * anything and it doesn't hurt to let it run).
+ *
+ * Return the number of cache-entries that we invalidated. We will
+ * use this later to determine if we need to attempt a second
+ * case-insensitive search on case-insensitive file systems. That is,
+ * if the search using the observed-case in the FSEvent yields any
+ * results, we assume the prefix is case-correct. If there are no
+ * matches, we still don't know if the observed path is simply
+ * untracked or case-incorrect.
+ */
+static size_t handle_path_with_trailing_slash(
+ struct index_state *istate, const char *name, int pos)
+{
+ int i;
+ size_t nr_in_cone = 0;
+
+ /*
+ * Mark the untracked cache dirty for this directory path
+ * (regardless of whether or not we find an exact match for it
+ * in the index or find it to be a proper prefix of one or more
+ * files in the index), since the FSEvent is hinting that
+ * there may be changes on or within the directory.
+ */
+ untracked_cache_invalidate_trimmed_path(istate, name, 0);
+
+ if (pos < 0)
+ pos = -pos - 1;
+
+ /* Mark all entries for the folder invalid */
+ for (i = pos; i < istate->cache_nr; i++) {
+ if (!starts_with(istate->cache[i]->name, name))
+ break;
+ invalidate_ce_fsm(istate->cache[i]);
+ nr_in_cone++;
+ }
+
+ return nr_in_cone;
+}
+
+static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
+{
+ int len = strlen(name);
+ int pos = index_name_pos(istate, name, len);
+ size_t nr_in_cone;
+
+ trace_printf_key(&trace_fsmonitor,
+ "fsmonitor_refresh_callback '%s' (pos %d)",
+ name, pos);
+
+ if (name[len - 1] == '/')
+ nr_in_cone = handle_path_with_trailing_slash(istate, name, pos);
+ else
+ nr_in_cone = handle_path_without_trailing_slash(istate, name, pos);
/*
- * Mark the untracked cache dirty even if it wasn't found in the index
- * as it could be a new untracked file.
+ * If we did not find an exact match for this pathname or any
+ * cache-entries with this directory prefix and we're on a
+ * case-insensitive file system, try again using the name-hash
+ * and dir-name-hash.
*/
- trace_printf_key(&trace_fsmonitor, "fsmonitor_refresh_callback '%s'", name);
- untracked_cache_invalidate_path(istate, name, 0);
+ if (!nr_in_cone && ignore_case) {
+ nr_in_cone = handle_using_name_hash_icase(istate, name);
+ if (!nr_in_cone)
+ nr_in_cone = handle_using_dir_name_hash_icase(
+ istate, name);
+ }
+
+ if (nr_in_cone)
+ trace_printf_key(&trace_fsmonitor,
+ "fsmonitor_refresh_callback CNT: %d",
+ (int)nr_in_cone);
}
+/*
+ * The number of pathnames that we need to receive from FSMonitor
+ * before we force the index to be updated.
+ *
+ * Note that any pathname within the set of received paths MAY cause
+ * cache-entry or istate flag bits to be updated and thus cause the
+ * index to be updated on disk.
+ *
+ * However, the response may contain many paths (such as ignored
+ * paths) that will not update any flag bits, and thus not force the
+ * index to be updated. (This is fine and normal.) It also means
+ * that the token will not be updated in the FSMonitor index
+ * extension. So the next Git command will find the same token in the
+ * index, make the same token-relative request, and receive the same
+ * response (plus any newly changed paths). If this response is large
+ * (and continues to grow), performance could be impacted.
+ *
+ * For example, if the user runs a build and it writes 100K object
+ * files but doesn't modify any source files, the index would not need
+ * to be updated. The FSMonitor response (after the build and
+ * relative to a pre-build token) might be 5MB. Each subsequent Git
+ * command will receive that same 100K/5MB response until something
+ * causes the index to be updated. And `refresh_fsmonitor()` will
+ * have to iterate over those 100K paths each time.
+ *
+ * Performance could be improved if we optionally force update the
+ * index after a very large response and get an updated token into
+ * the FSMonitor index extension. This should allow subsequent
+ * commands to get smaller and more current responses.
+ *
+ * The value chosen here does not need to be precise. The index
+ * will be updated automatically the first time the user touches
+ * a tracked file and causes a command like `git status` to
+ * update an mtime and/or set a flag bit.
+ */
+static int fsmonitor_force_update_threshold = 100;
+
void refresh_fsmonitor(struct index_state *istate)
{
+ static int warn_once = 0;
struct strbuf query_result = STRBUF_INIT;
int query_success = 0, hook_version = -1;
size_t bol = 0; /* beginning of line */
@@ -238,17 +509,70 @@ void refresh_fsmonitor(struct index_state *istate)
struct strbuf last_update_token = STRBUF_INIT;
char *buf;
unsigned int i;
+ int is_trivial = 0;
+ struct repository *r = istate->repo;
+ enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(r);
+ enum fsmonitor_reason reason = fsm_settings__get_reason(r);
+
+ if (!warn_once && reason > FSMONITOR_REASON_OK) {
+ char *msg = fsm_settings__get_incompatible_msg(r, reason);
+ warn_once = 1;
+ warning("%s", msg);
+ free(msg);
+ }
- if (!core_fsmonitor || istate->fsmonitor_has_run_once)
+ if (fsm_mode <= FSMONITOR_MODE_DISABLED ||
+ istate->fsmonitor_has_run_once)
return;
- hook_version = fsmonitor_hook_version();
-
istate->fsmonitor_has_run_once = 1;
trace_printf_key(&trace_fsmonitor, "refresh fsmonitor");
+
+ if (fsm_mode == FSMONITOR_MODE_IPC) {
+ query_success = !fsmonitor_ipc__send_query(
+ istate->fsmonitor_last_update ?
+ istate->fsmonitor_last_update : "builtin:fake",
+ &query_result);
+ if (query_success) {
+ /*
+ * The response contains a series of nul terminated
+ * strings. The first is the new token.
+ *
+ * Use `char *buf` as an interlude to trick the CI
+ * static analysis to let us use `strbuf_addstr()`
+ * here (and only copy the token) rather than
+ * `strbuf_addbuf()`.
+ */
+ buf = query_result.buf;
+ strbuf_addstr(&last_update_token, buf);
+ bol = last_update_token.len + 1;
+ is_trivial = query_result.buf[bol] == '/';
+ if (is_trivial)
+ trace2_data_intmax("fsm_client", NULL,
+ "query/trivial-response", 1);
+ } else {
+ /*
+ * The builtin daemon is not available on this
+ * platform -OR- we failed to get a response.
+ *
+ * Generate a fake token (rather than a V1
+ * timestamp) for the index extension. (If
+ * they switch back to the hook API, we don't
+ * want ambiguous state.)
+ */
+ strbuf_addstr(&last_update_token, "builtin:fake");
+ }
+
+ goto apply_results;
+ }
+
+ assert(fsm_mode == FSMONITOR_MODE_HOOK);
+
+ hook_version = fsmonitor_hook_version();
+
/*
- * This could be racy so save the date/time now and query_fsmonitor
+ * This could be racy so save the date/time now and query_fsmonitor_hook
* should be inclusive to ensure we don't miss potential changes.
*/
last_update = getnanotime();
@@ -256,13 +580,14 @@ void refresh_fsmonitor(struct index_state *istate)
strbuf_addf(&last_update_token, "%"PRIu64"", last_update);
/*
- * If we have a last update token, call query_fsmonitor for the set of
+ * If we have a last update token, call query_fsmonitor_hook for the set of
* changes since that token, else assume everything is possibly dirty
* and check it all.
*/
if (istate->fsmonitor_last_update) {
if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) {
- query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION2,
+ query_success = !query_fsmonitor_hook(
+ r, HOOK_INTERFACE_VERSION2,
istate->fsmonitor_last_update, &query_result);
if (query_success) {
@@ -283,6 +608,7 @@ void refresh_fsmonitor(struct index_state *istate)
query_success = 0;
} else {
bol = last_update_token.len + 1;
+ is_trivial = query_result.buf[bol] == '/';
}
} else if (hook_version < 0) {
hook_version = HOOK_INTERFACE_VERSION1;
@@ -292,37 +618,83 @@ void refresh_fsmonitor(struct index_state *istate)
}
if (hook_version == HOOK_INTERFACE_VERSION1) {
- query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION1,
+ query_success = !query_fsmonitor_hook(
+ r, HOOK_INTERFACE_VERSION1,
istate->fsmonitor_last_update, &query_result);
+ if (query_success)
+ is_trivial = query_result.buf[0] == '/';
}
- trace_performance_since(last_update, "fsmonitor process '%s'", core_fsmonitor);
- trace_printf_key(&trace_fsmonitor, "fsmonitor process '%s' returned %s",
- core_fsmonitor, query_success ? "success" : "failure");
+ if (is_trivial)
+ trace2_data_intmax("fsm_hook", NULL,
+ "query/trivial-response", 1);
+
+ trace_performance_since(last_update, "fsmonitor process '%s'",
+ fsm_settings__get_hook_path(r));
+ trace_printf_key(&trace_fsmonitor,
+ "fsmonitor process '%s' returned %s",
+ fsm_settings__get_hook_path(r),
+ query_success ? "success" : "failure");
}
- /* a fsmonitor process can return '/' to indicate all entries are invalid */
- if (query_success && query_result.buf[bol] != '/') {
- /* Mark all entries returned by the monitor as dirty */
+apply_results:
+ /*
+ * The response from FSMonitor (excluding the header token) is
+ * either:
+ *
+ * [a] a (possibly empty) list of NUL delimited relative
+ * pathnames of changed paths. This list can contain
+ * files and directories. Directories have a trailing
+ * slash.
+ *
+ * [b] a single '/' to indicate the provider had no
+ * information and that we should consider everything
+ * invalid. We call this a trivial response.
+ */
+ trace2_region_enter("fsmonitor", "apply_results", istate->repo);
+
+ if (query_success && !is_trivial) {
+ /*
+ * Mark all pathnames returned by the monitor as dirty.
+ *
+ * This updates both the cache-entries and the untracked-cache.
+ */
+ int count = 0;
+
buf = query_result.buf;
for (i = bol; i < query_result.len; i++) {
if (buf[i] != '\0')
continue;
fsmonitor_refresh_callback(istate, buf + bol);
bol = i + 1;
+ count++;
}
- if (bol < query_result.len)
+ if (bol < query_result.len) {
fsmonitor_refresh_callback(istate, buf + bol);
+ count++;
+ }
/* Now mark the untracked cache for fsmonitor usage */
if (istate->untracked)
istate->untracked->use_fsmonitor = 1;
- } else {
- /* We only want to run the post index changed hook if we've actually changed entries, so keep track
- * if we actually changed entries or not */
+ if (count > fsmonitor_force_update_threshold)
+ istate->cache_changed |= FSMONITOR_CHANGED;
+
+ trace2_data_intmax("fsmonitor", istate->repo, "apply_count",
+ count);
+
+ } else {
+ /*
+ * We failed to get a response or received a trivial response,
+ * so invalidate everything.
+ *
+ * We only want to run the post index changed hook if
+ * we've actually changed entries, so keep track if we
+ * actually changed entries or not.
+ */
int is_cache_changed = 0;
- /* Mark all entries invalid */
+
for (i = 0; i < istate->cache_nr; i++) {
if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID) {
is_cache_changed = 1;
@@ -330,13 +702,18 @@ void refresh_fsmonitor(struct index_state *istate)
}
}
- /* If we're going to check every file, ensure we save the results */
+ /*
+ * If we're going to check every file, ensure we save
+ * the results.
+ */
if (is_cache_changed)
istate->cache_changed |= FSMONITOR_CHANGED;
if (istate->untracked)
istate->untracked->use_fsmonitor = 0;
}
+ trace2_region_leave("fsmonitor", "apply_results", istate->repo);
+
strbuf_release(&query_result);
/* Now that we've updated istate, save the last_update_token */
@@ -411,12 +788,15 @@ void remove_fsmonitor(struct index_state *istate)
void tweak_fsmonitor(struct index_state *istate)
{
unsigned int i;
- int fsmonitor_enabled = git_config_get_fsmonitor();
+ int fsmonitor_enabled = (fsm_settings__get_mode(istate->repo)
+ > FSMONITOR_MODE_DISABLED);
if (istate->fsmonitor_dirty) {
if (fsmonitor_enabled) {
/* Mark all entries valid */
for (i = 0; i < istate->cache_nr; i++) {
+ if (S_ISGITLINK(istate->cache[i]->ce_mode))
+ continue;
istate->cache[i]->ce_flags |= CE_FSMONITOR_VALID;
}
@@ -431,16 +811,8 @@ void tweak_fsmonitor(struct index_state *istate)
istate->fsmonitor_dirty = NULL;
}
- switch (fsmonitor_enabled) {
- case -1: /* keep: do nothing */
- break;
- case 0: /* false */
- remove_fsmonitor(istate);
- break;
- case 1: /* true */
+ if (fsmonitor_enabled)
add_fsmonitor(istate);
- break;
- default: /* unknown value: do nothing */
- break;
- }
+ else
+ remove_fsmonitor(istate);
}