From 4bd5b7dacc404e6b733d9ab6744429c5027bb5e1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 10 Nov 2007 00:15:03 -0800 Subject: ce_match_stat, run_diff_files: use symbolic constants for readability ce_match_stat() can be told: (1) to ignore CE_VALID bit (used under "assume unchanged" mode) and perform the stat comparison anyway; (2) not to perform the contents comparison for racily clean entries and report mismatch of cached stat information; using its "option" parameter. Give them symbolic constants. Similarly, run_diff_files() can be told not to report anything on removed paths. Also give it a symbolic constant for that. Signed-off-by: Junio C Hamano diff --git a/builtin-apply.c b/builtin-apply.c index 5cc90e6..0fff02e 100644 --- a/builtin-apply.c +++ b/builtin-apply.c @@ -2099,7 +2099,7 @@ static int verify_index_match(struct cache_entry *ce, struct stat *st) return -1; return 0; } - return ce_match_stat(ce, st, 1); + return ce_match_stat(ce, st, CE_MATCH_IGNORE_VALID); } static int check_patch(struct patch *patch, struct patch *prev_patch) diff --git a/cache.h b/cache.h index fc195bc..31af16a 100644 --- a/cache.h +++ b/cache.h @@ -174,8 +174,8 @@ extern struct index_state the_index; #define remove_file_from_cache(path) remove_file_from_index(&the_index, (path)) #define add_file_to_cache(path, verbose) add_file_to_index(&the_index, (path), (verbose)) #define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL) -#define ce_match_stat(ce, st, really) ie_match_stat(&the_index, (ce), (st), (really)) -#define ce_modified(ce, st, really) ie_modified(&the_index, (ce), (st), (really)) +#define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options)) +#define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options)) #endif enum object_type { @@ -266,8 +266,14 @@ extern int remove_file_from_index(struct index_state *, const char *path); extern int add_file_to_index(struct index_state *, const char *path, int verbose); extern struct cache_entry *make_cache_entry(unsigned int mode, const unsigned char *sha1, const char *path, int stage, int refresh); extern int ce_same_name(struct cache_entry *a, struct cache_entry *b); -extern int ie_match_stat(struct index_state *, struct cache_entry *, struct stat *, int); -extern int ie_modified(struct index_state *, struct cache_entry *, struct stat *, int); + +/* do stat comparison even if CE_VALID is true */ +#define CE_MATCH_IGNORE_VALID 01 +/* do not check the contents but report dirty on racily-clean entries */ +#define CE_MATCH_RACY_IS_DIRTY 02 +extern int ie_match_stat(struct index_state *, struct cache_entry *, struct stat *, unsigned int); +extern int ie_modified(struct index_state *, struct cache_entry *, struct stat *, unsigned int); + extern int ce_path_match(const struct cache_entry *ce, const char **pathspec); extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path); extern int read_fd(int fd, char **return_buf, unsigned long *return_size); diff --git a/check-racy.c b/check-racy.c index d6a08b4..00d92a1 100644 --- a/check-racy.c +++ b/check-racy.c @@ -18,7 +18,7 @@ int main(int ac, char **av) if (ce_match_stat(ce, &st, 0)) dirty++; - else if (ce_match_stat(ce, &st, 2)) + else if (ce_match_stat(ce, &st, CE_MATCH_RACY_IS_DIRTY)) racy++; else clean++; diff --git a/diff-lib.c b/diff-lib.c index da55713..9f8afbe 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -173,9 +173,10 @@ static int is_in_index(const char *path) } static int handle_diff_files_args(struct rev_info *revs, - int argc, const char **argv, int *silent) + int argc, const char **argv, + unsigned int *options) { - *silent = 0; + *options = 0; /* revs->max_count == -2 means --no-index */ while (1 < argc && argv[1][0] == '-') { @@ -192,7 +193,7 @@ static int handle_diff_files_args(struct rev_info *revs, revs->diffopt.no_index = 1; } else if (!strcmp(argv[1], "-q")) - *silent = 1; + *options |= DIFF_SILENT_ON_REMOVED; else return error("invalid option: %s", argv[1]); argv++; argc--; @@ -305,9 +306,9 @@ int setup_diff_no_index(struct rev_info *revs, int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv) { - int silent_on_removed; + unsigned int options; - if (handle_diff_files_args(revs, argc, argv, &silent_on_removed)) + if (handle_diff_files_args(revs, argc, argv, &options)) return -1; if (revs->diffopt.no_index) { @@ -329,13 +330,14 @@ int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv) perror("read_cache"); return -1; } - return run_diff_files(revs, silent_on_removed); + return run_diff_files(revs, options); } -int run_diff_files(struct rev_info *revs, int silent_on_removed) +int run_diff_files(struct rev_info *revs, unsigned int option) { int entries, i; int diff_unmerged_stage = revs->max_count; + int silent_on_removed = option & DIFF_SILENT_ON_REMOVED; if (diff_unmerged_stage < 0) diff_unmerged_stage = 2; diff --git a/diff.h b/diff.h index 4546aad..de533da 100644 --- a/diff.h +++ b/diff.h @@ -224,7 +224,9 @@ extern void diff_flush(struct diff_options*); extern const char *diff_unique_abbrev(const unsigned char *, int); -extern int run_diff_files(struct rev_info *revs, int silent_on_removed); +/* do not report anything on removed paths */ +#define DIFF_SILENT_ON_REMOVED 01 +extern int run_diff_files(struct rev_info *revs, unsigned int option); extern int setup_diff_no_index(struct rev_info *revs, int argc, const char ** argv, int nongit, const char *prefix); extern int run_diff_files_cmd(struct rev_info *revs, int argc, const char **argv); diff --git a/entry.c b/entry.c index fc3a506..ef88f62 100644 --- a/entry.c +++ b/entry.c @@ -200,7 +200,7 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t strcpy(path + len, ce->name); if (!lstat(path, &st)) { - unsigned changed = ce_match_stat(ce, &st, 1); + unsigned changed = ce_match_stat(ce, &st, CE_MATCH_IGNORE_VALID); if (!changed) return 0; if (!state->force) { diff --git a/read-cache.c b/read-cache.c index 928e8fa..9e4d4a9 100644 --- a/read-cache.c +++ b/read-cache.c @@ -194,11 +194,12 @@ static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st) } int ie_match_stat(struct index_state *istate, - struct cache_entry *ce, struct stat *st, int options) + struct cache_entry *ce, struct stat *st, + unsigned int options) { unsigned int changed; - int ignore_valid = options & 01; - int assume_racy_is_modified = options & 02; + int ignore_valid = options & CE_MATCH_IGNORE_VALID; + int assume_racy_is_modified = options & CE_MATCH_RACY_IS_DIRTY; /* * If it's marked as always valid in the index, it's @@ -238,10 +239,11 @@ int ie_match_stat(struct index_state *istate, } int ie_modified(struct index_state *istate, - struct cache_entry *ce, struct stat *st, int really) + struct cache_entry *ce, struct stat *st, unsigned int options) { int changed, changed_fs; - changed = ie_match_stat(istate, ce, st, really); + + changed = ie_match_stat(istate, ce, st, options); if (!changed) return 0; /* @@ -420,7 +422,7 @@ int add_file_to_index(struct index_state *istate, const char *path, int verbose) pos = index_name_pos(istate, ce->name, namelen); if (0 <= pos && !ce_stage(istate->cache[pos]) && - !ie_modified(istate, istate->cache[pos], &st, 1)) { + !ie_modified(istate, istate->cache[pos], &st, CE_MATCH_IGNORE_VALID)) { /* Nothing changed, really */ free(ce); return 0; @@ -782,11 +784,13 @@ int add_index_entry(struct index_state *istate, struct cache_entry *ce, int opti * to link up the stat cache details with the proper files. */ static struct cache_entry *refresh_cache_ent(struct index_state *istate, - struct cache_entry *ce, int really, int *err) + struct cache_entry *ce, + unsigned int options, int *err) { struct stat st; struct cache_entry *updated; int changed, size; + int ignore_valid = options & CE_MATCH_IGNORE_VALID; if (lstat(ce->name, &st) < 0) { if (err) @@ -794,16 +798,23 @@ static struct cache_entry *refresh_cache_ent(struct index_state *istate, return NULL; } - changed = ie_match_stat(istate, ce, &st, really); + changed = ie_match_stat(istate, ce, &st, options); if (!changed) { - if (really && assume_unchanged && + /* + * The path is unchanged. If we were told to ignore + * valid bit, then we did the actual stat check and + * found that the entry is unmodified. If the entry + * is not marked VALID, this is the place to mark it + * valid again, under "assume unchanged" mode. + */ + if (ignore_valid && assume_unchanged && !(ce->ce_flags & htons(CE_VALID))) ; /* mark this one VALID again */ else return ce; } - if (ie_modified(istate, ce, &st, really)) { + if (ie_modified(istate, ce, &st, options)) { if (err) *err = EINVAL; return NULL; @@ -814,13 +825,14 @@ static struct cache_entry *refresh_cache_ent(struct index_state *istate, memcpy(updated, ce, size); fill_stat_cache_info(updated, &st); - /* In this case, if really is not set, we should leave - * CE_VALID bit alone. Otherwise, paths marked with - * --no-assume-unchanged (i.e. things to be edited) will - * reacquire CE_VALID bit automatically, which is not - * really what we want. + /* + * If ignore_valid is not set, we should leave CE_VALID bit + * alone. Otherwise, paths marked with --no-assume-unchanged + * (i.e. things to be edited) will reacquire CE_VALID bit + * automatically, which is not really what we want. */ - if (!really && assume_unchanged && !(ce->ce_flags & htons(CE_VALID))) + if (!ignore_valid && assume_unchanged && + !(ce->ce_flags & htons(CE_VALID))) updated->ce_flags &= ~htons(CE_VALID); return updated; @@ -834,6 +846,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, const char **p int allow_unmerged = (flags & REFRESH_UNMERGED) != 0; int quiet = (flags & REFRESH_QUIET) != 0; int not_new = (flags & REFRESH_IGNORE_MISSING) != 0; + unsigned int options = really ? CE_MATCH_IGNORE_VALID : 0; for (i = 0; i < istate->cache_nr; i++) { struct cache_entry *ce, *new; @@ -855,7 +868,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, const char **p if (pathspec && !match_pathspec(pathspec, ce->name, strlen(ce->name), 0, seen)) continue; - new = refresh_cache_ent(istate, ce, really, &cache_errno); + new = refresh_cache_ent(istate, ce, options, &cache_errno); if (new == ce) continue; if (!new) { diff --git a/unpack-trees.c b/unpack-trees.c index ccfeb6e..9411c67 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -406,7 +406,7 @@ static void verify_uptodate(struct cache_entry *ce, return; if (!lstat(ce->name, &st)) { - unsigned changed = ce_match_stat(ce, &st, 1); + unsigned changed = ce_match_stat(ce, &st, CE_MATCH_IGNORE_VALID); if (!changed) return; /* @@ -927,7 +927,7 @@ int oneway_merge(struct cache_entry **src, if (o->reset) { struct stat st; if (lstat(old->name, &st) || - ce_match_stat(old, &st, 1)) + ce_match_stat(old, &st, CE_MATCH_IGNORE_VALID)) old->ce_flags |= htons(CE_UPDATE); } return keep_entry(old, o); -- cgit v0.10.2-6-g49f6 From fb63d7f889cf5df417b731b07952689df7f745c8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 9 Nov 2007 18:22:52 -0800 Subject: git-add: make the entry stat-clean after re-adding the same contents Earlier in commit 0781b8a9b2fe760fc4ed519a3a26e4b9bd6ccffe (add_file_to_index: skip rehashing if the cached stat already matches), add_file_to_index() were taught not to re-add the path if it already matches the index. The change meant well, but was not executed quite right. It used ie_modified() to see if the file on the work tree is really different from the index, and skipped adding the contents if the function says "not modified". This was wrong. There are three possible comparison results between the index and the file in the work tree: - with lstat(2) we _know_ they are different. E.g. if the length or the owner in the cached stat information is different from the length we just obtained from lstat(2), we can tell the file is modified without looking at the actual contents. - with lstat(2) we _know_ they are the same. The same length, the same owner, the same everything (but this has a twist, as described below). - we cannot tell from lstat(2) information alone and need to go to the filesystem to actually compare. The last case arises from what we call 'racy git' situation, that can be caused with this sequence: $ echo hello >file $ git add file $ echo aeiou >file ;# the same length If the second "echo" is done within the same filesystem timestamp granularity as the first "echo", then the timestamp recorded by "git add" and the timestamp we get from lstat(2) will be the same, and we can mistakenly say the file is not modified. The path is called 'racily clean'. We need to reliably detect racily clean paths are in fact modified. To solve this problem, when we write out the index, we mark the index entry that has the same timestamp as the index file itself (that is the time from the point of view of the filesystem) to tell any later code that does the lstat(2) comparison not to trust the cached stat info, and ie_modified() then actually goes to the filesystem to compare the contents for such a path. That's all good, but it should not be used for this "git add" optimization, as the goal of "git add" is to actually update the path in the index and make it stat-clean. With the false optimization, we did _not_ cause any data loss (after all, what we failed to do was only to update the cached stat information), but it made the following sequence leave the file stat dirty: $ echo hello >file $ git add file $ echo hello >file ;# the same contents $ git add file The solution is not to use ie_modified() which goes to the filesystem to see if it is really clean, but instead use ie_match_stat() with "assume racily clean paths are dirty" option, to force re-adding of such a path. There was another problem with "git add -u". The codepath shares the same issue when adding the paths that are found to be modified, but in addition, it asked "git diff-files" machinery run_diff_files() function (which is "git diff-files") to list the paths that are modified. But "git diff-files" machinery uses the same ie_modified() call so that it does not report racily clean _and_ actually clean paths as modified, which is not what we want. The patch allows the callers of run_diff_files() to pass the same "assume racily clean paths are dirty" option, and makes "git-add -u" codepath to use that option, to discover and re-add racily clean _and_ actually clean paths. We could further optimize on top of this patch to differentiate the case where the path really needs re-adding (i.e. the content of the racily clean entry was indeed different) and the case where only the cached stat information needs to be refreshed (i.e. the racily clean entry was actually clean), but I do not think it is worth it. This patch applies to maint and all the way up. Signed-off-by: Junio C Hamano diff --git a/builtin-add.c b/builtin-add.c index 373f87f..e072320 100644 --- a/builtin-add.c +++ b/builtin-add.c @@ -123,7 +123,7 @@ static void update(int verbose, const char *prefix, const char **files) rev.diffopt.format_callback_data = &verbose; if (read_cache() < 0) die("index file corrupt"); - run_diff_files(&rev, 0); + run_diff_files(&rev, DIFF_RACY_IS_MODIFIED); } static void refresh(int verbose, const char **pathspec) diff --git a/diff-lib.c b/diff-lib.c index 9f8afbe..ec1b5e3 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -338,6 +338,8 @@ int run_diff_files(struct rev_info *revs, unsigned int option) int entries, i; int diff_unmerged_stage = revs->max_count; int silent_on_removed = option & DIFF_SILENT_ON_REMOVED; + unsigned ce_option = ((option & DIFF_RACY_IS_MODIFIED) + ? CE_MATCH_RACY_IS_DIRTY : 0); if (diff_unmerged_stage < 0) diff_unmerged_stage = 2; @@ -443,7 +445,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) ce->sha1, ce->name, NULL); continue; } - changed = ce_match_stat(ce, &st, 0); + changed = ce_match_stat(ce, &st, ce_option); if (!changed && !revs->diffopt.find_copies_harder) continue; oldmode = ntohl(ce->ce_mode); diff --git a/diff.h b/diff.h index de533da..efaa8f7 100644 --- a/diff.h +++ b/diff.h @@ -226,6 +226,8 @@ extern const char *diff_unique_abbrev(const unsigned char *, int); /* do not report anything on removed paths */ #define DIFF_SILENT_ON_REMOVED 01 +/* report racily-clean paths as modified */ +#define DIFF_RACY_IS_MODIFIED 02 extern int run_diff_files(struct rev_info *revs, unsigned int option); extern int setup_diff_no_index(struct rev_info *revs, int argc, const char ** argv, int nongit, const char *prefix); diff --git a/read-cache.c b/read-cache.c index 9e4d4a9..c3dbf89 100644 --- a/read-cache.c +++ b/read-cache.c @@ -388,6 +388,7 @@ int add_file_to_index(struct index_state *istate, const char *path, int verbose) int size, namelen, pos; struct stat st; struct cache_entry *ce; + unsigned ce_option = CE_MATCH_IGNORE_VALID|CE_MATCH_RACY_IS_DIRTY; if (lstat(path, &st)) die("%s: unable to stat (%s)", path, strerror(errno)); @@ -422,7 +423,7 @@ int add_file_to_index(struct index_state *istate, const char *path, int verbose) pos = index_name_pos(istate, ce->name, namelen); if (0 <= pos && !ce_stage(istate->cache[pos]) && - !ie_modified(istate, istate->cache[pos], &st, CE_MATCH_IGNORE_VALID)) { + !ie_match_stat(istate, istate->cache[pos], &st, ce_option)) { /* Nothing changed, really */ free(ce); return 0; -- cgit v0.10.2-6-g49f6 From 25487bde2ab756a423489fc942b37c1550555b93 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 11 Nov 2007 18:44:16 -0800 Subject: t2200: test more cases of "add -u" Signed-off-by: Junio C Hamano diff --git a/t/t2200-add-update.sh b/t/t2200-add-update.sh index eb1ced3..24f892f 100755 --- a/t/t2200-add-update.sh +++ b/t/t2200-add-update.sh @@ -1,6 +1,6 @@ #!/bin/sh -test_description='git add -u with path limiting +test_description='git add -u This test creates a working tree state with three files: @@ -9,7 +9,10 @@ This test creates a working tree state with three files: dir/other (untracked) and issues a git add -u with path limiting on "dir" to add -only the updates to dir/sub.' +only the updates to dir/sub. + +Also tested are "git add -u" without limiting, and "git add -u" +without contents changes.' . ./test-lib.sh @@ -85,4 +88,27 @@ test_expect_success 'replace a file with a symlink' ' ' +test_expect_success 'add everything changed' ' + + git add -u && + test -z "$(git diff-files)" + +' + +test_expect_success 'touch and then add -u' ' + + touch check && + git add -u && + test -z "$(git diff-files)" + +' + +test_expect_success 'touch and then add explicitly' ' + + touch check && + git add check && + test -z "$(git diff-files)" + +' + test_done -- cgit v0.10.2-6-g49f6