summary | refs | log | tree | commit | diff
path: root/read-cache.c
diff options
context:
space:
mode:
author: Karsten Blees <karsten.blees@gmail.com> 2013-02-27 23:57:48 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2013-02-28 07:29:04 (GMT)
commit: 2092678cd5265e40dfb2b8e3adc8f2075faeece4 (patch)
tree: 3e697a301abb8500a1f88ea4f4ee278dfd13b000 /read-cache.c
parent: 15999998fbda60552742275570947431b57108ae (diff)
downloadgit-2092678cd5265e40dfb2b8e3adc8f2075faeece4.zip
git-2092678cd5265e40dfb2b8e3adc8f2075faeece4.tar.gz
git-2092678cd5265e40dfb2b8e3adc8f2075faeece4.tar.bz2
name-hash.c: fix endless loop with core.ignorecase=true
With core.ignorecase=true, name-hash.c builds a case-insensitive index of all tracked directories. Currently, the existing cache entry structures are added multiple times to the same hashtable (with different name lengths and hash codes). However, there's only one dir_next pointer, which gets completely messed up in case of hash collisions. In the worst case, this causes an endless loop if ce == ce->dir_next (see t7062).

Use a separate hashtable and separate structures for the directory index so that each directory entry has its own next pointer. Use reference counting to track which directory entries contain files.

There are only slight changes to the name-hash.c API:
- new free_name_hash() used by read-cache.c::discard_index()
- remove_name_hash() takes an additional index_state parameter
- index_name_exists() for a directory (trailing '/') may return a cache entry that has been removed (CE_UNHASHED). This is not a problem as the return value is only used to check if the directory exists (dir.c) or to normalize casing of directory names (read-cache.c).

Getting rid of cache_entry.dir_next reduces memory consumption, especially with core.ignorecase=false (which doesn't use that member at all).

With core.ignorecase=true, building the directory index is slightly faster as we add / check the parent directory first (instead of going through all directory levels for each file in the index). E.g. with WebKit (~200k files, ~7k dirs), time spent in lazy_init_name_hash is reduced from 176ms to 130ms.

Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'read-cache.c')
-rw-r--r-- read-cache.c 9
1 files changed, 4 insertions, 5 deletions
diff --git a/read-cache.c b/read-cache.c
index fda78bc..ffb425c 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -46,7 +46,7 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache
{
struct cache_entry *old = istate->cache[nr];
- remove_name_hash(old);
+ remove_name_hash(istate, old);
set_index_entry(istate, nr, ce);
istate->cache_changed = 1;
}
@@ -456,7 +456,7 @@ int remove_index_entry_at(struct index_state *istate, int pos)
struct cache_entry *ce = istate->cache[pos];
record_resolve_undo(istate, ce);
- remove_name_hash(ce);
+ remove_name_hash(istate, ce);
istate->cache_changed = 1;
istate->cache_nr--;
if (pos >= istate->cache_nr)
@@ -479,7 +479,7 @@ void remove_marked_cache_entries(struct index_state *istate)
for (i = j = 0; i < istate->cache_nr; i++) {
if (ce_array[i]->ce_flags & CE_REMOVE)
- remove_name_hash(ce_array[i]);
+ remove_name_hash(istate, ce_array[i]);
else
ce_array[j++] = ce_array[i];
}
@@ -1511,8 +1511,7 @@ int discard_index(struct index_state *istate)
istate->cache_changed = 0;
istate->timestamp.sec = 0;
istate->timestamp.nsec = 0;
- istate->name_hash_initialized = 0;
- free_hash(&istate->name_hash);
+ free_name_hash(istate);
cache_tree_free(&(istate->cache_tree));
istate->initialized = 0;