summaryrefslogtreecommitdiff
path: root/hash.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2007-10-25 18:23:26 (GMT)
committerJunio C Hamano <gitster@pobox.com>2007-10-27 06:18:06 (GMT)
commit9027f53cb5051bf83a0254e7f8aeb5d1a206de0b (patch)
tree3ee3d48a47f89d7c41c5a9e4430f0db6507f4d9e /hash.c
parent644797119d7a3b7a043a51a9cccd8758f8451f91 (diff)
downloadgit-9027f53cb5051bf83a0254e7f8aeb5d1a206de0b.zip
git-9027f53cb5051bf83a0254e7f8aeb5d1a206de0b.tar.gz
git-9027f53cb5051bf83a0254e7f8aeb5d1a206de0b.tar.bz2
Do linear-time/space rename logic for exact renames
This implements a smarter rename detector for exact renames, which rather than doing a pairwise comparison (time O(m*n)) will just hash the files into a hash-table (size O(n+m)), and only do pairwise comparisons to renames that have the same hash (time O(n+m) except for unrealistic hash collissions, which we just cull aggressively). Admittedly the exact rename case is not nearly as interesting as the generic case, but it's an important case none-the-less. A similar general approach should work for the generic case too, but even then you do need to handle the exact renames/copies separately (to avoid the inevitable added cost factor that comes from the _size_ of the file), so this is worth doing. In the expectation that we will indeed do the same hashing trick for the general rename case, this code uses a generic hash-table implementation that can be used for other things too. In fact, we might be able to consolidate some of our existing hash tables with the new generic code in hash.[ch]. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'hash.c')
-rw-r--r--hash.c110
1 files changed, 110 insertions, 0 deletions
diff --git a/hash.c b/hash.c
new file mode 100644
index 0000000..7b492d4
--- /dev/null
+++ b/hash.c
@@ -0,0 +1,110 @@
+/*
+ * Some generic hashing helpers.
+ */
+#include "cache.h"
+#include "hash.h"
+
+/*
+ * Look up a hash entry in the hash table. Return the pointer to
+ * the existing entry, or the empty slot if none existed. The caller
+ * can then look at the (*ptr) to see whether it existed or not.
+ */
+static struct hash_table_entry *lookup_hash_entry(unsigned int hash, struct hash_table *table)
+{
+ unsigned int size = table->size, nr = hash % size;
+ struct hash_table_entry *array = table->array;
+
+ while (array[nr].ptr) {
+ if (array[nr].hash == hash)
+ break;
+ nr++;
+ if (nr >= size)
+ nr = 0;
+ }
+ return array + nr;
+}
+
+
+/*
+ * Insert a new hash entry pointer into the table.
+ *
+ * If that hash entry already existed, return the pointer to
+ * the existing entry (and the caller can create a list of the
+ * pointers or do anything else). If it didn't exist, return
+ * NULL (and the caller knows the pointer has been inserted).
+ */
+static void **insert_hash_entry(unsigned int hash, void *ptr, struct hash_table *table)
+{
+ struct hash_table_entry *entry = lookup_hash_entry(hash, table);
+
+ if (!entry->ptr) {
+ entry->ptr = ptr;
+ entry->hash = hash;
+ table->nr++;
+ return NULL;
+ }
+ return &entry->ptr;
+}
+
+static void grow_hash_table(struct hash_table *table)
+{
+ unsigned int i;
+ unsigned int old_size = table->size, new_size;
+ struct hash_table_entry *old_array = table->array, *new_array;
+
+ new_size = alloc_nr(old_size);
+ new_array = xcalloc(sizeof(struct hash_table_entry), new_size);
+ table->size = new_size;
+ table->array = new_array;
+ table->nr = 0;
+ for (i = 0; i < old_size; i++) {
+ unsigned int hash = old_array[i].hash;
+ void *ptr = old_array[i].ptr;
+ if (ptr)
+ insert_hash_entry(hash, ptr, table);
+ }
+ free(old_array);
+}
+
+void *lookup_hash(unsigned int hash, struct hash_table *table)
+{
+ if (!table->array)
+ return NULL;
+ return &lookup_hash_entry(hash, table)->ptr;
+}
+
+void **insert_hash(unsigned int hash, void *ptr, struct hash_table *table)
+{
+ unsigned int nr = table->nr;
+ if (nr >= table->size/2)
+ grow_hash_table(table);
+ return insert_hash_entry(hash, ptr, table);
+}
+
+int for_each_hash(struct hash_table *table, int (*fn)(void *))
+{
+ int sum = 0;
+ unsigned int i;
+ unsigned int size = table->size;
+ struct hash_table_entry *array = table->array;
+
+ for (i = 0; i < size; i++) {
+ void *ptr = array->ptr;
+ array++;
+ if (ptr) {
+ int val = fn(ptr);
+ if (val < 0)
+ return val;
+ sum += val;
+ }
+ }
+ return sum;
+}
+
+void free_hash(struct hash_table *table)
+{
+ free(table->array);
+ table->array = NULL;
+ table->size = 0;
+ table->nr = 0;
+}