summaryrefslogtreecommitdiff
path: root/pack-objects.c
diff options
context:
space:
mode:
authorVicent Marti <tanoku@gmail.com>2013-10-24 18:01:06 (GMT)
committerJunio C Hamano <gitster@pobox.com>2013-10-24 22:44:48 (GMT)
commit2834bc27c16e4ae103768dd6dda530f0e46116af (patch)
tree5c26ff9d0bdf39c34dc7cb1c56d96d730167fb48 /pack-objects.c
parent92e5c77c3791567e299cc858a7ddfed410e2cec5 (diff)
downloadgit-2834bc27c16e4ae103768dd6dda530f0e46116af.zip
git-2834bc27c16e4ae103768dd6dda530f0e46116af.tar.gz
git-2834bc27c16e4ae103768dd6dda530f0e46116af.tar.bz2
pack-objects: refactor the packing list
The hash table that stores the packing list for a given `pack-objects` run was tightly coupled to the pack-objects code. In this commit, we refactor the hash table and the underlying storage array into a `packing_data` struct. The functionality for accessing and adding entries to the packing list is hence accessible from other parts of Git besides the `pack-objects` builtin. This refactoring is a requirement for further patches in this series that will require accessing the commit packing list from outside of `pack-objects`. The hash table implementation has been minimally altered: we now use table sizes which are always a power of two, to ensure a uniform index distribution in the array. Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'pack-objects.c')
-rw-r--r--pack-objects.c111
1 files changed, 111 insertions, 0 deletions
diff --git a/pack-objects.c b/pack-objects.c
new file mode 100644
index 0000000..d01d851
--- /dev/null
+++ b/pack-objects.c
@@ -0,0 +1,111 @@
+#include "cache.h"
+#include "object.h"
+#include "pack.h"
+#include "pack-objects.h"
+
+static uint32_t locate_object_entry_hash(struct packing_data *pdata,
+ const unsigned char *sha1,
+ int *found)
+{
+ uint32_t i, hash, mask = (pdata->index_size - 1);
+
+ memcpy(&hash, sha1, sizeof(uint32_t));
+ i = hash & mask;
+
+ while (pdata->index[i] > 0) {
+ uint32_t pos = pdata->index[i] - 1;
+
+ if (!hashcmp(sha1, pdata->objects[pos].idx.sha1)) {
+ *found = 1;
+ return i;
+ }
+
+ i = (i + 1) & mask;
+ }
+
+ *found = 0;
+ return i;
+}
+
+static inline uint32_t closest_pow2(uint32_t v)
+{
+ v = v - 1;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ return v + 1;
+}
+
+static void rehash_objects(struct packing_data *pdata)
+{
+ uint32_t i;
+ struct object_entry *entry;
+
+ pdata->index_size = closest_pow2(pdata->nr_objects * 3);
+ if (pdata->index_size < 1024)
+ pdata->index_size = 1024;
+
+ pdata->index = xrealloc(pdata->index, sizeof(uint32_t) * pdata->index_size);
+ memset(pdata->index, 0, sizeof(int) * pdata->index_size);
+
+ entry = pdata->objects;
+
+ for (i = 0; i < pdata->nr_objects; i++) {
+ int found;
+ uint32_t ix = locate_object_entry_hash(pdata, entry->idx.sha1, &found);
+
+ if (found)
+ die("BUG: Duplicate object in hash");
+
+ pdata->index[ix] = i + 1;
+ entry++;
+ }
+}
+
+struct object_entry *packlist_find(struct packing_data *pdata,
+ const unsigned char *sha1,
+ uint32_t *index_pos)
+{
+ uint32_t i;
+ int found;
+
+ if (!pdata->index_size)
+ return NULL;
+
+ i = locate_object_entry_hash(pdata, sha1, &found);
+
+ if (index_pos)
+ *index_pos = i;
+
+ if (!found)
+ return NULL;
+
+ return &pdata->objects[pdata->index[i] - 1];
+}
+
+struct object_entry *packlist_alloc(struct packing_data *pdata,
+ const unsigned char *sha1,
+ uint32_t index_pos)
+{
+ struct object_entry *new_entry;
+
+ if (pdata->nr_objects >= pdata->nr_alloc) {
+ pdata->nr_alloc = (pdata->nr_alloc + 1024) * 3 / 2;
+ pdata->objects = xrealloc(pdata->objects,
+ pdata->nr_alloc * sizeof(*new_entry));
+ }
+
+ new_entry = pdata->objects + pdata->nr_objects++;
+
+ memset(new_entry, 0, sizeof(*new_entry));
+ hashcpy(new_entry->idx.sha1, sha1);
+
+ if (pdata->index_size * 3 <= pdata->nr_objects * 4)
+ rehash_objects(pdata);
+ else
+ pdata->index[index_pos] = pdata->nr_objects;
+
+ return new_entry;
+}