summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2016-10-27 21:58:47 (GMT)
committerJunio C Hamano <gitster@pobox.com>2016-10-27 21:58:47 (GMT)
commitd7ae013a3173c621a3556be6834d459ece60e130 (patch)
tree3fc7d1145c484923f1a006a94029d67ffc56a685
parent580d820ece78100c5e2b8b5874d7aed5d76715f2 (diff)
parent8e3f52d77854a19cb3fd2adee40be84c8a8bdacc (diff)
downloadgit-d7ae013a3173c621a3556be6834d459ece60e130.zip
git-d7ae013a3173c621a3556be6834d459ece60e130.tar.gz
git-d7ae013a3173c621a3556be6834d459ece60e130.tar.bz2
Merge branch 'jk/abbrev-auto'
Updates the way approximate count of total objects is computed while attempting to come up with a unique abbreviated object name, which in turn needs to estimate how many hexdigits are necessary to ensure uniqueness. * jk/abbrev-auto: find_unique_abbrev: move logic out of get_short_sha1()
-rw-r--r--cache.h7
-rw-r--r--sha1_file.c27
-rw-r--r--sha1_name.c60
3 files changed, 68 insertions, 26 deletions
diff --git a/cache.h b/cache.h
index 446d4cb..b7f34b4 100644
--- a/cache.h
+++ b/cache.h
@@ -1211,7 +1211,6 @@ struct object_context {
#define GET_SHA1_TREEISH 020
#define GET_SHA1_BLOB 040
#define GET_SHA1_FOLLOW_SYMLINKS 0100
-#define GET_SHA1_AUTOMATIC 0200
#define GET_SHA1_ONLY_TO_DIE 04000
#define GET_SHA1_DISAMBIGUATORS \
@@ -1494,6 +1493,12 @@ extern void prepare_packed_git(void);
extern void reprepare_packed_git(void);
extern void install_packed_git(struct packed_git *pack);
+/*
+ * Give a rough count of objects in the repository. This sacrifices accuracy
+ * for speed.
+ */
+unsigned long approximate_object_count(void);
+
extern struct packed_git *find_sha1_pack(const unsigned char *sha1,
struct packed_git *packs);
diff --git a/sha1_file.c b/sha1_file.c
index 2eda929..1e41954 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1410,6 +1410,32 @@ static void prepare_packed_git_one(char *objdir, int local)
strbuf_release(&path);
}
+static int approximate_object_count_valid;
+
+/*
+ * Give a fast, rough count of the number of objects in the repository. This
+ * ignores loose objects completely. If you have a lot of them, then either
+ * you should repack because your performance will be awful, or they are
+ * all unreachable objects about to be pruned, in which case they're not really
+ * interesting as a measure of repo size in the first place.
+ */
+unsigned long approximate_object_count(void)
+{
+ static unsigned long count;
+ if (!approximate_object_count_valid) {
+ struct packed_git *p;
+
+ prepare_packed_git();
+ count = 0;
+ for (p = packed_git; p; p = p->next) {
+ if (open_pack_index(p))
+ continue;
+ count += p->num_objects;
+ }
+ }
+ return count;
+}
+
static void *get_next_packed_git(const void *p)
{
return ((const struct packed_git *)p)->next;
@@ -1481,6 +1507,7 @@ void prepare_packed_git(void)
void reprepare_packed_git(void)
{
+ approximate_object_count_valid = 0;
prepare_packed_git_run_once = 0;
prepare_packed_git();
}
diff --git a/sha1_name.c b/sha1_name.c
index 84662a6..c71fc17 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -15,7 +15,6 @@ typedef int (*disambiguate_hint_fn)(const unsigned char *, void *);
struct disambiguate_state {
int len; /* length of prefix in hex chars */
- unsigned int nrobjects;
char hex_pfx[GIT_SHA1_HEXSZ + 1];
unsigned char bin_pfx[GIT_SHA1_RAWSZ];
@@ -112,14 +111,6 @@ static void find_short_object_filename(struct disambiguate_state *ds)
if (strlen(de->d_name) != 38)
continue;
-
- /*
- * We only look at the one subdirectory, and we assume
- * each subdirectory is roughly similar, so each
- * object we find probably has 255 other objects in
- * the other fan-out directories.
- */
- ds->nrobjects += 256;
if (memcmp(de->d_name, ds->hex_pfx + 2, ds->len - 2))
continue;
memcpy(hex + 2, de->d_name, 38);
@@ -153,7 +144,6 @@ static void unique_in_pack(struct packed_git *p,
open_pack_index(p);
num = p->num_objects;
- ds->nrobjects += num;
last = num;
while (first < last) {
uint32_t mid = (first + last) / 2;
@@ -383,9 +373,6 @@ static int show_ambiguous_object(const unsigned char *sha1, void *data)
return 0;
}
-/* start from our historical default before the automatic abbreviation */
-static int default_automatic_abbrev = FALLBACK_DEFAULT_ABBREV;
-
static int get_short_sha1(const char *name, int len, unsigned char *sha1,
unsigned flags)
{
@@ -432,14 +419,6 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1,
for_each_abbrev(ds.hex_pfx, show_ambiguous_object, &ds);
}
- if (len < 16 && !status && (flags & GET_SHA1_AUTOMATIC)) {
- unsigned int expect_collision = 1 << (len * 2);
- if (ds.nrobjects > expect_collision) {
- default_automatic_abbrev = len+1;
- return SHORT_NAME_AMBIGUOUS;
- }
- }
-
return status;
}
@@ -469,22 +448,53 @@ int for_each_abbrev(const char *prefix, each_abbrev_fn fn, void *cb_data)
return ret;
}
+/*
+ * Return the slot of the most-significant bit set in "val". There are various
+ * ways to do this quickly with fls() or __builtin_clzl(), but speed is
+ * probably not a big deal here.
+ */
+static unsigned msb(unsigned long val)
+{
+ unsigned r = 0;
+ while (val >>= 1)
+ r++;
+ return r;
+}
+
int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
{
int status, exists;
- int flags = GET_SHA1_QUIETLY;
if (len < 0) {
- flags |= GET_SHA1_AUTOMATIC;
- len = default_automatic_abbrev;
+ unsigned long count = approximate_object_count();
+ /*
+ * Add one because the MSB only tells us the highest bit set,
+ * not including the value of all the _other_ bits (so "15"
+ * is only one off of 2^4, but the MSB is the 3rd bit.
+ */
+ len = msb(count) + 1;
+ /*
+ * We now know we have on the order of 2^len objects, which
+ * expects a collision at 2^(len/2). But we also care about hex
+ * chars, not bits, and there are 4 bits per hex. So all
+ * together we need to divide by 2; but we also want to round
+ * odd numbers up, hence adding one before dividing.
+ */
+ len = (len + 1) / 2;
+ /*
+ * For very small repos, we stick with our regular fallback.
+ */
+ if (len < FALLBACK_DEFAULT_ABBREV)
+ len = FALLBACK_DEFAULT_ABBREV;
}
+
sha1_to_hex_r(hex, sha1);
if (len == 40 || !len)
return 40;
exists = has_sha1_file(sha1);
while (len < 40) {
unsigned char sha1_ret[20];
- status = get_short_sha1(hex, len, sha1_ret, flags);
+ status = get_short_sha1(hex, len, sha1_ret, GET_SHA1_QUIETLY);
if (exists
? !status
: status == SHORT_NAME_NOT_FOUND) {