From d079837eeeadc37d266113a1fd2deb0a01aaee91 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 26 May 2007 01:24:19 -0400 Subject: Lazily open pack index files on demand In some repository configurations the user may have many packfiles, but all of the recent commits/trees/tags/blobs are likely to be in the most recent packfile (the one with the newest mtime). It is therefore common to be able to complete an entire operation by accessing only one packfile, even if there are 25 packfiles available to the repository. Rather than opening and mmaping the corresponding .idx file for every pack found, we now only open and map the .idx when we suspect there might be an object of interest in there. Of course we cannot known in advance which packfile contains an object, so we still need to scan the entire packed_git list to locate anything. But odds are users want to access objects in the most recently created packfiles first, and that may be all they ever need for the current operation. Junio observed in b867092f that placing recent packfiles before older ones can slightly improve access times for recent objects, without degrading it for historical object access. This change improves upon Junio's observations by trying even harder to avoid the .idx files that we won't need. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano diff --git a/builtin-count-objects.c b/builtin-count-objects.c index ff90ebd..ac65e03 100644 --- a/builtin-count-objects.c +++ b/builtin-count-objects.c @@ -111,6 +111,8 @@ int cmd_count_objects(int ac, const char **av, const char *prefix) for (p = packed_git; p; p = p->next) { if (!p->pack_local) continue; + if (!p->index_data && open_pack_index(p)) + continue; packed += p->num_objects; num_pack++; } diff --git a/cache.h b/cache.h index cd875bc..0f4a05b 100644 --- a/cache.h +++ b/cache.h @@ -485,10 +485,11 @@ extern struct packed_git *find_sha1_pack(const unsigned char *sha1, struct packed_git *packs); extern void pack_report(void); +extern int open_pack_index(struct packed_git *); extern unsigned char* use_pack(struct packed_git *, struct pack_window **, off_t, unsigned int *); extern void unuse_pack(struct pack_window **); extern struct packed_git *add_packed_git(const char *, int, int); -extern const unsigned char *nth_packed_object_sha1(const struct packed_git *, uint32_t); +extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t); extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *); extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *); extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); diff --git a/pack-check.c b/pack-check.c index c168642..3623c71 100644 --- a/pack-check.c +++ b/pack-check.c @@ -128,12 +128,17 @@ static void show_pack_info(struct packed_git *p) int verify_pack(struct packed_git *p, int verbose) { - off_t index_size = p->index_size; - const unsigned char *index_base = p->index_data; + off_t index_size; + const unsigned char *index_base; SHA_CTX ctx; unsigned char sha1[20]; int ret; + if (open_pack_index(p)) + return error("packfile %s index not opened", p->pack_name); + index_size = p->index_size; + index_base = p->index_data; + ret = 0; /* Verify SHA1 sum of the index file */ SHA1_Init(&ctx); diff --git a/pack-redundant.c b/pack-redundant.c index 87077e1..0617320 100644 --- a/pack-redundant.c +++ b/pack-redundant.c @@ -550,6 +550,9 @@ static struct pack_list * add_pack(struct packed_git *p) l.pack = p; llist_init(&l.all_objects); + if (!p->index_data && open_pack_index(p)) + return NULL; + base = p->index_data; base += 256 * 4 + ((p->index_version < 2) ? 4 : 8); step = (p->index_version < 2) ? 24 : 20; diff --git a/sha1_file.c b/sha1_file.c index 12d2ef2..6a5ba63 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -530,6 +530,21 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) return 0; } +int open_pack_index (struct packed_git *p) +{ + char *idx_name; + int ret; + + if (p->index_data) + return 0; + + idx_name = xstrdup(p->pack_name); + strcpy(idx_name + strlen(idx_name) - strlen(".pack"), ".idx"); + ret = check_packed_git_idx(idx_name, p); + free(idx_name); + return ret; +} + static void scan_windows(struct packed_git *p, struct packed_git **lru_p, struct pack_window **lru_w, @@ -605,6 +620,9 @@ static int open_packed_git_1(struct packed_git *p) unsigned char *idx_sha1; long fd_flag; + if (!p->index_data && open_pack_index(p)) + return error("packfile %s index unavailable", p->pack_name); + p->pack_fd = open(p->pack_name, O_RDONLY); if (p->pack_fd < 0 || fstat(p->pack_fd, &st)) return -1; @@ -757,8 +775,7 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local) return NULL; memcpy(p->pack_name, path, path_len); strcpy(p->pack_name + path_len, ".pack"); - if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode) || - check_packed_git_idx(path, p)) { + if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) { free(p); return NULL; } @@ -766,6 +783,10 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local) /* ok, it looks sane as far as we can check without * actually mapping the pack file. */ + p->index_version = 0; + p->index_data = NULL; + p->index_size = 0; + p->num_objects = 0; p->pack_size = st.st_size; p->next = NULL; p->windows = NULL; @@ -1572,10 +1593,15 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, return data; } -const unsigned char *nth_packed_object_sha1(const struct packed_git *p, +const unsigned char *nth_packed_object_sha1(struct packed_git *p, uint32_t n) { const unsigned char *index = p->index_data; + if (!index) { + if (open_pack_index(p)) + return NULL; + index = p->index_data; + } if (n >= p->num_objects) return NULL; index += 4 * 256; @@ -1612,6 +1638,12 @@ off_t find_pack_entry_one(const unsigned char *sha1, const unsigned char *index = p->index_data; unsigned hi, lo; + if (!index) { + if (open_pack_index(p)) + return 0; + level1_ofs = p->index_data; + index = p->index_data; + } if (p->index_version > 1) { level1_ofs += 2; index += 8; -- cgit v0.10.2-6-g49f6 From 7dc24aa5a62cc5f77e6637674581c837f4bdf78e Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 26 May 2007 01:24:40 -0400 Subject: Micro-optimize prepare_alt_odb Calling getenv() is not that expensive, but its also not free, and its certainly not cheaper than testing to see if alt_odb_tail is not null. Because we are calling prepare_alt_odb() from within find_sha1_file every time we cannot find an object file locally we want to skip out of prepare_alt_odb() as early as possible once we have initialized our alternate list. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano diff --git a/sha1_file.c b/sha1_file.c index 6a5ba63..a3637d7 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -376,11 +376,12 @@ void prepare_alt_odb(void) { const char *alt; + if (alt_odb_tail) + return; + alt = getenv(ALTERNATE_DB_ENVIRONMENT); if (!alt) alt = ""; - if (alt_odb_tail) - return; alt_odb_tail = &alt_odb_list; link_alt_odb_entries(alt, alt + strlen(alt), ':', NULL, 0); -- cgit v0.10.2-6-g49f6 From 693d2bc625e7168299741d673e7205e9d2c969df Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 26 May 2007 01:25:11 -0400 Subject: Attempt to delay prepare_alt_odb during get_sha1 Not every input value passed to get_sha1 is an abbreviated SHA-1. Its actually quite common for refs to be passed and for those refs to resolve to full SHA-1s, in which case we may not need to initialize the alternate object database list in this process. I'm relocating the call to prepare_alt_odb closer to the code that actually needs it to maintain the fix first introduced by Junio in 99a19b43 (to avoid ambiguous SHA-1 abbreviations from being accepted). This allows us to avoid the alt_odb list setup if we won't actually need it. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano diff --git a/sha1_name.c b/sha1_name.c index 55f25a2..8dfceb2 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -133,6 +133,7 @@ static int find_unique_short_object(int len, char *canonical, int has_unpacked, has_packed; unsigned char unpacked_sha1[20], packed_sha1[20]; + prepare_alt_odb(); has_unpacked = find_short_object_filename(len, canonical, unpacked_sha1); has_packed = find_short_packed_object(len, res, packed_sha1); if (!has_unpacked && !has_packed) @@ -654,7 +655,6 @@ int get_sha1_with_mode(const char *name, unsigned char *sha1, unsigned *mode) const char *cp; *mode = S_IFINVALID; - prepare_alt_odb(); ret = get_sha1_1(name, namelen, sha1); if (!ret) return ret; -- cgit v0.10.2-6-g49f6 From 1055880e7c096907ac87203dd83fdc6830251115 Mon Sep 17 00:00:00 2001 From: James Bowes Date: Tue, 29 May 2007 19:29:51 -0400 Subject: rev-parse: Identify short sha1 sums correctly. find_short_packed_object was not loading the pack index files. Teach it to do so. Signed-off-by: James Bowes Signed-off-by: Junio C Hamano diff --git a/sha1_name.c b/sha1_name.c index 8dfceb2..7df01af 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -76,8 +76,11 @@ static int find_short_packed_object(int len, const unsigned char *match, unsigne prepare_packed_git(); for (p = packed_git; p && found < 2; p = p->next) { - uint32_t num = p->num_objects; - uint32_t first = 0, last = num; + uint32_t num, last; + uint32_t first = 0; + open_pack_index(p); + num = p->num_objects; + last = num; while (first < last) { uint32_t mid = (first + last) / 2; const unsigned char *now; -- cgit v0.10.2-6-g49f6 From 7ff895c0d229c2c60b73e91b0c389a4e3ce69e46 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 30 May 2007 00:50:26 -0400 Subject: Test for recent rev-parse $abbrev_sha1 regression My recent patch "Lazily open pack index files on demand" caused a regression in the case of parsing abbreviated SHA-1 object names. Git was unable to translate the abbreviated name into the full name if the object was packed, as the pack .idx files were not opened before being accessed. This is a simple test to repack a repository then test for an abbreviated SHA-1 within the packfile. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano diff --git a/t/t6101-rev-parse-parents.sh b/t/t6101-rev-parse-parents.sh index 7d354a1..b0252b9 100755 --- a/t/t6101-rev-parse-parents.sh +++ b/t/t6101-rev-parse-parents.sh @@ -29,5 +29,15 @@ test_expect_success 'final^1^3 not valid' "if git-rev-parse --verify final^1^3; test_expect_failure '--verify start2^1' 'git-rev-parse --verify start2^1' test_expect_success '--verify start2^0' 'git-rev-parse --verify start2^0' +test_expect_success 'repack for next test' 'git repack -a -d' +test_expect_success 'short SHA-1 works' ' + start=`git rev-parse --verify start` && + echo $start && + abbrv=`echo $start | sed s/.\$//` && + echo $abbrv && + abbrv=`git rev-parse --verify $abbrv` && + echo $abbrv && + test $start = $abbrv' + test_done -- cgit v0.10.2-6-g49f6 From eaa867703927c1f383637979d16c40d996cea240 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 30 May 2007 02:12:28 -0400 Subject: Simplify index access condition in count-objects, pack-redundant My earlier lazy index opening patch changed this condition to check index_data and call open_pack_index if it was NULL. In truth we only care about num_objects. Since open_pack_index does no harm if the index is already open, and all indexes are likely to be closed in this application, the "performance optimization" of inlining the index_data check here was wrong. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano diff --git a/builtin-count-objects.c b/builtin-count-objects.c index ac65e03..4274ec1 100644 --- a/builtin-count-objects.c +++ b/builtin-count-objects.c @@ -111,7 +111,7 @@ int cmd_count_objects(int ac, const char **av, const char *prefix) for (p = packed_git; p; p = p->next) { if (!p->pack_local) continue; - if (!p->index_data && open_pack_index(p)) + if (open_pack_index(p)) continue; packed += p->num_objects; num_pack++; diff --git a/pack-redundant.c b/pack-redundant.c index 0617320..6bc3bdf 100644 --- a/pack-redundant.c +++ b/pack-redundant.c @@ -550,7 +550,7 @@ static struct pack_list * add_pack(struct packed_git *p) l.pack = p; llist_init(&l.all_objects); - if (!p->index_data && open_pack_index(p)) + if (open_pack_index(p)) return NULL; base = p->index_data; -- cgit v0.10.2-6-g49f6 From b77ffe8a57a0921f58cff22dcf1ed6ae64d89d6a Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 30 May 2007 02:13:14 -0400 Subject: Ensure the pack index is opened before access In this particular location of fsck the index should have already been opened by verify_pack, which is called just before we get here and loop through the object names. However, just in case a future version of that function does not use the index file we'll double-check its open before we access the num_objects field. Better safe now than sorry later. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano diff --git a/builtin-fsck.c b/builtin-fsck.c index cbbcaf0..9959818 100644 --- a/builtin-fsck.c +++ b/builtin-fsck.c @@ -668,7 +668,10 @@ int cmd_fsck(int argc, char **argv, const char *prefix) verify_pack(p, 0); for (p = packed_git; p; p = p->next) { - uint32_t i, num = p->num_objects; + uint32_t i, num; + if (open_pack_index(p)) + continue; + num = p->num_objects; for (i = 0; i < num; i++) fsck_sha1(nth_packed_object_sha1(p, i)); } -- cgit v0.10.2-6-g49f6 From bc8e478a285ff549a3e5182461b064313d400de3 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 30 May 2007 02:13:42 -0400 Subject: Style nit - don't put space after function names Our style is to not put a space after a function name. I did here, and Junio applied the patch with the incorrect formatting. So I'm cleaning up after myself since I noticed it upon review. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano diff --git a/sha1_file.c b/sha1_file.c index a3637d7..3093ac9 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -531,7 +531,7 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) return 0; } -int open_pack_index (struct packed_git *p) +int open_pack_index(struct packed_git *p) { char *idx_name; int ret; -- cgit v0.10.2-6-g49f6