summaryrefslogtreecommitdiff
path: root/pack-objects.h
diff options
context:
space:
mode:
authorNguyễn Thái Ngọc Duy <pclouds@gmail.com>2018-04-14 15:35:10 (GMT)
committerJunio C Hamano <gitster@pobox.com>2018-04-16 03:38:59 (GMT)
commitac77d0c370dc5b23ac1ec4a13c754fe7ffa48564 (patch)
tree44d01cc4c9a8da445b8c0b2a7d9a8ed5adbc16c4 /pack-objects.h
parent27a7d0679f17bd536f566e76e51058de0e1fa17a (diff)
downloadgit-ac77d0c370dc5b23ac1ec4a13c754fe7ffa48564.zip
git-ac77d0c370dc5b23ac1ec4a13c754fe7ffa48564.tar.gz
git-ac77d0c370dc5b23ac1ec4a13c754fe7ffa48564.tar.bz2
pack-objects: shrink size field in struct object_entry
It's very rare that an uncompressed object is larger than 4GB (partly because Git does not handle those large files very well to begin with). Let's optimize it for the common case where object size is smaller than this limit. Shrink size field down to 31 bits and one overflow bit. If the size is too large, we read it back from disk. As noted in the previous patch, we need to return the delta size instead of canonical size when the to-be-reused object entry type is a delta instead of a canonical one. Add two compare helpers that can take advantage of the overflow bit (e.g. if the file is 4GB+, chances are it's already larger than core.bigFileThreshold and there's no point in comparing the actual value). Another note about oe_get_size_slow(): this function MUST be thread safe because the SIZE() macro is used inside try_delta(), which may run in parallel. Outside parallel code, no-contention locking should be dirt cheap (or insignificant compared to I/O access anyway). To exercise this code, it's best to run the test suite with something like make test GIT_TEST_OE_SIZE=4 which forces this code on all objects larger than 3 bytes. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'pack-objects.h')
-rw-r--r--pack-objects.h57
1 files changed, 56 insertions, 1 deletions
diff --git a/pack-objects.h b/pack-objects.h
index e4ea6a3..ee2c7ab 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -7,6 +7,11 @@
#define OE_DEPTH_BITS 12
#define OE_IN_PACK_BITS 10
#define OE_Z_DELTA_BITS 20
+/*
+ * Width, in bits, of the size_ bit-field in struct object_entry.
+ *
+ * Note that oe_set_size() becomes expensive when the given size is
+ * above this limit (it then has to call oe_get_size_slow() to check
+ * the value instead of simply storing it). Don't lower it too much.
+ */
+#define OE_SIZE_BITS 31
/*
* State flags for depth-first search used for analyzing delta cycles.
@@ -70,7 +75,8 @@ enum dfs_state {
*/
struct object_entry {
struct pack_idx_entry idx;
- unsigned long size; /* uncompressed size */
+ unsigned size_:OE_SIZE_BITS;
+ unsigned size_valid:1;
unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */
off_t in_pack_offset;
uint32_t delta_idx; /* delta base object */
@@ -115,6 +121,8 @@ struct packing_data {
*/
struct packed_git **in_pack_by_idx;
struct packed_git **in_pack;
+
+ uintmax_t oe_size_limit;
};
void prepare_packing_data(struct packing_data *pdata);
@@ -254,4 +262,51 @@ static inline void oe_set_delta_sibling(struct packing_data *pack,
e->delta_sibling_idx = 0;
}
+/*
+ * Slow path for entries whose size is not held in the size_ field.
+ * Declared here, defined outside this header.
+ */
+unsigned long oe_get_size_slow(struct packing_data *pack,
+			       const struct object_entry *e);
+
+/*
+ * Return the size of entry 'e'.
+ *
+ * When size_valid is set the value is taken straight from the size_
+ * bit-field; otherwise the lookup is delegated to oe_get_size_slow().
+ */
+static inline unsigned long oe_size(struct packing_data *pack,
+				    const struct object_entry *e)
+{
+	if (!e->size_valid)
+		return oe_get_size_slow(pack, e);
+
+	return e->size_;
+}
+
+/*
+ * Return non-zero if the size of 'lhs' is strictly smaller than 'rhs'.
+ *
+ * An entry without a valid size_ was stored by oe_set_size() with a
+ * size of at least pack->oe_size_limit, so when 'rhs' is below that
+ * limit the answer is known without calling oe_get_size_slow().
+ */
+static inline int oe_size_less_than(struct packing_data *pack,
+				    const struct object_entry *lhs,
+				    unsigned long rhs)
+{
+	if (!lhs->size_valid) {
+		/* rhs < limit <= lhs, hence lhs cannot be smaller */
+		if (rhs < pack->oe_size_limit)
+			return 0;
+		return oe_get_size_slow(pack, lhs) < rhs;
+	}
+
+	return lhs->size_ < rhs;
+}
+
+/*
+ * Return non-zero if the size of 'lhs' is strictly larger than 'rhs'.
+ *
+ * An entry without a valid size_ was stored by oe_set_size() with a
+ * size of at least pack->oe_size_limit, so when 'rhs' is below that
+ * limit the answer is known without calling oe_get_size_slow().
+ */
+static inline int oe_size_greater_than(struct packing_data *pack,
+				       const struct object_entry *lhs,
+				       unsigned long rhs)
+{
+	if (!lhs->size_valid) {
+		/* rhs < limit <= lhs, hence lhs must be larger */
+		if (rhs < pack->oe_size_limit)
+			return 1;
+		return oe_get_size_slow(pack, lhs) > rhs;
+	}
+
+	return lhs->size_ > rhs;
+}
+
+/*
+ * Record 'size' as the size of entry 'e'.
+ *
+ * Sizes below pack->oe_size_limit are stored in the size_ bit-field
+ * and flagged with size_valid. Anything else is not stored: size_valid
+ * is cleared and the value is only cross-checked against
+ * oe_get_size_slow(), which is what later lookups will return.
+ */
+static inline void oe_set_size(struct packing_data *pack,
+			       struct object_entry *e,
+			       unsigned long size)
+{
+	if (size >= pack->oe_size_limit) {
+		/* too large for size_; the slow path must agree with 'size' */
+		e->size_valid = 0;
+		if (oe_get_size_slow(pack, e) != size)
+			BUG("'size' is supposed to be the object size!");
+		return;
+	}
+
+	e->size_ = size;
+	e->size_valid = 1;
+}
+
#endif