summaryrefslogtreecommitdiff
path: root/object-store.h
diff options
context:
space:
mode:
authorMatheus Tavares <matheus.bernardino@usp.br>2020-01-16 02:39:53 (GMT)
committerJunio C Hamano <gitster@pobox.com>2020-01-17 21:52:14 (GMT)
commit31877c9aec21e0824fd4fcf415069cf8dfae4b72 (patch)
treeed78b3edfb08a2f65c47dede4d320af072b46c71 /object-store.h
parentb1fc9da1c84a94ef03eb07df361f3ec43006b39f (diff)
downloadgit-31877c9aec21e0824fd4fcf415069cf8dfae4b72.zip
git-31877c9aec21e0824fd4fcf415069cf8dfae4b72.tar.gz
git-31877c9aec21e0824fd4fcf415069cf8dfae4b72.tar.bz2
object-store: allow threaded access to object reading
Allow object reading to be performed by multiple threads by protecting it with an internal lock, the obj_read_mutex. The lock usage can be toggled with enable_obj_read_lock() and disable_obj_read_lock(). Currently, the functions which can be safely called in parallel are: read_object_file_extended(), repo_read_object_file(), read_object_file(), read_object_with_reference(), read_object(), oid_object_info() and oid_object_info_extended(). It's also possible to use obj_read_lock() and obj_read_unlock() to protect other sections that cannot execute in parallel with object reading. Probably there are many spots in the functions listed above that could be executed unlocked (and thus, in parallel). But, for now, we are most interested in allowing parallel access to zlib inflation. This is one of the sections where object reading spends most of its time (e.g. up to one-third of git-grep's execution time in the chromium repo corresponds to inflation) and it's already thread-safe. So, to take advantage of that, the obj_read_mutex is released when calling git_inflate() and re-acquired right after, for every calling spot in oid_object_info_extended()'s call chain. We may refine this lock to also exploit other possible parallel spots in the future, but for now, threaded zlib inflation should already give great speedups for threaded object reading callers. Note that add_delta_base_cache() was also modified to skip adding already present entries to the cache. Adding a duplicate entry wasn't possible before, but it would be now, with the parallel inflation. Take for example the following situation, where two threads - A and B - are executing the code at unpack_entry(): 1. Thread A is performing the decompression of a base O (which is not yet in the cache) at PHASE II. Thread B is simultaneously trying to unpack O, but just starting at PHASE I. 2. Since O is not yet in the cache, B will go to PHASE II to also perform the decompression.
3. When they finish decompressing, one of them will get the object reading mutex and go to PHASE III while the other waits for the mutex. Let's say A got the mutex first. 4. Thread A will add O to the cache, go through the rest of PHASE III and return. 5. Thread B gets the mutex, also adds O to the cache (if the check weren't there) and returns. Finally, it is also important to highlight that the object reading lock can only ensure thread-safety in the mentioned functions thanks to two complementary mechanisms: the use of 'struct raw_object_store's replace_mutex, which guards sections in the object reading machinery that would otherwise be thread-unsafe; and the 'struct pack_window's inuse_cnt, which protects window reading operations (such as the one performed during the inflation of a packed object), allowing them to execute without the acquisition of the obj_read_mutex. Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'object-store.h')
-rw-r--r--object-store.h35
1 files changed, 35 insertions, 0 deletions
diff --git a/object-store.h b/object-store.h
index 33739c9..7c80e0d 100644
--- a/object-store.h
+++ b/object-store.h
@@ -6,6 +6,7 @@
#include "list.h"
#include "sha1-array.h"
#include "strbuf.h"
+#include "thread-utils.h"
struct object_directory {
struct object_directory *next;
@@ -251,6 +252,40 @@ int has_loose_object_nonlocal(const struct object_id *);
void assert_oid_type(const struct object_id *oid, enum object_type expect);
+/*
+ * Enabling the object read lock allows multiple threads to safely call the
+ * following functions in parallel: repo_read_object_file(), read_object_file(),
+ * read_object_file_extended(), read_object_with_reference(), read_object(),
+ * oid_object_info() and oid_object_info_extended().
+ *
+ * obj_read_lock() and obj_read_unlock() may also be used to protect other
+ * sections which cannot execute in parallel with object reading. Since the used
+ * lock is a recursive mutex, these sections can even contain calls to object
+ * reading functions. However, beware that in these cases zlib inflation won't
+ * be performed in parallel, losing performance.
+ *
+ * TODO: oid_object_info_extended()'s call stack has a recursive behavior. If
+ * any of its callees end up calling it, this recursive call won't benefit from
+ * parallel inflation.
+ */
+void enable_obj_read_lock(void); /* switch on use of the object read lock (defined elsewhere) */
+void disable_obj_read_lock(void); /* switch use of the object read lock back off (defined elsewhere) */
+
+extern int obj_read_use_lock; /* tested by obj_read_lock()/obj_read_unlock(): nonzero means "really lock"; presumably set by the enable/disable calls above -- confirm in the .c file */
+extern pthread_mutex_t obj_read_mutex; /* the object read lock itself; described above as a recursive mutex */
+
+static inline void obj_read_lock(void) /* acquire obj_read_mutex, but only while obj_read_use_lock is nonzero */
+{
+ if(obj_read_use_lock) /* flag is zero: nothing is locked and the call is a no-op */
+ pthread_mutex_lock(&obj_read_mutex); /* return value is not checked */
+}
+
+static inline void obj_read_unlock(void) /* release obj_read_mutex, but only while obj_read_use_lock is nonzero */
+{
+ if(obj_read_use_lock) /* flag is zero: nothing is unlocked and the call is a no-op */
+ pthread_mutex_unlock(&obj_read_mutex); /* return value is not checked */
+}
+
struct object_info {
/* Request */
enum object_type *typep;