summary refs log tree commit diff
path: root/parallel-checkout.h
diff options
context:
space:
mode:
author Matheus Tavares <matheus.bernardino@usp.br> 2021-04-19 00:14:54 (GMT)
committer Junio C Hamano <gitster@pobox.com> 2021-04-19 18:57:05 (GMT)
commit e9e8adf1a871d22d9df7498256681685459b2507 (patch)
tree e4d8edcf53f9276566d81d5f07e113e6a2483b1e /parallel-checkout.h
parent 04155bdad818381681d44448bb7dc3a850117ffb (diff)
download git-e9e8adf1a871d22d9df7498256681685459b2507.zip
git-e9e8adf1a871d22d9df7498256681685459b2507.tar.gz
git-e9e8adf1a871d22d9df7498256681685459b2507.tar.bz2
parallel-checkout: make it truly parallel
Use multiple worker processes to distribute the queued entries and call write_pc_item() in parallel for them. The items are distributed uniformly in contiguous chunks. This minimizes the chances of two workers writing to the same directory simultaneously, which could affect performance due to lock contention in the kernel. Work stealing (or any other form of re-distribution) is not implemented yet.

The protocol between the main process and the workers is quite simple. They exchange binary messages packed in pkt-line format, and use PKT-FLUSH to mark the end of input (from both sides). The main process starts the communication by sending N pkt-lines, each corresponding to an item that needs to be written. These packets contain all the necessary information to load, smudge, and write the blob associated with each item. Then it waits for the worker to send back N pkt-lines containing the results for each item. The resulting packet must contain: the identification number of the item that it refers to, the status of the operation, and the lstat() data gathered after writing the file (iff the operation was successful).

For now, checkout always uses a hardcoded value of 2 workers, only to demonstrate that the parallel checkout framework correctly divides and writes the queued entries. The next patch will add user configurations and define a more reasonable default, based on tests with the said settings.

Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'parallel-checkout.h')
-rw-r--r-- parallel-checkout.h 73
1 files changed, 72 insertions, 1 deletions
diff --git a/parallel-checkout.h b/parallel-checkout.h
index 4ad2a51..ec58716 100644
--- a/parallel-checkout.h
+++ b/parallel-checkout.h
@@ -1,9 +1,14 @@
#ifndef PARALLEL_CHECKOUT_H
#define PARALLEL_CHECKOUT_H
+#include "convert.h"
+
struct cache_entry;
struct checkout;
-struct conv_attrs;
+
+/****************************************************************
+ * Users of parallel checkout
+ ****************************************************************/
enum pc_status {
PC_UNINITIALIZED = 0,
@@ -29,4 +34,70 @@ int enqueue_checkout(struct cache_entry *ce, struct conv_attrs *ca);
/*
 * Write all the queued entries, returning 0 on success.
 * NOTE(review): the queued entries are presumably those added through
 * enqueue_checkout(); the value returned on failure is not stated here, so
 * check the definition before relying on a specific nonzero code.
 */
int run_parallel_checkout(struct checkout *state);
+/****************************************************************
+ * Interface with checkout--worker
+ ****************************************************************/
+
+enum pc_item_status { /* per-item outcome; stored in the `status` field of parallel_checkout_item and pc_item_result */
+ PC_ITEM_PENDING = 0, /* zero value; presumably the state before a result is known -- confirm */
+ PC_ITEM_WRITTEN, /* presumably: the entry was written successfully -- confirm in write_pc_item() */
+ /*
+ * The entry could not be written because there was another file
+ * already present in its path or leading directories. Since
+ * checkout_entry_ca() removes such files from the working tree before
+ * enqueueing the entry for parallel checkout, it means that there was
+ * a path collision among the entries being written.
+ */
+ PC_ITEM_COLLIDED,
+ PC_ITEM_FAILED, /* presumably: any failure other than a collision -- confirm in write_pc_item() */
+};
+
+struct parallel_checkout_item {
+ /*
+ * In the main process, ce points to an istate->cache[] entry. Thus, it's
+ * not owned by us. In workers, the worker owns the memory, which
+ * *must be* released.
+ */
+ struct cache_entry *ce;
+ struct conv_attrs ca; /* held by value, so the full type definition is needed (presumably from "convert.h") */
+ size_t id; /* position in parallel_checkout.items[] of main process */
+
+ /* Output fields, sent from workers. */
+ enum pc_item_status status;
+ struct stat st; /* omitted from the worker's reply on error (see struct pc_item_result) */
+};
+
+/*
+ * The fixed-size portion of `struct parallel_checkout_item` that is sent to the
+ * workers. Following this will be 2 strings: ca.working_tree_encoding and
+ * ce->name. These are NOT null terminated, since we have the size in the fixed
+ * portion.
+ *
+ * Note that not all fields of conv_attrs and cache_entry are passed, only the
+ * ones that will be required by the workers to smudge and write the entry.
+ *
+ * NOTE(review): this struct is presumably transmitted as raw bytes, so the
+ * main process and the workers must agree on its exact layout -- confirm
+ * before adding, removing or reordering fields.
+ */
+struct pc_item_fixed_portion {
+ size_t id; /* presumably parallel_checkout_item.id, echoed back in pc_item_result.id -- confirm */
+ struct object_id oid;
+ unsigned int ce_mode;
+ enum convert_crlf_action crlf_action;
+ int ident;
+ size_t working_tree_encoding_len; /* size of the trailing ca.working_tree_encoding string */
+ size_t name_len; /* size of the trailing ce->name string */
+};
+
+/*
+ * The fields of `struct parallel_checkout_item` that are returned by the
+ * workers. Note: `st` must be the last one, as it is omitted on error.
+ *
+ * PC_ITEM_RESULT_BASE_SIZE, defined right after this struct, is the offset
+ * of `st`, i.e. the size of the leading part of a result that precedes `st`
+ * (presumably the length of a result that is sent without `st` -- confirm
+ * against the sender and receiver code).
+ */
+struct pc_item_result {
+ size_t id;
+ enum pc_item_status status;
+ struct stat st;
+};
+
+#define PC_ITEM_RESULT_BASE_SIZE offsetof(struct pc_item_result, st)
+
+void write_pc_item(struct parallel_checkout_item *pc_item,
+ struct checkout *state); /* returns void; outcome presumably reported via pc_item->status and pc_item->st -- confirm */
+
#endif /* PARALLEL_CHECKOUT_H */