parallel-checkout: make it truly parallel
Use multiple worker processes to distribute the queued entries and call
write_pc_item() in parallel for them. The items are distributed
uniformly in contiguous chunks. This minimizes the chances of two
workers writing to the same directory simultaneously, which could affect
performance due to lock contention in the kernel. Work stealing (or any
other form of redistribution) is not implemented yet.
The protocol between the main process and the workers is quite simple.
They exchange binary messages packed in pkt-line format, and use
PKT-FLUSH to mark the end of input (from both sides). The main process
starts the communication by sending N pkt-lines, each corresponding to
an item that needs to be written. These packets contain all the
necessary information to load, smudge, and write the blob associated
with each item. Then it waits for the worker to send back N pkt-lines
containing the results for each item. The resulting packet must contain:
the identification number of the item that it refers to, the status of
the operation, and the lstat() data gathered after writing the file (iff
the operation was successful).
For now, checkout always uses a hardcoded value of 2 workers, only to
demonstrate that the parallel checkout framework correctly divides and
writes the queued entries. The next patch will add user configurations
and define a more reasonable default, based on tests with the said
settings.
Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 02:14:54 +02:00
|
|
|
#include "builtin.h"
|
2023-02-24 01:09:24 +01:00
|
|
|
#include "alloc.h"
|
parallel-checkout: make it truly parallel
Use multiple worker processes to distribute the queued entries and call
write_pc_item() in parallel for them. The items are distributed
uniformly in contiguous chunks. This minimizes the chances of two
workers writing to the same directory simultaneously, which could affect
performance due to lock contention in the kernel. Work stealing (or any
other form of redistribution) is not implemented yet.
The protocol between the main process and the workers is quite simple.
They exchange binary messages packed in pkt-line format, and use
PKT-FLUSH to mark the end of input (from both sides). The main process
starts the communication by sending N pkt-lines, each corresponding to
an item that needs to be written. These packets contain all the
necessary information to load, smudge, and write the blob associated
with each item. Then it waits for the worker to send back N pkt-lines
containing the results for each item. The resulting packet must contain:
the identification number of the item that it refers to, the status of
the operation, and the lstat() data gathered after writing the file (iff
the operation was successful).
For now, checkout always uses a hardcoded value of 2 workers, only to
demonstrate that the parallel checkout framework correctly divides and
writes the queued entries. The next patch will add user configurations
and define a more reasonable default, based on tests with the said
settings.
Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 02:14:54 +02:00
|
|
|
#include "config.h"
|
|
|
|
#include "entry.h"
|
2023-03-21 07:25:54 +01:00
|
|
|
#include "gettext.h"
|
parallel-checkout: make it truly parallel
Use multiple worker processes to distribute the queued entries and call
write_pc_item() in parallel for them. The items are distributed
uniformly in contiguous chunks. This minimizes the chances of two
workers writing to the same directory simultaneously, which could affect
performance due to lock contention in the kernel. Work stealing (or any
other form of redistribution) is not implemented yet.
The protocol between the main process and the workers is quite simple.
They exchange binary messages packed in pkt-line format, and use
PKT-FLUSH to mark the end of input (from both sides). The main process
starts the communication by sending N pkt-lines, each corresponding to
an item that needs to be written. These packets contain all the
necessary information to load, smudge, and write the blob associated
with each item. Then it waits for the worker to send back N pkt-lines
containing the results for each item. The resulting packet must contain:
the identification number of the item that it refers to, the status of
the operation, and the lstat() data gathered after writing the file (iff
the operation was successful).
For now, checkout always uses a hardcoded value of 2 workers, only to
demonstrate that the parallel checkout framework correctly divides and
writes the queued entries. The next patch will add user configurations
and define a more reasonable default, based on tests with the said
settings.
Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 02:14:54 +02:00
|
|
|
#include "parallel-checkout.h"
|
|
|
|
#include "parse-options.h"
|
|
|
|
#include "pkt-line.h"
|
|
|
|
|
|
|
|
static void packet_to_pc_item(const char *buffer, int len,
|
|
|
|
struct parallel_checkout_item *pc_item)
|
|
|
|
{
|
|
|
|
const struct pc_item_fixed_portion *fixed_portion;
|
|
|
|
const char *variant;
|
|
|
|
char *encoding;
|
|
|
|
|
|
|
|
if (len < sizeof(struct pc_item_fixed_portion))
|
|
|
|
BUG("checkout worker received too short item (got %dB, exp %dB)",
|
|
|
|
len, (int)sizeof(struct pc_item_fixed_portion));
|
|
|
|
|
|
|
|
fixed_portion = (struct pc_item_fixed_portion *)buffer;
|
|
|
|
|
|
|
|
if (len - sizeof(struct pc_item_fixed_portion) !=
|
|
|
|
fixed_portion->name_len + fixed_portion->working_tree_encoding_len)
|
|
|
|
BUG("checkout worker received corrupted item");
|
|
|
|
|
|
|
|
variant = buffer + sizeof(struct pc_item_fixed_portion);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Note: the main process uses zero length to communicate that the
|
|
|
|
* encoding is NULL. There is no use case that requires sending an
|
|
|
|
* actual empty string, since convert_attrs() never sets
|
|
|
|
* ca.working_tree_enconding to "".
|
|
|
|
*/
|
|
|
|
if (fixed_portion->working_tree_encoding_len) {
|
|
|
|
encoding = xmemdupz(variant,
|
|
|
|
fixed_portion->working_tree_encoding_len);
|
|
|
|
variant += fixed_portion->working_tree_encoding_len;
|
|
|
|
} else {
|
|
|
|
encoding = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
memset(pc_item, 0, sizeof(*pc_item));
|
2021-05-04 18:27:28 +02:00
|
|
|
pc_item->ce = make_empty_transient_cache_entry(fixed_portion->name_len, NULL);
|
parallel-checkout: make it truly parallel
Use multiple worker processes to distribute the queued entries and call
write_pc_item() in parallel for them. The items are distributed
uniformly in contiguous chunks. This minimizes the chances of two
workers writing to the same directory simultaneously, which could affect
performance due to lock contention in the kernel. Work stealing (or any
other format of re-distribution) is not implemented yet.
The protocol between the main process and the workers is quite simple.
They exchange binary messages packed in pkt-line format, and use
PKT-FLUSH to mark the end of input (from both sides). The main process
starts the communication by sending N pkt-lines, each corresponding to
an item that needs to be written. These packets contain all the
necessary information to load, smudge, and write the blob associated
with each item. Then it waits for the worker to send back N pkt-lines
containing the results for each item. The resulting packet must contain:
the identification number of the item that it refers to, the status of
the operation, and the lstat() data gathered after writing the file (iff
the operation was successful).
For now, checkout always uses a hardcoded value of 2 workers, only to
demonstrate that the parallel checkout framework correctly divides and
writes the queued entries. The next patch will add user configurations
and define a more reasonable default, based on tests with the said
settings.
Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 02:14:54 +02:00
|
|
|
pc_item->ce->ce_namelen = fixed_portion->name_len;
|
|
|
|
pc_item->ce->ce_mode = fixed_portion->ce_mode;
|
|
|
|
memcpy(pc_item->ce->name, variant, pc_item->ce->ce_namelen);
|
|
|
|
oidcpy(&pc_item->ce->oid, &fixed_portion->oid);
|
|
|
|
|
|
|
|
pc_item->id = fixed_portion->id;
|
|
|
|
pc_item->ca.crlf_action = fixed_portion->crlf_action;
|
|
|
|
pc_item->ca.ident = fixed_portion->ident;
|
|
|
|
pc_item->ca.working_tree_encoding = encoding;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void report_result(struct parallel_checkout_item *pc_item)
|
|
|
|
{
|
builtin/checkout--worker: zero-initialise struct to avoid MSAN complaints
report_result() sends a struct to the parent process, but that struct
would contain uninitialised padding bytes. Running this code under MSAN
rightly triggers a warning - but we don't particularly care about this
warning because we control the receiving code, and we therefore know
that those padding bytes won't be read on the receiving end.
We could simply suppress this warning under MSAN with the approporiate
ifdef'd attributes, but a less intrusive solution is to 0-initialise the
struct, which guarantees that the padding will also be initialised.
Interestingly, in the error-case branch, we only try to copy the first
two members of pc_item_result, by copying only PC_ITEM_RESULT_BASE_SIZE
bytes. However PC_ITEM_RESULT_BASE_SIZE is defined as
'offsetof(the_last_member)', which means that we're copying padding bytes
after the end of the second last member. We could avoid doing this by
redefining PC_ITEM_RESULT_BASE_SIZE as
'offsetof(second_last_member) + sizeof(second_last_member)', but there's
no huge benefit to doing so (and this patch silences the MSAN warning in
this scenario either way).
MSAN output from t2080 (partially interleaved due to the
parallel work :) ):
Uninitialized bytes in __interceptor_write at offset 12 inside [0x7fff37d83408, 160)
==23279==WARNING: MemorySanitizer: use-of-uninitialized-value
Uninitialized bytes in __interceptor_write at offset 12 inside [0x7ffdb8a07ec8, 160)
==23280==WARNING: MemorySanitizer: use-of-uninitialized-value
#0 0xd5ac28 in xwrite /home/ahunt/git/git/wrapper.c:256:8
#1 0xd5b327 in write_in_full /home/ahunt/git/git/wrapper.c:311:21
#2 0xb0a8c4 in do_packet_write /home/ahunt/git/git/pkt-line.c:221:6
#3 0xb0a5fd in packet_write /home/ahunt/git/git/pkt-line.c:242:6
#4 0x4f7441 in report_result /home/ahunt/git/git/builtin/checkout--worker.c:69:2
#5 0x4f6be6 in worker_loop /home/ahunt/git/git/builtin/checkout--worker.c:100:3
#6 0x4f68d3 in cmd_checkout__worker /home/ahunt/git/git/builtin/checkout--worker.c:143:2
#7 0x4a1e76 in run_builtin /home/ahunt/git/git/git.c:461:11
#8 0x49e1e7 in handle_builtin /home/ahunt/git/git/git.c:714:3
#9 0x4a0c08 in run_argv /home/ahunt/git/git/git.c:781:4
#10 0x49d5a8 in cmd_main /home/ahunt/git/git/git.c:912:19
#11 0x7974da in main /home/ahunt/git/git/common-main.c:52:11
#12 0x7f8778114349 in __libc_start_main (/lib64/libc.so.6+0x24349)
#13 0x421bd9 in _start /home/abuild/rpmbuild/BUILD/glibc-2.26/csu/../sysdeps/x86_64/start.S:120
Uninitialized value was created by an allocation of 'res' in the stack frame of function 'report_result'
#0 0x4f72c0 in report_result /home/ahunt/git/git/builtin/checkout--worker.c:55
SUMMARY: MemorySanitizer: use-of-uninitialized-value /home/ahunt/git/git/wrapper.c:256:8 in xwrite
Exiting
#0 0xd5ac28 in xwrite /home/ahunt/git/git/wrapper.c:256:8
#1 0xd5b327 in write_in_full /home/ahunt/git/git/wrapper.c:311:21
#2 0xb0a8c4 in do_packet_write /home/ahunt/git/git/pkt-line.c:221:6
#3 0xb0a5fd in packet_write /home/ahunt/git/git/pkt-line.c:242:6
#4 0x4f7441 in report_result /home/ahunt/git/git/builtin/checkout--worker.c:69:2
#5 0x4f6be6 in worker_loop /home/ahunt/git/git/builtin/checkout--worker.c:100:3
#6 0x4f68d3 in cmd_checkout__worker /home/ahunt/git/git/builtin/checkout--worker.c:143:2
#7 0x4a1e76 in run_builtin /home/ahunt/git/git/git.c:461:11
#8 0x49e1e7 in handle_builtin /home/ahunt/git/git/git.c:714:3
#9 0x4a0c08 in run_argv /home/ahunt/git/git/git.c:781:4
#10 0x49d5a8 in cmd_main /home/ahunt/git/git/git.c:912:19
#11 0x7974da in main /home/ahunt/git/git/common-main.c:52:11
#12 0x7f2749a0e349 in __libc_start_main (/lib64/libc.so.6+0x24349)
#13 0x421bd9 in _start /home/abuild/rpmbuild/BUILD/glibc-2.26/csu/../sysdeps/x86_64/start.S:120
Uninitialized value was created by an allocation of 'res' in the stack frame of function 'report_result'
#0 0x4f72c0 in report_result /home/ahunt/git/git/builtin/checkout--worker.c:55
SUMMARY: MemorySanitizer: use-of-uninitialized-value /home/ahunt/git/git/wrapper.c:256:8 in xwrite
Signed-off-by: Andrzej Hunt <andrzej@ahunt.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-06-14 17:51:16 +02:00
|
|
|
struct pc_item_result res = { 0 };
|
parallel-checkout: make it truly parallel
Use multiple worker processes to distribute the queued entries and call
write_pc_item() in parallel for them. The items are distributed
uniformly in contiguous chunks. This minimizes the chances of two
workers writing to the same directory simultaneously, which could affect
performance due to lock contention in the kernel. Work stealing (or any
other format of re-distribution) is not implemented yet.
The protocol between the main process and the workers is quite simple.
They exchange binary messages packed in pkt-line format, and use
PKT-FLUSH to mark the end of input (from both sides). The main process
starts the communication by sending N pkt-lines, each corresponding to
an item that needs to be written. These packets contain all the
necessary information to load, smudge, and write the blob associated
with each item. Then it waits for the worker to send back N pkt-lines
containing the results for each item. The resulting packet must contain:
the identification number of the item that it refers to, the status of
the operation, and the lstat() data gathered after writing the file (iff
the operation was successful).
For now, checkout always uses a hardcoded value of 2 workers, only to
demonstrate that the parallel checkout framework correctly divides and
writes the queued entries. The next patch will add user configurations
and define a more reasonable default, based on tests with the said
settings.
Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 02:14:54 +02:00
|
|
|
size_t size;
|
|
|
|
|
|
|
|
res.id = pc_item->id;
|
|
|
|
res.status = pc_item->status;
|
|
|
|
|
|
|
|
if (pc_item->status == PC_ITEM_WRITTEN) {
|
|
|
|
res.st = pc_item->st;
|
|
|
|
size = sizeof(res);
|
|
|
|
} else {
|
|
|
|
size = PC_ITEM_RESULT_BASE_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
packet_write(1, (const char *)&res, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Free the worker-side malloced data, but not pc_item itself. */
|
|
|
|
static void release_pc_item_data(struct parallel_checkout_item *pc_item)
|
|
|
|
{
|
|
|
|
free((char *)pc_item->ca.working_tree_encoding);
|
|
|
|
discard_cache_entry(pc_item->ce);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void worker_loop(struct checkout *state)
|
|
|
|
{
|
|
|
|
struct parallel_checkout_item *items = NULL;
|
|
|
|
size_t i, nr = 0, alloc = 0;
|
|
|
|
|
|
|
|
while (1) {
|
2021-10-14 22:15:12 +02:00
|
|
|
int len = packet_read(0, packet_buffer, sizeof(packet_buffer),
|
|
|
|
0);
|
parallel-checkout: make it truly parallel
Use multiple worker processes to distribute the queued entries and call
write_pc_item() in parallel for them. The items are distributed
uniformly in contiguous chunks. This minimizes the chances of two
workers writing to the same directory simultaneously, which could affect
performance due to lock contention in the kernel. Work stealing (or any
other format of re-distribution) is not implemented yet.
The protocol between the main process and the workers is quite simple.
They exchange binary messages packed in pkt-line format, and use
PKT-FLUSH to mark the end of input (from both sides). The main process
starts the communication by sending N pkt-lines, each corresponding to
an item that needs to be written. These packets contain all the
necessary information to load, smudge, and write the blob associated
with each item. Then it waits for the worker to send back N pkt-lines
containing the results for each item. The resulting packet must contain:
the identification number of the item that it refers to, the status of
the operation, and the lstat() data gathered after writing the file (iff
the operation was successful).
For now, checkout always uses a hardcoded value of 2 workers, only to
demonstrate that the parallel checkout framework correctly divides and
writes the queued entries. The next patch will add user configurations
and define a more reasonable default, based on tests with the said
settings.
Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 02:14:54 +02:00
|
|
|
|
|
|
|
if (len < 0)
|
|
|
|
BUG("packet_read() returned negative value");
|
|
|
|
else if (!len)
|
|
|
|
break;
|
|
|
|
|
|
|
|
ALLOC_GROW(items, nr + 1, alloc);
|
|
|
|
packet_to_pc_item(packet_buffer, len, &items[nr++]);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < nr; i++) {
|
|
|
|
struct parallel_checkout_item *pc_item = &items[i];
|
|
|
|
write_pc_item(pc_item, state);
|
|
|
|
report_result(pc_item);
|
|
|
|
release_pc_item_data(pc_item);
|
|
|
|
}
|
|
|
|
|
|
|
|
packet_flush(1);
|
|
|
|
|
|
|
|
free(items);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char * const checkout_worker_usage[] = {
|
|
|
|
N_("git checkout--worker [<options>]"),
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
int cmd_checkout__worker(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
|
|
|
struct checkout state = CHECKOUT_INIT;
|
|
|
|
struct option checkout_worker_options[] = {
|
|
|
|
OPT_STRING(0, "prefix", &state.base_dir, N_("string"),
|
|
|
|
N_("when creating files, prepend <string>")),
|
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
|
|
|
if (argc == 2 && !strcmp(argv[1], "-h"))
|
|
|
|
usage_with_options(checkout_worker_usage,
|
|
|
|
checkout_worker_options);
|
|
|
|
|
|
|
|
git_config(git_default_config, NULL);
|
|
|
|
argc = parse_options(argc, argv, prefix, checkout_worker_options,
|
|
|
|
checkout_worker_usage, 0);
|
|
|
|
if (argc > 0)
|
|
|
|
usage_with_options(checkout_worker_usage, checkout_worker_options);
|
|
|
|
|
|
|
|
if (state.base_dir)
|
|
|
|
state.base_dir_len = strlen(state.base_dir);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Setting this on a worker won't actually update the index. We just
|
|
|
|
* need to tell the checkout machinery to lstat() the written entries,
|
|
|
|
* so that we can send this data back to the main process.
|
|
|
|
*/
|
|
|
|
state.refresh_cache = 1;
|
|
|
|
|
|
|
|
worker_loop(&state);
|
|
|
|
return 0;
|
|
|
|
}
|