From 5c94c93d504e29c2099200a68926a34072cf2736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C3=85gren?= Date: Mon, 21 Aug 2017 19:43:45 +0200 Subject: [PATCH 1/4] convert: always initialize attr_action in convert_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit convert_attrs contains an "if-else". In the "if", we set attr_action twice, and the first assignment has no effect. In the "else", we do not set it at all. Since git_check_attr always returns the same value, we'll always end up in the "if", so there is no problem right now. But convert_attrs is obviously trying not to rely on such an implementation-detail of another component. Make the initialization of attr_action after the if-else. Remove the earlier assignments. Suggested-by: Torsten Bögershausen Signed-off-by: Martin Ågren Signed-off-by: Junio C Hamano --- convert.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/convert.c b/convert.c index deaf0ba7b3..77b7615a54 100644 --- a/convert.c +++ b/convert.c @@ -1012,7 +1012,6 @@ static void convert_attrs(struct conv_attrs *ca, const char *path) ca->crlf_action = git_path_check_crlf(ccheck + 4); if (ca->crlf_action == CRLF_UNDEFINED) ca->crlf_action = git_path_check_crlf(ccheck + 0); - ca->attr_action = ca->crlf_action; ca->ident = git_path_check_ident(ccheck + 1); ca->drv = git_path_check_convert(ccheck + 2); if (ca->crlf_action != CRLF_BINARY) { @@ -1026,12 +1025,14 @@ static void convert_attrs(struct conv_attrs *ca, const char *path) else if (eol_attr == EOL_CRLF) ca->crlf_action = CRLF_TEXT_CRLF; } - ca->attr_action = ca->crlf_action; } else { ca->drv = NULL; ca->crlf_action = CRLF_UNDEFINED; ca->ident = 0; } + + /* Save attr and make a decision for action */ + ca->attr_action = ca->crlf_action; if (ca->crlf_action == CRLF_TEXT) ca->crlf_action = text_eol_is_crlf() ? CRLF_TEXT_CRLF : CRLF_TEXT_INPUT; if (ca->crlf_action == CRLF_UNDEFINED && auto_crlf == AUTO_CRLF_FALSE) From 0c2ad00b3c5a822224265c2551844b2eafc89875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C3=85gren?= Date: Mon, 21 Aug 2017 19:43:46 +0200 Subject: [PATCH 2/4] pack-objects: take lock before accessing `remaining` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When checking the conditional of "while (me->remaining)", we did not hold the lock. Calling find_deltas would still be safe, since it checks "remaining" (after taking the lock) and is able to handle all values. In fact, this could (currently) not trigger any bug: a bug could happen if `remaining` transitioning from zero to non-zero races with the evaluation of the while-condition, but these are always separated by the data_ready-mechanism. Make sure we have the lock when we read `remaining`. This does mean we release it just so that find_deltas can take it immediately again. We could tweak the contract so that the lock should be taken before calling find_deltas, but let's defer that until someone can actually show that "unlock+lock" has a measurable negative impact. Signed-off-by: Martin Ågren Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index f4a8441fe9..ffb13f7800 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2171,7 +2171,10 @@ static void *threaded_find_deltas(void *arg) { struct thread_params *me = arg; + progress_lock(); while (me->remaining) { + progress_unlock(); + find_deltas(me->list, &me->remaining, me->window, me->depth, me->processed); @@ -2193,7 +2196,10 @@ static void *threaded_find_deltas(void *arg) pthread_cond_wait(&me->cond, &me->mutex); me->data_ready = 0; pthread_mutex_unlock(&me->mutex); + + progress_lock(); } + progress_unlock(); /* leave ->working 1 so that this doesn't get more work assigned */ return NULL; } From 65961d5a75e28aa04a90ee65106f71da177fd4b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C3=85gren?= Date: Mon, 21 Aug 2017 19:43:47 +0200 Subject: [PATCH 3/4] strbuf_setlen: don't write to strbuf_slopbuf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit strbuf_setlen(., 0) writes '\0' to sb.buf[0], where buf is either allocated and unique to sb, or the global slopbuf. The slopbuf is meant to provide a guarantee that buf is not NULL and that a freshly initialized buffer contains the empty string, but it is not supposed to be written to. That strbuf_setlen writes to slopbuf has at least two implications: First, it's wrong in principle. Second, it might be hiding misuses which are just waiting to wreak havoc. Third, ThreadSanitizer detects a race when multiple threads write to slopbuf at roughly the same time, thus potentially making any more critical races harder to spot. Avoid writing to strbuf_slopbuf in strbuf_setlen. Let's instead assert on the first byte of slopbuf being '\0', since it helps ensure the promised invariant of buf[len] == '\0'. (We know that "len" was already 0, or someone has messed with "alloc". If someone has fiddled with the fields that much beyond the correct interface, they're on their own.) This is a function which is used in many places, possibly also in hot code paths. There are two branches in strbuf_setlen already, and we are adding a third and possibly a fourth (in the assert). In hot code paths, we hopefully reuse the buffer in order to avoid continous reallocations. Thus, after a start-up phase, we should always take the same path, which might help branch prediction, and we would never make the assert. If a hot code path continuously reallocates, we probably have bigger performance problems than this new safety-check. Simple measurements do not contradict this reasoning. 100000000 times resetting a buffer and adding the empty string takes 5.29/5.26 seconds with/without this patch (best of three). Releasing at every iteration yields 18.01/17.87. Adding a 30-character string instead of the empty string yields 5.61/5.58 and 17.28/17.28(!). This patch causes the git binary emitted by gcc 5.4.0 -O2 on my machine to grow from 11389848 bytes to 11497184 bytes, an increase of 0.9%. I also tried to piggy-back on the fact that we already check alloc, which should already tell us whether we are using the slopbuf: if (sb->alloc) { if (len > sb->alloc - 1) die("BUG: strbuf_setlen() beyond buffer"); sb->buf[len] = '\0'; } else { if (len) die("BUG: strbuf_setlen() beyond buffer"); assert(!strbuf_slopbuf[0]); } sb->len = len; That didn't seem to be much slower (5.38, 18.02, 5.70, 17.32 seconds), but it does introduce some minor code duplication. The resulting git binary was 11510528 bytes large (another 0.1% increase). Signed-off-by: Martin Ågren Signed-off-by: Junio C Hamano --- strbuf.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/strbuf.h b/strbuf.h index 2075384e0b..1a77fe146a 100644 --- a/strbuf.h +++ b/strbuf.h @@ -147,7 +147,10 @@ static inline void strbuf_setlen(struct strbuf *sb, size_t len) if (len > (sb->alloc ? sb->alloc - 1 : 0)) die("BUG: strbuf_setlen() beyond buffer"); sb->len = len; - sb->buf[len] = '\0'; + if (sb->buf != strbuf_slopbuf) + sb->buf[len] = '\0'; + else + assert(!strbuf_slopbuf[0]); } /** From 6cdf8a7929688ea5702ab53f450d038e973e64e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C3=85gren?= Date: Mon, 21 Aug 2017 19:43:48 +0200 Subject: [PATCH 4/4] ThreadSanitizer: add suppressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a file .tsan-suppressions and list two functions in it: want_color() and transfer_debug(). Both of these use the pattern static int foo = -1; if (foo < 0) foo = bar(); where bar always returns the same non-negative value. This can cause ThreadSanitizer to diagnose a race when foo is written from two threads. That is indeed a race, although it arguably doesn't matter in practice since it's always the same value that is written. Add NEEDSWORK-comments to the functions so that this problem is not forever swept way under the carpet. The suppressions-file is used by setting the environment variable TSAN_OPTIONS to, e.g., "suppressions=$(pwd)/.tsan-suppressions". Observe that relative paths such as ".tsan-suppressions" might not work. Signed-off-by: Martin Ågren Signed-off-by: Junio C Hamano --- .tsan-suppressions | 10 ++++++++++ color.c | 7 +++++++ transport-helper.c | 7 +++++++ 3 files changed, 24 insertions(+) create mode 100644 .tsan-suppressions diff --git a/.tsan-suppressions b/.tsan-suppressions new file mode 100644 index 0000000000..8c85014a0a --- /dev/null +++ b/.tsan-suppressions @@ -0,0 +1,10 @@ +# Suppressions for ThreadSanitizer (tsan). +# +# This file is used by setting the environment variable TSAN_OPTIONS to, e.g., +# "suppressions=$(pwd)/.tsan-suppressions". Observe that relative paths such as +# ".tsan-suppressions" might not work. + +# A static variable is written to racily, but we always write the same value, so +# in practice it (hopefully!) doesn't matter. +race:^want_color$ +race:^transfer_debug$ diff --git a/color.c b/color.c index 31b6207a00..9a9261ac16 100644 --- a/color.c +++ b/color.c @@ -338,6 +338,13 @@ static int check_auto_color(void) int want_color(int var) { + /* + * NEEDSWORK: This function is sometimes used from multiple threads, and + * we end up using want_auto racily. That "should not matter" since + * we always write the same value, but it's still wrong. This function + * is listed in .tsan-suppressions for the time being. + */ + static int want_auto = -1; if (var < 0) diff --git a/transport-helper.c b/transport-helper.c index 33cff38cc0..2b830b2290 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -1117,6 +1117,13 @@ int transport_helper_init(struct transport *transport, const char *name) __attribute__((format (printf, 1, 2))) static void transfer_debug(const char *fmt, ...) { + /* + * NEEDSWORK: This function is sometimes used from multiple threads, and + * we end up using debug_enabled racily. That "should not matter" since + * we always write the same value, but it's still wrong. This function + * is listed in .tsan-suppressions for the time being. + */ + va_list args; char msgbuf[PBUFFERSIZE]; static int debug_enabled = -1;