Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
/*
|
|
|
|
* Various trivial helper wrappers around standard functions
|
|
|
|
*/
|
|
|
|
#include "cache.h"
|
2023-03-21 07:25:58 +01:00
|
|
|
#include "abspath.h"
|
2017-06-14 20:07:36 +02:00
|
|
|
#include "config.h"
|
2023-03-21 07:25:54 +01:00
|
|
|
#include "gettext.h"
|
2023-03-21 07:26:01 +01:00
|
|
|
#include "wrapper.h"
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
|
2022-03-30 07:06:40 +02:00
|
|
|
static intmax_t count_fsync_writeout_only;
|
|
|
|
static intmax_t count_fsync_hardware_flush;
|
|
|
|
|
2022-03-10 23:43:19 +01:00
|
|
|
#ifdef HAVE_RTLGENRANDOM
|
|
|
|
/* This is required to get access to RtlGenRandom. */
|
|
|
|
#define SystemFunction036 NTAPI SystemFunction036
|
|
|
|
#include <NTSecAPI.h>
|
|
|
|
#undef SystemFunction036
|
|
|
|
#endif
|
|
|
|
|
2014-08-16 05:08:02 +02:00
|
|
|
static int memory_limit_check(size_t size, int gentle)
|
2012-03-07 11:54:16 +01:00
|
|
|
{
|
2014-08-26 17:23:22 +02:00
|
|
|
static size_t limit = 0;
|
|
|
|
if (!limit) {
|
|
|
|
limit = git_env_ulong("GIT_ALLOC_LIMIT", 0);
|
|
|
|
if (!limit)
|
|
|
|
limit = SIZE_MAX;
|
2012-03-07 11:54:16 +01:00
|
|
|
}
|
2014-10-08 22:05:32 +02:00
|
|
|
if (size > limit) {
|
2014-08-16 05:08:02 +02:00
|
|
|
if (gentle) {
|
2014-10-08 22:05:32 +02:00
|
|
|
error("attempting to allocate %"PRIuMAX" over limit %"PRIuMAX,
|
|
|
|
(uintmax_t)size, (uintmax_t)limit);
|
2014-08-16 05:08:02 +02:00
|
|
|
return -1;
|
|
|
|
} else
|
2014-10-08 22:05:32 +02:00
|
|
|
die("attempting to allocate %"PRIuMAX" over limit %"PRIuMAX,
|
|
|
|
(uintmax_t)size, (uintmax_t)limit);
|
2014-08-16 05:08:02 +02:00
|
|
|
}
|
|
|
|
return 0;
|
2012-03-07 11:54:16 +01:00
|
|
|
}
|
|
|
|
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
char *xstrdup(const char *str)
|
|
|
|
{
|
|
|
|
char *ret = strdup(str);
|
packfile: drop release_pack_memory()
Long ago, in 97bfeb34df (Release pack windows before reporting out of
memory., 2006-12-24), we taught xmalloc() and friends to try unmapping
pack windows when malloc() failed. It's unlikely that his helps a lot in
practice, and it has some downsides. First, the downsides:
1. It makes xmalloc() not thread-safe. We've worked around this in
pack-objects.c, which installs its own locking version of the
try_to_free_routine(). But other threaded code doesn't.
2. It makes the system as a whole harder to reason about. Functions
which allocate heap memory under the hood may have farther-reaching
effects than expected.
That might be worth the tradeoff if there's a benefit. But in practice,
it seems unlikely. We're generally dealing with mmap'd files, so the OS
is going to do a much better job at responding to memory pressure by
dropping individual pages (the exception is systems with NO_MMAP, but
even there the OS can probably respond just as well with swapping).
So the only thing we're really freeing is address space. On 64-bit
systems, we have plenty of that to go around. On 32-bit systems, it
could possibly help. But around the same time we made two other changes:
77ccc5bbd1 (Introduce new config option for mmap limit., 2006-12-23) and
60bb8b1453 (Fully activate the sliding window pack access., 2006-12-23).
Together that means that a 32-bit system should have no more than 256MB
total of packed-git mmaps at one time, split between a few 32MB windows.
It's unlikely we have any address space problems since then, but we
don't have any data since the features were all added at the same time.
Likewise, xmmap() will try to free memory. At first glance, it seems
like we'd need this (when we try to mmap a new window, we might need to
close an old one to save address space on a 32-bit system). But we're
saved again by core.packedGitLimit: if we're going to exceed our 256MB
limit, we'll close an existing window before we even call mmap().
So it seems unlikely that this feature is actually doing anything
useful. And while we don't have reports of it harming anything (probably
because it rarely if ever kicks in), it would be nice to simplify the
system overall. This patch drops the whole try_to_free system from
xmalloc(), as well as the manual pack memory release in xmmap().
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-08-12 22:50:21 +02:00
|
|
|
if (!ret)
|
|
|
|
die("Out of memory, strdup failed");
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-08-16 05:08:02 +02:00
|
|
|
static void *do_xmalloc(size_t size, int gentle)
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
{
|
2012-03-07 11:54:16 +01:00
|
|
|
void *ret;
|
|
|
|
|
2014-08-16 05:08:02 +02:00
|
|
|
if (memory_limit_check(size, gentle))
|
|
|
|
return NULL;
|
2012-03-07 11:54:16 +01:00
|
|
|
ret = malloc(size);
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
if (!ret && !size)
|
|
|
|
ret = malloc(1);
|
|
|
|
if (!ret) {
|
packfile: drop release_pack_memory()
Long ago, in 97bfeb34df (Release pack windows before reporting out of
memory., 2006-12-24), we taught xmalloc() and friends to try unmapping
pack windows when malloc() failed. It's unlikely that his helps a lot in
practice, and it has some downsides. First, the downsides:
1. It makes xmalloc() not thread-safe. We've worked around this in
pack-objects.c, which installs its own locking version of the
try_to_free_routine(). But other threaded code doesn't.
2. It makes the system as a whole harder to reason about. Functions
which allocate heap memory under the hood may have farther-reaching
effects than expected.
That might be worth the tradeoff if there's a benefit. But in practice,
it seems unlikely. We're generally dealing with mmap'd files, so the OS
is going to do a much better job at responding to memory pressure by
dropping individual pages (the exception is systems with NO_MMAP, but
even there the OS can probably respond just as well with swapping).
So the only thing we're really freeing is address space. On 64-bit
systems, we have plenty of that to go around. On 32-bit systems, it
could possibly help. But around the same time we made two other changes:
77ccc5bbd1 (Introduce new config option for mmap limit., 2006-12-23) and
60bb8b1453 (Fully activate the sliding window pack access., 2006-12-23).
Together that means that a 32-bit system should have no more than 256MB
total of packed-git mmaps at one time, split between a few 32MB windows.
It's unlikely we have any address space problems since then, but we
don't have any data since the features were all added at the same time.
Likewise, xmmap() will try to free memory. At first glance, it seems
like we'd need this (when we try to mmap a new window, we might need to
close an old one to save address space on a 32-bit system). But we're
saved again by core.packedGitLimit: if we're going to exceed our 256MB
limit, we'll close an existing window before we even call mmap().
So it seems unlikely that this feature is actually doing anything
useful. And while we don't have reports of it harming anything (probably
because it rarely if ever kicks in), it would be nice to simplify the
system overall. This patch drops the whole try_to_free system from
xmalloc(), as well as the manual pack memory release in xmmap().
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-08-12 22:50:21 +02:00
|
|
|
if (!gentle)
|
|
|
|
die("Out of memory, malloc failed (tried to allocate %lu bytes)",
|
|
|
|
(unsigned long)size);
|
|
|
|
else {
|
|
|
|
error("Out of memory, malloc failed (tried to allocate %lu bytes)",
|
|
|
|
(unsigned long)size);
|
|
|
|
return NULL;
|
2014-08-16 05:08:02 +02:00
|
|
|
}
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
}
|
|
|
|
#ifdef XMALLOC_POISON
|
|
|
|
memset(ret, 0xA5, size);
|
|
|
|
#endif
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-08-16 05:08:02 +02:00
|
|
|
void *xmalloc(size_t size)
|
|
|
|
{
|
|
|
|
return do_xmalloc(size, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *do_xmallocz(size_t size, int gentle)
|
2010-01-26 19:24:12 +01:00
|
|
|
{
|
|
|
|
void *ret;
|
2014-08-16 05:08:02 +02:00
|
|
|
if (unsigned_add_overflows(size, 1)) {
|
|
|
|
if (gentle) {
|
|
|
|
error("Data too large to fit into virtual memory space.");
|
|
|
|
return NULL;
|
|
|
|
} else
|
|
|
|
die("Data too large to fit into virtual memory space.");
|
|
|
|
}
|
|
|
|
ret = do_xmalloc(size + 1, gentle);
|
|
|
|
if (ret)
|
|
|
|
((char*)ret)[size] = 0;
|
2010-01-26 19:24:12 +01:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-08-16 05:08:02 +02:00
|
|
|
void *xmallocz(size_t size)
|
|
|
|
{
|
|
|
|
return do_xmallocz(size, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void *xmallocz_gently(size_t size)
|
|
|
|
{
|
|
|
|
return do_xmallocz(size, 1);
|
|
|
|
}
|
|
|
|
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
/*
|
|
|
|
* xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
|
|
|
|
* "data" to the allocated memory, zero terminates the allocated memory,
|
|
|
|
* and returns a pointer to the allocated memory. If the allocation fails,
|
|
|
|
* the program dies.
|
|
|
|
*/
|
|
|
|
void *xmemdupz(const void *data, size_t len)
|
|
|
|
{
|
2010-01-26 19:24:12 +01:00
|
|
|
return memcpy(xmallocz(len), data, len);
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
char *xstrndup(const char *str, size_t len)
|
|
|
|
{
|
|
|
|
char *p = memchr(str, '\0', len);
|
|
|
|
return xmemdupz(str, p ? p - str : len);
|
|
|
|
}
|
|
|
|
|
wrapper: add function to compare strings with different NUL termination
When parsing capabilities for the pack protocol, there are times we'll
want to compare the value of a capability to a NUL-terminated string.
Since the data we're reading will be space-terminated, not
NUL-terminated, we need a function that compares the two strings, but
also checks that they're the same length. Otherwise, if we used strncmp
to compare these strings, we might accidentally accept a parameter that
was a prefix of the expected value.
Add a function, xstrncmpz, that takes a NUL-terminated string and a
non-NUL-terminated string, plus a length, and compares them, ensuring
that they are the same length.
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-25 21:58:50 +02:00
|
|
|
int xstrncmpz(const char *s, const char *t, size_t len)
|
|
|
|
{
|
|
|
|
int res = strncmp(s, t, len);
|
|
|
|
if (res)
|
|
|
|
return res;
|
|
|
|
return s[len] == '\0' ? 0 : 1;
|
|
|
|
}
|
|
|
|
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
void *xrealloc(void *ptr, size_t size)
|
|
|
|
{
|
2012-03-07 11:54:16 +01:00
|
|
|
void *ret;
|
|
|
|
|
xrealloc: do not reuse pointer freed by zero-length realloc()
This patch fixes a bug where xrealloc(ptr, 0) can double-free and
corrupt the heap on some platforms (including at least glibc).
The C99 standard says of malloc (section 7.20.3):
If the size of the space requested is zero, the behavior is
implementation-defined: either a null pointer is returned, or the
behavior is as if the size were some nonzero value, except that the
returned pointer shall not be used to access an object.
So we might get NULL back, or we might get an actual pointer (but we're
not allowed to look at its contents). To simplify our code, our
xmalloc() handles a NULL return by converting it into a single-byte
allocation. That way callers get consistent behavior. This was done way
back in 4e7a2eccc2 (?alloc: do not return NULL when asked for zero
bytes, 2005-12-29).
We also gave xcalloc() and xrealloc() the same treatment. And according
to C99, that is fine; the text above is in a paragraph that applies to
all three. But what happens to the memory we passed to realloc() in such
a case? I.e., if we do:
ret = realloc(ptr, 0);
and "ptr" is non-NULL, but we get NULL back, is "ptr" still valid? C99
doesn't cover this case specifically, but says (section 7.20.3.4):
The realloc function deallocates the old object pointed to by ptr and
returns a pointer to a new object that has the size specified by size.
So "ptr" is now deallocated, and we must only look at "ret". And since
"ret" is NULL, that means we have no allocated object at all. But that's
not quite the whole story. It also says:
If memory for the new object cannot be allocated, the old object is
not deallocated and its value is unchanged.
[...]
The realloc function returns a pointer to the new object (which may
have the same value as a pointer to the old object), or a null pointer
if the new object could not be allocated.
So if we see a NULL return with a non-zero size, we can expect that the
original object _is_ still valid. But with a non-zero size, it's
ambiguous. The NULL return might mean a failure (in which case the
object is valid), or it might mean that we successfully allocated
nothing, and used NULL to represent that.
The glibc manpage for realloc() explicitly says:
[...]if size is equal to zero, and ptr is not NULL, then the call is
equivalent to free(ptr).
Likewise, this StackOverflow answer:
https://stackoverflow.com/a/2135302
claims that C89 gave similar guidance (but I don't have a copy to verify
it). A comment on this answer:
https://stackoverflow.com/a/2022410
claims that Microsoft's CRT behaves the same.
But our current "retry with 1 byte" code passes the original pointer
again. So on glibc, we effectively free() the pointer and then try to
realloc() it again, which is undefined behavior.
The simplest fix here is to just pass "ret" (which we know to be NULL)
to the follow-up realloc(). But that means that a system which _doesn't_
free the original pointer would leak it. It's not clear if any such
systems exist, and that interpretation of the standard seems unlikely
(I'd expect a system that doesn't deallocate to simply return the
original pointer in this case). But it's easy enough to err on the safe
side, and just never pass a zero size to realloc() at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-02 09:54:39 +02:00
|
|
|
if (!size) {
|
|
|
|
free(ptr);
|
|
|
|
return xmalloc(0);
|
|
|
|
}
|
|
|
|
|
2014-08-16 05:08:02 +02:00
|
|
|
memory_limit_check(size, 0);
|
2012-03-07 11:54:16 +01:00
|
|
|
ret = realloc(ptr, size);
|
packfile: drop release_pack_memory()
Long ago, in 97bfeb34df (Release pack windows before reporting out of
memory., 2006-12-24), we taught xmalloc() and friends to try unmapping
pack windows when malloc() failed. It's unlikely that his helps a lot in
practice, and it has some downsides. First, the downsides:
1. It makes xmalloc() not thread-safe. We've worked around this in
pack-objects.c, which installs its own locking version of the
try_to_free_routine(). But other threaded code doesn't.
2. It makes the system as a whole harder to reason about. Functions
which allocate heap memory under the hood may have farther-reaching
effects than expected.
That might be worth the tradeoff if there's a benefit. But in practice,
it seems unlikely. We're generally dealing with mmap'd files, so the OS
is going to do a much better job at responding to memory pressure by
dropping individual pages (the exception is systems with NO_MMAP, but
even there the OS can probably respond just as well with swapping).
So the only thing we're really freeing is address space. On 64-bit
systems, we have plenty of that to go around. On 32-bit systems, it
could possibly help. But around the same time we made two other changes:
77ccc5bbd1 (Introduce new config option for mmap limit., 2006-12-23) and
60bb8b1453 (Fully activate the sliding window pack access., 2006-12-23).
Together that means that a 32-bit system should have no more than 256MB
total of packed-git mmaps at one time, split between a few 32MB windows.
It's unlikely we have any address space problems since then, but we
don't have any data since the features were all added at the same time.
Likewise, xmmap() will try to free memory. At first glance, it seems
like we'd need this (when we try to mmap a new window, we might need to
close an old one to save address space on a 32-bit system). But we're
saved again by core.packedGitLimit: if we're going to exceed our 256MB
limit, we'll close an existing window before we even call mmap().
So it seems unlikely that this feature is actually doing anything
useful. And while we don't have reports of it harming anything (probably
because it rarely if ever kicks in), it would be nice to simplify the
system overall. This patch drops the whole try_to_free system from
xmalloc(), as well as the manual pack memory release in xmmap().
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-08-12 22:50:21 +02:00
|
|
|
if (!ret)
|
|
|
|
die("Out of memory, realloc failed");
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void *xcalloc(size_t nmemb, size_t size)
|
|
|
|
{
|
2012-03-07 11:54:16 +01:00
|
|
|
void *ret;
|
|
|
|
|
2016-02-22 23:43:18 +01:00
|
|
|
if (unsigned_mult_overflows(nmemb, size))
|
|
|
|
die("data too large to fit into virtual memory space");
|
|
|
|
|
2014-08-16 05:08:02 +02:00
|
|
|
memory_limit_check(size * nmemb, 0);
|
2012-03-07 11:54:16 +01:00
|
|
|
ret = calloc(nmemb, size);
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
if (!ret && (!nmemb || !size))
|
|
|
|
ret = calloc(1, 1);
|
packfile: drop release_pack_memory()
Long ago, in 97bfeb34df (Release pack windows before reporting out of
memory., 2006-12-24), we taught xmalloc() and friends to try unmapping
pack windows when malloc() failed. It's unlikely that his helps a lot in
practice, and it has some downsides. First, the downsides:
1. It makes xmalloc() not thread-safe. We've worked around this in
pack-objects.c, which installs its own locking version of the
try_to_free_routine(). But other threaded code doesn't.
2. It makes the system as a whole harder to reason about. Functions
which allocate heap memory under the hood may have farther-reaching
effects than expected.
That might be worth the tradeoff if there's a benefit. But in practice,
it seems unlikely. We're generally dealing with mmap'd files, so the OS
is going to do a much better job at responding to memory pressure by
dropping individual pages (the exception is systems with NO_MMAP, but
even there the OS can probably respond just as well with swapping).
So the only thing we're really freeing is address space. On 64-bit
systems, we have plenty of that to go around. On 32-bit systems, it
could possibly help. But around the same time we made two other changes:
77ccc5bbd1 (Introduce new config option for mmap limit., 2006-12-23) and
60bb8b1453 (Fully activate the sliding window pack access., 2006-12-23).
Together that means that a 32-bit system should have no more than 256MB
total of packed-git mmaps at one time, split between a few 32MB windows.
It's unlikely we have any address space problems since then, but we
don't have any data since the features were all added at the same time.
Likewise, xmmap() will try to free memory. At first glance, it seems
like we'd need this (when we try to mmap a new window, we might need to
close an old one to save address space on a 32-bit system). But we're
saved again by core.packedGitLimit: if we're going to exceed our 256MB
limit, we'll close an existing window before we even call mmap().
So it seems unlikely that this feature is actually doing anything
useful. And while we don't have reports of it harming anything (probably
because it rarely if ever kicks in), it would be nice to simplify the
system overall. This patch drops the whole try_to_free system from
xmalloc(), as well as the manual pack memory release in xmmap().
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-08-12 22:50:21 +02:00
|
|
|
if (!ret)
|
|
|
|
die("Out of memory, calloc failed");
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
wrapper.c: add x{un,}setenv(), and use xsetenv() in environment.c
Add fatal wrappers for setenv() and unsetenv(). In d7ac12b25d3 (Add
set_git_dir() function, 2007-08-01) we started checking its return
value, and since 48988c4d0c3 (set_git_dir: die when setenv() fails,
2018-03-30) we've had set_git_dir_1() die if we couldn't set it.
Let's provide a wrapper for both, this will be useful in many other
places, a subsequent patch will make another use of xsetenv().
The checking of the return value here is over-eager according to
setenv(3) and POSIX. It's documented as returning just -1 or 0, so
perhaps we should be checking -1 explicitly.
Let's just instead die on any non-zero, if our C library is so broken
as to return something else than -1 on error (and perhaps not set
errno?) the worst we'll do is die with a nonsensical errno value, but
we'll want to die in either case.
Let's make these return "void" instead of "int". As far as I can tell
there's no other x*() wrappers that needed to make the decision of
deviating from the signature in the C library, but since their return
value is only used to indicate errors (so we'd die here), we can catch
unreachable code such as
if (xsetenv(...) < 0)
[...];
I think it would be OK skip the NULL check of the "name" here for the
calls to die_errno(). Almost all of our setenv() callers are taking a
constant string hardcoded in the source as the first argument, and for
the rest we can probably assume they've done the NULL check
themselves. Even if they didn't, modern C libraries are forgiving
about it (e.g. glibc formatting it as "(null)"), on those that aren't,
well, we were about to die anyway. But let's include the check anyway
for good measure.
1. https://pubs.opengroup.org/onlinepubs/009604499/functions/setenv.html
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-21 15:12:59 +02:00
|
|
|
void xsetenv(const char *name, const char *value, int overwrite)
|
|
|
|
{
|
|
|
|
if (setenv(name, value, overwrite))
|
|
|
|
die_errno(_("could not setenv '%s'"), name ? name : "(null)");
|
|
|
|
}
|
|
|
|
|
2015-08-04 15:51:22 +02:00
|
|
|
/**
|
|
|
|
* xopen() is the same as open(), but it die()s if the open() fails.
|
|
|
|
*/
|
|
|
|
int xopen(const char *path, int oflag, ...)
|
|
|
|
{
|
|
|
|
mode_t mode = 0;
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* va_arg() will have undefined behavior if the specified type is not
|
|
|
|
* compatible with the argument type. Since integers are promoted to
|
|
|
|
* ints, we fetch the next argument as an int, and then cast it to a
|
|
|
|
* mode_t to avoid undefined behavior.
|
|
|
|
*/
|
|
|
|
va_start(ap, oflag);
|
|
|
|
if (oflag & O_CREAT)
|
|
|
|
mode = va_arg(ap, int);
|
|
|
|
va_end(ap);
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
int fd = open(path, oflag, mode);
|
|
|
|
if (fd >= 0)
|
|
|
|
return fd;
|
|
|
|
if (errno == EINTR)
|
|
|
|
continue;
|
|
|
|
|
2021-08-25 22:14:09 +02:00
|
|
|
if ((oflag & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
|
|
|
|
die_errno(_("unable to create '%s'"), path);
|
|
|
|
else if ((oflag & O_RDWR) == O_RDWR)
|
2015-08-04 15:51:22 +02:00
|
|
|
die_errno(_("could not open '%s' for reading and writing"), path);
|
|
|
|
else if ((oflag & O_WRONLY) == O_WRONLY)
|
|
|
|
die_errno(_("could not open '%s' for writing"), path);
|
|
|
|
else
|
|
|
|
die_errno(_("could not open '%s' for reading"), path);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-10 10:20:46 +02:00
|
|
|
static int handle_nonblock(int fd, short poll_events, int err)
|
|
|
|
{
|
|
|
|
struct pollfd pfd;
|
|
|
|
|
|
|
|
if (err != EAGAIN && err != EWOULDBLOCK)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
pfd.fd = fd;
|
|
|
|
pfd.events = poll_events;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* no need to check for errors, here;
|
|
|
|
* a subsequent read/write will detect unrecoverable errors
|
|
|
|
*/
|
|
|
|
poll(&pfd, 1, -1);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
/*
|
|
|
|
* xread() is the same a read(), but it automatically restarts read()
|
|
|
|
* operations with a recoverable error (EAGAIN and EINTR). xread()
|
|
|
|
* DOES NOT GUARANTEE that "len" bytes is read even if the data is available.
|
|
|
|
*/
|
|
|
|
ssize_t xread(int fd, void *buf, size_t len)
|
|
|
|
{
|
|
|
|
ssize_t nr;
|
xread, xwrite: limit size of IO to 8MB
Checking out 2GB or more through an external filter (see test) fails
on Mac OS X 10.8.4 (12E55) for a 64-bit executable with:
error: read from external filter cat failed
error: cannot feed the input to external filter cat
error: cat died of signal 13
error: external filter cat failed 141
error: external filter cat failed
The reason is that read() immediately returns with EINVAL when asked
to read more than 2GB. According to POSIX [1], if the value of
nbyte passed to read() is greater than SSIZE_MAX, the result is
implementation-defined. The write function has the same restriction
[2]. Since OS X still supports running 32-bit executables, the
32-bit limit (SSIZE_MAX = INT_MAX = 2GB - 1) seems to be also
imposed on 64-bit executables under certain conditions. For write,
the problem has been addressed earlier [6c642a].
Address the problem for read() and write() differently, by limiting
size of IO chunks unconditionally on all platforms in xread() and
xwrite(). Large chunks only cause problems, like causing latencies
when killing the process, even if OS X was not buggy. Doing IO in
reasonably sized smaller chunks should have no negative impact on
performance.
The compat wrapper clipped_write() introduced earlier [6c642a] is
not needed anymore. It will be reverted in a separate commit. The
new test catches read and write problems.
Note that 'git add' exits with 0 even if it prints filtering errors
to stderr. The test, therefore, checks stderr. 'git add' should
probably be changed (sometime in another commit) to exit with
nonzero if filtering fails. The test could then be changed to use
test_must_fail.
Thanks to the following people for suggestions and testing:
Johannes Sixt <j6t@kdbg.org>
John Keeping <john@keeping.me.uk>
Jonathan Nieder <jrnieder@gmail.com>
Kyle J. McKay <mackyle@gmail.com>
Linus Torvalds <torvalds@linux-foundation.org>
Torsten Bögershausen <tboegi@web.de>
[1] http://pubs.opengroup.org/onlinepubs/009695399/functions/read.html
[2] http://pubs.opengroup.org/onlinepubs/009695399/functions/write.html
[6c642a] commit 6c642a878688adf46b226903858b53e2d31ac5c3
compate/clipped-write.c: large write(2) fails on Mac OS X/XNU
Signed-off-by: Steffen Prohaska <prohaska@zib.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-20 08:43:54 +02:00
|
|
|
if (len > MAX_IO_SIZE)
|
2020-03-28 03:57:34 +01:00
|
|
|
len = MAX_IO_SIZE;
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
while (1) {
|
|
|
|
nr = read(fd, buf, len);
|
2015-12-16 01:04:07 +01:00
|
|
|
if (nr < 0) {
|
|
|
|
if (errno == EINTR)
|
|
|
|
continue;
|
2016-07-10 10:20:46 +02:00
|
|
|
if (handle_nonblock(fd, POLLIN, errno))
|
2016-06-27 05:56:35 +02:00
|
|
|
continue;
|
2015-12-16 01:04:07 +01:00
|
|
|
}
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
return nr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* xwrite() is the same a write(), but it automatically restarts write()
|
|
|
|
* operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT
|
|
|
|
* GUARANTEE that "len" bytes is written even if the operation is successful.
|
|
|
|
*/
|
|
|
|
ssize_t xwrite(int fd, const void *buf, size_t len)
|
|
|
|
{
|
|
|
|
ssize_t nr;
|
xread, xwrite: limit size of IO to 8MB
Checking out 2GB or more through an external filter (see test) fails
on Mac OS X 10.8.4 (12E55) for a 64-bit executable with:
error: read from external filter cat failed
error: cannot feed the input to external filter cat
error: cat died of signal 13
error: external filter cat failed 141
error: external filter cat failed
The reason is that read() immediately returns with EINVAL when asked
to read more than 2GB. According to POSIX [1], if the value of
nbyte passed to read() is greater than SSIZE_MAX, the result is
implementation-defined. The write function has the same restriction
[2]. Since OS X still supports running 32-bit executables, the
32-bit limit (SSIZE_MAX = INT_MAX = 2GB - 1) seems to be also
imposed on 64-bit executables under certain conditions. For write,
the problem has been addressed earlier [6c642a].
Address the problem for read() and write() differently, by limiting
size of IO chunks unconditionally on all platforms in xread() and
xwrite(). Large chunks only cause problems, like causing latencies
when killing the process, even if OS X was not buggy. Doing IO in
reasonably sized smaller chunks should have no negative impact on
performance.
The compat wrapper clipped_write() introduced earlier [6c642a] is
not needed anymore. It will be reverted in a separate commit. The
new test catches read and write problems.
Note that 'git add' exits with 0 even if it prints filtering errors
to stderr. The test, therefore, checks stderr. 'git add' should
probably be changed (sometime in another commit) to exit with
nonzero if filtering fails. The test could then be changed to use
test_must_fail.
Thanks to the following people for suggestions and testing:
Johannes Sixt <j6t@kdbg.org>
John Keeping <john@keeping.me.uk>
Jonathan Nieder <jrnieder@gmail.com>
Kyle J. McKay <mackyle@gmail.com>
Linus Torvalds <torvalds@linux-foundation.org>
Torsten Bögershausen <tboegi@web.de>
[1] http://pubs.opengroup.org/onlinepubs/009695399/functions/read.html
[2] http://pubs.opengroup.org/onlinepubs/009695399/functions/write.html
[6c642a] commit 6c642a878688adf46b226903858b53e2d31ac5c3
compate/clipped-write.c: large write(2) fails on Mac OS X/XNU
Signed-off-by: Steffen Prohaska <prohaska@zib.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-20 08:43:54 +02:00
|
|
|
if (len > MAX_IO_SIZE)
|
2020-03-28 03:57:34 +01:00
|
|
|
len = MAX_IO_SIZE;
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
while (1) {
|
|
|
|
nr = write(fd, buf, len);
|
2016-06-27 01:21:12 +02:00
|
|
|
if (nr < 0) {
|
|
|
|
if (errno == EINTR)
|
|
|
|
continue;
|
2016-07-10 10:20:46 +02:00
|
|
|
if (handle_nonblock(fd, POLLOUT, errno))
|
2016-06-27 01:21:12 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2014-04-10 20:54:12 +02:00
|
|
|
return nr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* xpread() is the same as pread(), but it automatically restarts pread()
|
|
|
|
* operations with a recoverable error (EAGAIN and EINTR). xpread() DOES
|
|
|
|
* NOT GUARANTEE that "len" bytes is read even if the data is available.
|
|
|
|
*/
|
|
|
|
ssize_t xpread(int fd, void *buf, size_t len, off_t offset)
|
|
|
|
{
|
|
|
|
ssize_t nr;
|
|
|
|
if (len > MAX_IO_SIZE)
|
|
|
|
len = MAX_IO_SIZE;
|
|
|
|
while (1) {
|
|
|
|
nr = pread(fd, buf, len, offset);
|
|
|
|
if ((nr < 0) && (errno == EAGAIN || errno == EINTR))
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
continue;
|
|
|
|
return nr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-07-21 01:13:05 +02:00
|
|
|
ssize_t read_in_full(int fd, void *buf, size_t count)
|
|
|
|
{
|
|
|
|
char *p = buf;
|
|
|
|
ssize_t total = 0;
|
|
|
|
|
|
|
|
while (count > 0) {
|
|
|
|
ssize_t loaded = xread(fd, p, count);
|
read_in_full: always report errors
The read_in_full function repeatedly calls read() to fill a
buffer. If the first read() returns an error, we notify the
caller by returning the error. However, if we read some data
and then get an error on a subsequent read, we simply return
the amount of data that we did read, and the caller is
unaware of the error.
This makes the tradeoff that seeing the partial data is more
important than the fact that an error occurred. In practice,
this is generally not the case; we care more if an error
occurred, and should throw away any partial data.
I audited the current callers. In most cases, this will make
no difference at all, as they do:
if (read_in_full(fd, buf, size) != size)
error("short read");
However, it will help in a few cases:
1. In sha1_file.c:index_stream, we would fail to notice
errors in the incoming stream.
2. When reading symbolic refs in resolve_ref, we would
fail to notice errors and potentially use a truncated
ref name.
3. In various places, we will get much better error
messages. For example, callers of safe_read would
erroneously print "the remote end hung up unexpectedly"
instead of showing the read error.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-26 18:30:27 +02:00
|
|
|
if (loaded < 0)
|
|
|
|
return -1;
|
|
|
|
if (loaded == 0)
|
|
|
|
return total;
|
2008-07-21 01:13:05 +02:00
|
|
|
count -= loaded;
|
|
|
|
p += loaded;
|
|
|
|
total += loaded;
|
|
|
|
}
|
|
|
|
|
|
|
|
return total;
|
|
|
|
}
|
|
|
|
|
|
|
|
ssize_t write_in_full(int fd, const void *buf, size_t count)
|
|
|
|
{
|
|
|
|
const char *p = buf;
|
|
|
|
ssize_t total = 0;
|
|
|
|
|
|
|
|
while (count > 0) {
|
|
|
|
ssize_t written = xwrite(fd, p, count);
|
|
|
|
if (written < 0)
|
|
|
|
return -1;
|
|
|
|
if (!written) {
|
|
|
|
errno = ENOSPC;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
count -= written;
|
|
|
|
p += written;
|
|
|
|
total += written;
|
|
|
|
}
|
|
|
|
|
|
|
|
return total;
|
|
|
|
}
|
|
|
|
|
2014-04-10 20:31:21 +02:00
|
|
|
ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset)
|
|
|
|
{
|
|
|
|
char *p = buf;
|
|
|
|
ssize_t total = 0;
|
|
|
|
|
|
|
|
while (count > 0) {
|
|
|
|
ssize_t loaded = xpread(fd, p, count, offset);
|
|
|
|
if (loaded < 0)
|
|
|
|
return -1;
|
|
|
|
if (loaded == 0)
|
|
|
|
return total;
|
|
|
|
count -= loaded;
|
|
|
|
p += loaded;
|
|
|
|
total += loaded;
|
|
|
|
offset += loaded;
|
|
|
|
}
|
|
|
|
|
|
|
|
return total;
|
|
|
|
}
|
|
|
|
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
int xdup(int fd)
|
|
|
|
{
|
|
|
|
int ret = dup(fd);
|
|
|
|
if (ret < 0)
|
2009-06-27 17:58:46 +02:00
|
|
|
die_errno("dup failed");
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-08-04 15:51:23 +02:00
|
|
|
/**
|
|
|
|
* xfopen() is the same as fopen(), but it die()s if the fopen() fails.
|
|
|
|
*/
|
|
|
|
FILE *xfopen(const char *path, const char *mode)
|
|
|
|
{
|
|
|
|
for (;;) {
|
|
|
|
FILE *fp = fopen(path, mode);
|
|
|
|
if (fp)
|
|
|
|
return fp;
|
|
|
|
if (errno == EINTR)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (*mode && mode[1] == '+')
|
|
|
|
die_errno(_("could not open '%s' for reading and writing"), path);
|
|
|
|
else if (*mode == 'w' || *mode == 'a')
|
|
|
|
die_errno(_("could not open '%s' for writing"), path);
|
|
|
|
else
|
|
|
|
die_errno(_("could not open '%s' for reading"), path);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
FILE *xfdopen(int fd, const char *mode)
|
|
|
|
{
|
|
|
|
FILE *stream = fdopen(fd, mode);
|
2022-05-02 18:50:37 +02:00
|
|
|
if (!stream)
|
2009-06-27 17:58:46 +02:00
|
|
|
die_errno("Out of memory? fdopen failed");
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
return stream;
|
|
|
|
}
|
|
|
|
|
2016-01-06 14:09:43 +01:00
|
|
|
FILE *fopen_for_writing(const char *path)
|
|
|
|
{
|
|
|
|
FILE *ret = fopen(path, "w");
|
|
|
|
|
|
|
|
if (!ret && errno == EPERM) {
|
|
|
|
if (!unlink(path))
|
|
|
|
ret = fopen(path, "w");
|
|
|
|
else
|
|
|
|
errno = EPERM;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-05-08 12:40:37 +02:00
|
|
|
static void warn_on_inaccessible(const char *path)
|
|
|
|
{
|
|
|
|
warning_errno(_("unable to access '%s'"), path);
|
|
|
|
}
|
|
|
|
|
2017-05-03 12:16:49 +02:00
|
|
|
int warn_on_fopen_errors(const char *path)
|
|
|
|
{
|
|
|
|
if (errno != ENOENT && errno != ENOTDIR) {
|
|
|
|
warn_on_inaccessible(path);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-05-03 12:16:50 +02:00
|
|
|
FILE *fopen_or_warn(const char *path, const char *mode)
|
|
|
|
{
|
|
|
|
FILE *fp = fopen(path, mode);
|
|
|
|
|
|
|
|
if (fp)
|
|
|
|
return fp;
|
|
|
|
|
|
|
|
warn_on_fopen_errors(path);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2018-02-14 19:59:56 +01:00
|
|
|
int xmkstemp(char *filename_template)
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
{
|
|
|
|
int fd;
|
2010-12-18 22:28:00 +01:00
|
|
|
char origtemplate[PATH_MAX];
|
2018-02-14 19:59:56 +01:00
|
|
|
strlcpy(origtemplate, filename_template, sizeof(origtemplate));
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
|
2018-02-14 19:59:56 +01:00
|
|
|
fd = mkstemp(filename_template);
|
2010-12-18 22:28:00 +01:00
|
|
|
if (fd < 0) {
|
|
|
|
int saved_errno = errno;
|
|
|
|
const char *nonrelative_template;
|
|
|
|
|
2018-02-14 19:59:56 +01:00
|
|
|
if (strlen(filename_template) != strlen(origtemplate))
|
|
|
|
filename_template = origtemplate;
|
2010-12-18 22:28:00 +01:00
|
|
|
|
2018-02-14 19:59:56 +01:00
|
|
|
nonrelative_template = absolute_path(filename_template);
|
2010-12-18 22:28:00 +01:00
|
|
|
errno = saved_errno;
|
|
|
|
die_errno("Unable to create temporary file '%s'",
|
|
|
|
nonrelative_template);
|
|
|
|
}
|
Shrink the git binary a bit by avoiding unnecessary inline functions
So I was looking at the disgusting size of the git binary, and even with
the debugging removed, and using -Os instead of -O2, the size of the text
section was pretty high. In this day and age I guess almost a megabyte of
text isn't really all that surprising, but it still doesn't exactly make
me think "lean and mean".
With -Os, a surprising amount of text space is wasted on inline functions
that end up just being replicated multiple times, and where performance
really isn't a valid reason to inline them. In particular, the trivial
wrapper functions like "xmalloc()" are used _everywhere_, and making them
inline just duplicates the text (and the string we use to 'die()' on
failure) unnecessarily.
So this just moves them into a "wrapper.c" file, getting rid of a tiny bit
of unnecessary bloat. The following numbers are both with "CFLAGS=-Os":
Before:
[torvalds@woody git]$ size git
text data bss dec hex filename
700460 15160 292184 1007804 f60bc git
After:
[torvalds@woody git]$ size git
text data bss dec hex filename
670540 15160 292184 977884 eebdc git
so it saves almost 30k of text-space (it actually saves more than that
with the default -O2, but I don't think that's necessarily a very relevant
number from a "try to shrink git" standpoint).
It might conceivably have a performance impact, but none of this should be
_that_ performance critical. The real cost is not generally in the wrapper
anyway, but in the code it wraps (ie the cost of "xread()" is all in the
read itself, not in the trivial wrapping of it).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
|
|
|
return fd;
|
|
|
|
}
|
2009-01-08 04:54:47 +01:00
|
|
|
|
2010-11-06 12:46:31 +01:00
|
|
|
/* Adapted from libiberty's mkstemp.c. */
|
|
|
|
|
|
|
|
#undef TMP_MAX
|
|
|
|
#define TMP_MAX 16384
|
|
|
|
|
|
|
|
int git_mkstemps_mode(char *pattern, int suffix_len, int mode)
|
|
|
|
{
|
|
|
|
static const char letters[] =
|
|
|
|
"abcdefghijklmnopqrstuvwxyz"
|
|
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
|
|
"0123456789";
|
2019-10-02 17:32:07 +02:00
|
|
|
static const int num_letters = ARRAY_SIZE(letters) - 1;
|
|
|
|
static const char x_pattern[] = "XXXXXX";
|
|
|
|
static const int num_x = ARRAY_SIZE(x_pattern) - 1;
|
2018-02-14 19:59:56 +01:00
|
|
|
char *filename_template;
|
2010-11-06 12:46:31 +01:00
|
|
|
size_t len;
|
|
|
|
int fd, count;
|
|
|
|
|
|
|
|
len = strlen(pattern);
|
|
|
|
|
2019-10-02 17:32:07 +02:00
|
|
|
if (len < num_x + suffix_len) {
|
2010-11-06 12:46:31 +01:00
|
|
|
errno = EINVAL;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2019-10-02 17:32:07 +02:00
|
|
|
if (strncmp(&pattern[len - num_x - suffix_len], x_pattern, num_x)) {
|
2010-11-06 12:46:31 +01:00
|
|
|
errno = EINVAL;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Replace pattern's XXXXXX characters with randomness.
|
|
|
|
* Try TMP_MAX different filenames.
|
|
|
|
*/
|
2019-10-02 17:32:07 +02:00
|
|
|
filename_template = &pattern[len - num_x - suffix_len];
|
2010-11-06 12:46:31 +01:00
|
|
|
for (count = 0; count < TMP_MAX; ++count) {
|
2019-10-01 04:29:36 +02:00
|
|
|
int i;
|
wrapper: use a CSPRNG to generate random file names
The current way we generate random file names is by taking the seconds
and microseconds, plus the PID, and mixing them together, then encoding
them. If this fails, we increment the value by 7777, and try again up
to TMP_MAX times.
Unfortunately, this is not the best idea from a security perspective.
If we're writing into TMPDIR, an attacker can guess these values easily
and prevent us from creating any temporary files at all by creating them
all first. Even though we set TMP_MAX to 16384, this may be achievable
in some contexts, even if unlikely to occur in practice.
Fortunately, we can simply solve this by using the system
cryptographically secure pseudorandom number generator (CSPRNG) to
generate a random 64-bit value, and use that as before. Note that there
is still a small bias here, but because a six-character sequence chosen
out of 62 characters provides about 36 bits of entropy, the bias here is
less than 2^-28, which is acceptable, especially considering we'll retry
several times.
Note that the use of a CSPRNG in generating temporary file names is also
used in many libcs. glibc recently changed from an approach similar to
ours to using a CSPRNG, and FreeBSD and OpenBSD also use a CSPRNG in
this case. Even if the likelihood of an attack is low, we should still
be at least as responsible in creating temporary files as libc is.
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-17 22:56:17 +01:00
|
|
|
uint64_t v;
|
|
|
|
if (csprng_bytes(&v, sizeof(v)) < 0)
|
|
|
|
return error_errno("unable to get random bytes for temporary file");
|
|
|
|
|
2010-11-06 12:46:31 +01:00
|
|
|
/* Fill in the random bits. */
|
2019-10-02 17:32:07 +02:00
|
|
|
for (i = 0; i < num_x; i++) {
|
2019-10-01 04:29:36 +02:00
|
|
|
filename_template[i] = letters[v % num_letters];
|
|
|
|
v /= num_letters;
|
|
|
|
}
|
2010-11-06 12:46:31 +01:00
|
|
|
|
|
|
|
fd = open(pattern, O_CREAT | O_EXCL | O_RDWR, mode);
|
2013-07-12 10:58:35 +02:00
|
|
|
if (fd >= 0)
|
2010-11-06 12:46:31 +01:00
|
|
|
return fd;
|
|
|
|
/*
|
|
|
|
* Fatal error (EPERM, ENOSPC etc).
|
|
|
|
* It doesn't make sense to loop.
|
|
|
|
*/
|
|
|
|
if (errno != EEXIST)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* We return the null string if we can't find a unique file name. */
|
|
|
|
pattern[0] = '\0';
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int git_mkstemp_mode(char *pattern, int mode)
|
|
|
|
{
|
|
|
|
/* mkstemp is just mkstemps with no suffix */
|
|
|
|
return git_mkstemps_mode(pattern, 0, mode);
|
|
|
|
}
|
|
|
|
|
2018-02-14 19:59:56 +01:00
|
|
|
int xmkstemp_mode(char *filename_template, int mode)
|
2010-02-22 23:32:13 +01:00
|
|
|
{
|
|
|
|
int fd;
|
2010-12-18 22:28:00 +01:00
|
|
|
char origtemplate[PATH_MAX];
|
2018-02-14 19:59:56 +01:00
|
|
|
strlcpy(origtemplate, filename_template, sizeof(origtemplate));
|
2010-02-22 23:32:13 +01:00
|
|
|
|
2018-02-14 19:59:56 +01:00
|
|
|
fd = git_mkstemp_mode(filename_template, mode);
|
2010-12-18 22:28:00 +01:00
|
|
|
if (fd < 0) {
|
|
|
|
int saved_errno = errno;
|
|
|
|
const char *nonrelative_template;
|
|
|
|
|
2018-02-14 19:59:56 +01:00
|
|
|
if (!filename_template[0])
|
|
|
|
filename_template = origtemplate;
|
2010-12-18 22:28:00 +01:00
|
|
|
|
2018-02-14 19:59:56 +01:00
|
|
|
nonrelative_template = absolute_path(filename_template);
|
2010-12-18 22:28:00 +01:00
|
|
|
errno = saved_errno;
|
|
|
|
die_errno("Unable to create temporary file '%s'",
|
|
|
|
nonrelative_template);
|
|
|
|
}
|
2010-02-22 23:32:13 +01:00
|
|
|
return fd;
|
|
|
|
}
|
|
|
|
|
2022-03-10 23:43:20 +01:00
|
|
|
/*
|
|
|
|
* Some platforms return EINTR from fsync. Since fsync is invoked in some
|
|
|
|
* cases by a wrapper that dies on failure, do not expose EINTR to callers.
|
|
|
|
*/
|
|
|
|
static int fsync_loop(int fd)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
do {
|
|
|
|
err = fsync(fd);
|
|
|
|
} while (err < 0 && errno == EINTR);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
int git_fsync(int fd, enum fsync_action action)
|
|
|
|
{
|
|
|
|
switch (action) {
|
|
|
|
case FSYNC_WRITEOUT_ONLY:
|
2022-03-30 07:06:40 +02:00
|
|
|
count_fsync_writeout_only += 1;
|
2022-03-10 23:43:20 +01:00
|
|
|
|
|
|
|
#ifdef __APPLE__
|
|
|
|
/*
|
|
|
|
* On macOS, fsync just causes filesystem cache writeback but
|
|
|
|
* does not flush hardware caches.
|
|
|
|
*/
|
|
|
|
return fsync_loop(fd);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef HAVE_SYNC_FILE_RANGE
|
|
|
|
/*
|
|
|
|
* On linux 2.6.17 and above, sync_file_range is the way to
|
|
|
|
* issue a writeback without a hardware flush. An offset of
|
|
|
|
* 0 and size of 0 indicates writeout of the entire file and the
|
|
|
|
* wait flags ensure that all dirty data is written to the disk
|
|
|
|
* (potentially in a disk-side cache) before we continue.
|
|
|
|
*/
|
|
|
|
|
|
|
|
return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE |
|
|
|
|
SYNC_FILE_RANGE_WRITE |
|
|
|
|
SYNC_FILE_RANGE_WAIT_AFTER);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef fsync_no_flush
|
|
|
|
return fsync_no_flush(fd);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
errno = ENOSYS;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
case FSYNC_HARDWARE_FLUSH:
|
2022-03-30 07:06:40 +02:00
|
|
|
count_fsync_hardware_flush += 1;
|
|
|
|
|
2022-03-10 23:43:20 +01:00
|
|
|
/*
|
|
|
|
* On macOS, a special fcntl is required to really flush the
|
|
|
|
* caches within the storage controller. As of this writing,
|
|
|
|
* this is a very expensive operation on Apple SSDs.
|
|
|
|
*/
|
|
|
|
#ifdef __APPLE__
|
|
|
|
return fcntl(fd, F_FULLFSYNC);
|
|
|
|
#else
|
|
|
|
return fsync_loop(fd);
|
|
|
|
#endif
|
|
|
|
default:
|
|
|
|
BUG("unexpected git_fsync(%d) call", action);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
trace2: only include "fsync" events if we git_fsync()
Fix the overly verbose trace2 logging added in 9a4987677d3 (trace2:
add stats for fsync operations, 2022-03-30) (first released with
v2.36.0).
Since that change every single "git" command invocation has included
these "data" events, even though we'll only make use of these with
core.fsyncMethod=batch, and even then only have non-zero values if
we're writing object data to disk. See c0f4752ed2f (core.fsyncmethod:
batched disk flushes for loose-objects, 2022-04-04) for that feature.
As we're needing to indent the trace2_data_intmax() lines let's
introduce helper variables to ensure that our resulting lines (which
were already too) don't exceed the recommendations of the
CodingGuidelines. Doing that requires either wrapping them twice, or
introducing short throwaway variable names, let's do the latter.
The result was that e.g. "git version" would previously emit a total
of 6 trace2 events with the GIT_TRACE2_EVENT target (version, start,
cmd_ancestry, cmd_name, exit, atexit), but afterwards would emit
8. We'd emit 2 "data" events before the "exit" event.
The reason we didn't catch this was that the trace2 unit tests added
in a15860dca3f (trace2: t/helper/test-trace2, t0210.sh, t0211.sh,
t0212.sh, 2019-02-22) would omit any "data" events that weren't the
ones it cared about. Before this change to the C code 6/7 of our
"t/t0212-trace2-event.sh" tests would fail if this change was applied
to "t/t0212/parse_events.perl".
Let's make the trace2 testing more strict, and further append any new
events types we don't know about in "t/t0212/parse_events.perl". Since
we only invoke the "test-tool trace2" there's no guarantee that we'll
catch other overly verbose events in the future, but we'll at least
notice if we start emitting new events that are issues every time we
log anything with trace2's JSON target.
We exclude the "data_json" event type, we'd otherwise would fail on
both "win test" and "win+VS test" CI due to the logging added in
353d3d77f4f (trace2: collect Windows-specific process information,
2019-02-22). It looks like that logging should really be using
trace2_cmd_ancestry() instead, which was introduced later in
2f732bf15e6 (tr2: log parent process name, 2021-07-21), but let's
leave it for now.
The fix-up to aaf81223f48 (unpack-objects: use stream_loose_object()
to unpack large objects, 2022-06-11) is needed because we're changing
the behavior of these events as discussed above. Since we'd always
emit a "hardware-flush" event the test added in aaf81223f48 wasn't
testing anything except that this trace2 data was unconditionally
logged. Even if "core.fsyncMethod" wasn't set to "batch" we'd pass the
test.
Now we'll check the expected number of "writeout" v.s. "flush" calls
under "core.fsyncMethod=batch", but note that this doesn't actually
test if we carried out the sync using that method, on a platform where
we'd have to fall back to fsync() each of those "writeout" would
really be a "flush" (i.e. a full fsync()).
But in this case what we're testing is that the logic in
"unpack-objects" behaves as expected, not the OS-specific question of
whether we actually were able to use the "bulk" method.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-18 12:31:52 +02:00
|
|
|
static void log_trace_fsync_if(const char *key, intmax_t value)
|
|
|
|
{
|
|
|
|
if (value)
|
|
|
|
trace2_data_intmax("fsync", the_repository, key, value);
|
|
|
|
}
|
|
|
|
|
2022-03-30 07:06:40 +02:00
|
|
|
void trace_git_fsync_stats(void)
|
|
|
|
{
|
trace2: only include "fsync" events if we git_fsync()
Fix the overly verbose trace2 logging added in 9a4987677d3 (trace2:
add stats for fsync operations, 2022-03-30) (first released with
v2.36.0).
Since that change every single "git" command invocation has included
these "data" events, even though we'll only make use of these with
core.fsyncMethod=batch, and even then only have non-zero values if
we're writing object data to disk. See c0f4752ed2f (core.fsyncmethod:
batched disk flushes for loose-objects, 2022-04-04) for that feature.
As we're needing to indent the trace2_data_intmax() lines let's
introduce helper variables to ensure that our resulting lines (which
were already too) don't exceed the recommendations of the
CodingGuidelines. Doing that requires either wrapping them twice, or
introducing short throwaway variable names, let's do the latter.
The result was that e.g. "git version" would previously emit a total
of 6 trace2 events with the GIT_TRACE2_EVENT target (version, start,
cmd_ancestry, cmd_name, exit, atexit), but afterwards would emit
8. We'd emit 2 "data" events before the "exit" event.
The reason we didn't catch this was that the trace2 unit tests added
in a15860dca3f (trace2: t/helper/test-trace2, t0210.sh, t0211.sh,
t0212.sh, 2019-02-22) would omit any "data" events that weren't the
ones it cared about. Before this change to the C code 6/7 of our
"t/t0212-trace2-event.sh" tests would fail if this change was applied
to "t/t0212/parse_events.perl".
Let's make the trace2 testing more strict, and further append any new
events types we don't know about in "t/t0212/parse_events.perl". Since
we only invoke the "test-tool trace2" there's no guarantee that we'll
catch other overly verbose events in the future, but we'll at least
notice if we start emitting new events that are issues every time we
log anything with trace2's JSON target.
We exclude the "data_json" event type, we'd otherwise would fail on
both "win test" and "win+VS test" CI due to the logging added in
353d3d77f4f (trace2: collect Windows-specific process information,
2019-02-22). It looks like that logging should really be using
trace2_cmd_ancestry() instead, which was introduced later in
2f732bf15e6 (tr2: log parent process name, 2021-07-21), but let's
leave it for now.
The fix-up to aaf81223f48 (unpack-objects: use stream_loose_object()
to unpack large objects, 2022-06-11) is needed because we're changing
the behavior of these events as discussed above. Since we'd always
emit a "hardware-flush" event the test added in aaf81223f48 wasn't
testing anything except that this trace2 data was unconditionally
logged. Even if "core.fsyncMethod" wasn't set to "batch" we'd pass the
test.
Now we'll check the expected number of "writeout" v.s. "flush" calls
under "core.fsyncMethod=batch", but note that this doesn't actually
test if we carried out the sync using that method, on a platform where
we'd have to fall back to fsync() each of those "writeout" would
really be a "flush" (i.e. a full fsync()).
But in this case what we're testing is that the logic in
"unpack-objects" behaves as expected, not the OS-specific question of
whether we actually were able to use the "bulk" method.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-18 12:31:52 +02:00
|
|
|
log_trace_fsync_if("fsync/writeout-only", count_fsync_writeout_only);
|
|
|
|
log_trace_fsync_if("fsync/hardware-flush", count_fsync_hardware_flush);
|
2022-03-30 07:06:40 +02:00
|
|
|
}
|
|
|
|
|
2010-03-26 16:25:32 +01:00
|
|
|
static int warn_if_unremovable(const char *op, const char *file, int rc)
|
2009-04-29 23:21:46 +02:00
|
|
|
{
|
2014-07-16 20:01:18 +02:00
|
|
|
int err;
|
|
|
|
if (!rc || errno == ENOENT)
|
|
|
|
return 0;
|
|
|
|
err = errno;
|
2017-11-01 15:44:44 +01:00
|
|
|
warning_errno("unable to %s '%s'", op, file);
|
2014-07-16 20:01:18 +02:00
|
|
|
errno = err;
|
2009-04-29 23:21:46 +02:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2014-07-16 20:20:36 +02:00
|
|
|
int unlink_or_msg(const char *file, struct strbuf *err)
|
|
|
|
{
|
|
|
|
int rc = unlink(file);
|
|
|
|
|
|
|
|
assert(err);
|
|
|
|
|
|
|
|
if (!rc || errno == ENOENT)
|
|
|
|
return 0;
|
|
|
|
|
2017-11-01 15:44:44 +01:00
|
|
|
strbuf_addf(err, "unable to unlink '%s': %s",
|
2014-07-16 20:20:36 +02:00
|
|
|
file, strerror(errno));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2010-03-26 16:25:32 +01:00
|
|
|
int unlink_or_warn(const char *file)
|
|
|
|
{
|
|
|
|
return warn_if_unremovable("unlink", file, unlink(file));
|
|
|
|
}
|
2010-03-26 16:25:33 +01:00
|
|
|
|
|
|
|
int rmdir_or_warn(const char *file)
|
|
|
|
{
|
|
|
|
return warn_if_unremovable("rmdir", file, rmdir(file));
|
|
|
|
}
|
2010-03-26 16:25:34 +01:00
|
|
|
|
|
|
|
int remove_or_warn(unsigned int mode, const char *file)
|
|
|
|
{
|
|
|
|
return S_ISGITLINK(mode) ? rmdir_or_warn(file) : unlink_or_warn(file);
|
|
|
|
}
|
2012-05-22 01:10:20 +02:00
|
|
|
|
config: allow inaccessible configuration under $HOME
The changes v1.7.12.1~2^2~4 (config: warn on inaccessible files,
2012-08-21) and v1.8.1.1~22^2~2 (config: treat user and xdg config
permission problems as errors, 2012-10-13) were intended to prevent
important configuration (think "[transfer] fsckobjects") from being
ignored when the configuration is unintentionally unreadable (for
example with EIO on a flaky filesystem, or with ENOMEM due to a DoS
attack). Usually ~/.gitconfig and ~/.config/git are readable by the
current user, and if they aren't then it would be easy to fix those
permissions, so the damage from adding this check should have been
minimal.
Unfortunately the access() check often trips when git is being run as
a server. A daemon (such as inetd or git-daemon) starts as "root",
creates a listening socket, and then drops privileges, meaning that
when git commands are invoked they cannot access $HOME and die with
fatal: unable to access '/root/.config/git/config': Permission denied
Any patch to fix this would have one of three problems:
1. We annoy sysadmins who need to take an extra step to handle HOME
when dropping privileges (the current behavior, or any other
proposal that they have to opt into).
2. We annoy sysadmins who want to set HOME when dropping privileges,
either by making what they want to do impossible, or making them
set an extra variable or option to accomplish what used to work
(e.g., a patch to git-daemon to set HOME when --user is passed).
3. We loosen the check, so some cases which might be noteworthy are
not caught.
This patch is of type (3).
Treat user and xdg configuration that are inaccessible due to
permissions (EACCES) as though no user configuration was provided at
all.
An alternative method would be to check if $HOME is readable, but that
would not help in cases where the user who dropped privileges had a
globally readable HOME with only .config or .gitconfig being private.
This does not change the behavior when /etc/gitconfig or .git/config
is unreadable (since those are more serious configuration errors),
nor when ~/.gitconfig or ~/.config/git is unreadable due to problems
other than permissions.
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Improved-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-12 23:03:18 +02:00
|
|
|
static int access_error_is_ok(int err, unsigned flag)
|
|
|
|
{
|
2017-05-30 02:23:33 +02:00
|
|
|
return (is_missing_file_error(err) ||
|
|
|
|
((flag & ACCESS_EACCES_OK) && err == EACCES));
|
config: allow inaccessible configuration under $HOME
The changes v1.7.12.1~2^2~4 (config: warn on inaccessible files,
2012-08-21) and v1.8.1.1~22^2~2 (config: treat user and xdg config
permission problems as errors, 2012-10-13) were intended to prevent
important configuration (think "[transfer] fsckobjects") from being
ignored when the configuration is unintentionally unreadable (for
example with EIO on a flaky filesystem, or with ENOMEM due to a DoS
attack). Usually ~/.gitconfig and ~/.config/git are readable by the
current user, and if they aren't then it would be easy to fix those
permissions, so the damage from adding this check should have been
minimal.
Unfortunately the access() check often trips when git is being run as
a server. A daemon (such as inetd or git-daemon) starts as "root",
creates a listening socket, and then drops privileges, meaning that
when git commands are invoked they cannot access $HOME and die with
fatal: unable to access '/root/.config/git/config': Permission denied
Any patch to fix this would have one of three problems:
1. We annoy sysadmins who need to take an extra step to handle HOME
when dropping privileges (the current behavior, or any other
proposal that they have to opt into).
2. We annoy sysadmins who want to set HOME when dropping privileges,
either by making what they want to do impossible, or making them
set an extra variable or option to accomplish what used to work
(e.g., a patch to git-daemon to set HOME when --user is passed).
3. We loosen the check, so some cases which might be noteworthy are
not caught.
This patch is of type (3).
Treat user and xdg configuration that are inaccessible due to
permissions (EACCES) as though no user configuration was provided at
all.
An alternative method would be to check if $HOME is readable, but that
would not help in cases where the user who dropped privileges had a
globally readable HOME with only .config or .gitconfig being private.
This does not change the behavior when /etc/gitconfig or .git/config
is unreadable (since those are more serious configuration errors),
nor when ~/.gitconfig or ~/.config/git is unreadable due to problems
other than permissions.
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Improved-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-12 23:03:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int access_or_warn(const char *path, int mode, unsigned flag)
|
2012-08-21 08:10:59 +02:00
|
|
|
{
|
|
|
|
int ret = access(path, mode);
|
config: allow inaccessible configuration under $HOME
The changes v1.7.12.1~2^2~4 (config: warn on inaccessible files,
2012-08-21) and v1.8.1.1~22^2~2 (config: treat user and xdg config
permission problems as errors, 2012-10-13) were intended to prevent
important configuration (think "[transfer] fsckobjects") from being
ignored when the configuration is unintentionally unreadable (for
example with EIO on a flaky filesystem, or with ENOMEM due to a DoS
attack). Usually ~/.gitconfig and ~/.config/git are readable by the
current user, and if they aren't then it would be easy to fix those
permissions, so the damage from adding this check should have been
minimal.
Unfortunately the access() check often trips when git is being run as
a server. A daemon (such as inetd or git-daemon) starts as "root",
creates a listening socket, and then drops privileges, meaning that
when git commands are invoked they cannot access $HOME and die with
fatal: unable to access '/root/.config/git/config': Permission denied
Any patch to fix this would have one of three problems:
1. We annoy sysadmins who need to take an extra step to handle HOME
when dropping privileges (the current behavior, or any other
proposal that they have to opt into).
2. We annoy sysadmins who want to set HOME when dropping privileges,
either by making what they want to do impossible, or making them
set an extra variable or option to accomplish what used to work
(e.g., a patch to git-daemon to set HOME when --user is passed).
3. We loosen the check, so some cases which might be noteworthy are
not caught.
This patch is of type (3).
Treat user and xdg configuration that are inaccessible due to
permissions (EACCES) as though no user configuration was provided at
all.
An alternative method would be to check if $HOME is readable, but that
would not help in cases where the user who dropped privileges had a
globally readable HOME with only .config or .gitconfig being private.
This does not change the behavior when /etc/gitconfig or .git/config
is unreadable (since those are more serious configuration errors),
nor when ~/.gitconfig or ~/.config/git is unreadable due to problems
other than permissions.
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Improved-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-12 23:03:18 +02:00
|
|
|
if (ret && !access_error_is_ok(errno, flag))
|
2012-08-21 23:52:07 +02:00
|
|
|
warn_on_inaccessible(path);
|
2012-08-21 08:10:59 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
config: allow inaccessible configuration under $HOME
The changes v1.7.12.1~2^2~4 (config: warn on inaccessible files,
2012-08-21) and v1.8.1.1~22^2~2 (config: treat user and xdg config
permission problems as errors, 2012-10-13) were intended to prevent
important configuration (think "[transfer] fsckobjects") from being
ignored when the configuration is unintentionally unreadable (for
example with EIO on a flaky filesystem, or with ENOMEM due to a DoS
attack). Usually ~/.gitconfig and ~/.config/git are readable by the
current user, and if they aren't then it would be easy to fix those
permissions, so the damage from adding this check should have been
minimal.
Unfortunately the access() check often trips when git is being run as
a server. A daemon (such as inetd or git-daemon) starts as "root",
creates a listening socket, and then drops privileges, meaning that
when git commands are invoked they cannot access $HOME and die with
fatal: unable to access '/root/.config/git/config': Permission denied
Any patch to fix this would have one of three problems:
1. We annoy sysadmins who need to take an extra step to handle HOME
when dropping privileges (the current behavior, or any other
proposal that they have to opt into).
2. We annoy sysadmins who want to set HOME when dropping privileges,
either by making what they want to do impossible, or making them
set an extra variable or option to accomplish what used to work
(e.g., a patch to git-daemon to set HOME when --user is passed).
3. We loosen the check, so some cases which might be noteworthy are
not caught.
This patch is of type (3).
Treat user and xdg configuration that are inaccessible due to
permissions (EACCES) as though no user configuration was provided at
all.
An alternative method would be to check if $HOME is readable, but that
would not help in cases where the user who dropped privileges had a
globally readable HOME with only .config or .gitconfig being private.
This does not change the behavior when /etc/gitconfig or .git/config
is unreadable (since those are more serious configuration errors),
nor when ~/.gitconfig or ~/.config/git is unreadable due to problems
other than permissions.
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Improved-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-12 23:03:18 +02:00
|
|
|
int access_or_die(const char *path, int mode, unsigned flag)
|
2012-10-14 02:04:02 +02:00
|
|
|
{
|
|
|
|
int ret = access(path, mode);
|
config: allow inaccessible configuration under $HOME
The changes v1.7.12.1~2^2~4 (config: warn on inaccessible files,
2012-08-21) and v1.8.1.1~22^2~2 (config: treat user and xdg config
permission problems as errors, 2012-10-13) were intended to prevent
important configuration (think "[transfer] fsckobjects") from being
ignored when the configuration is unintentionally unreadable (for
example with EIO on a flaky filesystem, or with ENOMEM due to a DoS
attack). Usually ~/.gitconfig and ~/.config/git are readable by the
current user, and if they aren't then it would be easy to fix those
permissions, so the damage from adding this check should have been
minimal.
Unfortunately the access() check often trips when git is being run as
a server. A daemon (such as inetd or git-daemon) starts as "root",
creates a listening socket, and then drops privileges, meaning that
when git commands are invoked they cannot access $HOME and die with
fatal: unable to access '/root/.config/git/config': Permission denied
Any patch to fix this would have one of three problems:
1. We annoy sysadmins who need to take an extra step to handle HOME
when dropping privileges (the current behavior, or any other
proposal that they have to opt into).
2. We annoy sysadmins who want to set HOME when dropping privileges,
either by making what they want to do impossible, or making them
set an extra variable or option to accomplish what used to work
(e.g., a patch to git-daemon to set HOME when --user is passed).
3. We loosen the check, so some cases which might be noteworthy are
not caught.
This patch is of type (3).
Treat user and xdg configuration that are inaccessible due to
permissions (EACCES) as though no user configuration was provided at
all.
An alternative method would be to check if $HOME is readable, but that
would not help in cases where the user who dropped privileges had a
globally readable HOME with only .config or .gitconfig being private.
This does not change the behavior when /etc/gitconfig or .git/config
is unreadable (since those are more serious configuration errors),
nor when ~/.gitconfig or ~/.config/git is unreadable due to problems
other than permissions.
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Improved-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-12 23:03:18 +02:00
|
|
|
if (ret && !access_error_is_ok(errno, flag))
|
2012-10-14 02:04:02 +02:00
|
|
|
die_errno(_("unable to access '%s'"), path);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-07-28 20:29:50 +02:00
|
|
|
char *xgetcwd(void)
|
|
|
|
{
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
if (strbuf_getcwd(&sb))
|
|
|
|
die_errno(_("unable to get current working directory"));
|
|
|
|
return strbuf_detach(&sb, NULL);
|
|
|
|
}
|
2014-11-30 09:24:45 +01:00
|
|
|
|
2015-09-24 23:05:37 +02:00
|
|
|
int xsnprintf(char *dst, size_t max, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list ap;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
va_start(ap, fmt);
|
|
|
|
len = vsnprintf(dst, max, fmt, ap);
|
|
|
|
va_end(ap);
|
|
|
|
|
|
|
|
if (len < 0)
|
2018-05-02 11:38:39 +02:00
|
|
|
BUG("your snprintf is broken");
|
2015-09-24 23:05:37 +02:00
|
|
|
if (len >= max)
|
2018-05-02 11:38:39 +02:00
|
|
|
BUG("attempt to snprintf into too-small buffer");
|
2015-09-24 23:05:37 +02:00
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
write_file: add pointer+len variant
There are many callsites which could use write_file, but for
which it is a little awkward because they have a strbuf or
other pointer/len combo. Specifically:
1. write_file() takes a format string, so we have to use
"%s" or "%.*s", which are ugly.
2. Using any form of "%s" does not handle embedded NULs in
the output. That probably doesn't matter for our
call-sites, but it's nicer not to have to worry.
3. It's less efficient; we format into another strbuf
just to do the write. That's probably not measurably
slow for our uses, but it's simply inelegant.
We can fix this by providing a helper to write out the
formatted buffer, and just calling it from write_file().
Note that we don't do the usual "complete with a newline"
that write_file does. If the caller has their own buffer,
there's a reasonable chance they're doing something more
complicated than a single line, and they can call
strbuf_complete_line() themselves.
We could go even further and add strbuf_write_file(), but it
doesn't save much:
- write_file_buf(path, sb.buf, sb.len);
+ strbuf_write_file(&sb, path);
It would also be somewhat asymmetric with strbuf_read_file,
which actually returns errors rather than dying (and the
error handling is most of the benefit of write_file() in the
first place).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-08 11:12:22 +02:00
|
|
|
void write_file_buf(const char *path, const char *buf, size_t len)
|
2014-11-30 09:24:45 +01:00
|
|
|
{
|
write_file: add pointer+len variant
There are many callsites which could use write_file, but for
which it is a little awkward because they have a strbuf or
other pointer/len combo. Specifically:
1. write_file() takes a format string, so we have to use
"%s" or "%.*s", which are ugly.
2. Using any form of "%s" does not handle embedded NULs in
the output. That probably doesn't matter for our
call-sites, but it's nicer not to have to worry.
3. It's less efficient; we format into another strbuf
just to do the write. That's probably not measurably
slow for our uses, but it's simply inelegant.
We can fix this by providing a helper to write out the
formatted buffer, and just calling it from write_file().
Note that we don't do the usual "complete with a newline"
that write_file does. If the caller has their own buffer,
there's a reasonable chance they're doing something more
complicated than a single line, and they can call
strbuf_complete_line() themselves.
We could go even further and add strbuf_write_file(), but it
doesn't save much:
- write_file_buf(path, sb.buf, sb.len);
+ strbuf_write_file(&sb, path);
It would also be somewhat asymmetric with strbuf_read_file,
which actually returns errors rather than dying (and the
error handling is most of the benefit of write_file() in the
first place).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-08 11:12:22 +02:00
|
|
|
int fd = xopen(path, O_WRONLY | O_CREAT | O_TRUNC, 0666);
|
avoid "write_in_full(fd, buf, len) != len" pattern
The return value of write_in_full() is either "-1", or the
requested number of bytes[1]. If we make a partial write
before seeing an error, we still return -1, not a partial
value. This goes back to f6aa66cb95 (write_in_full: really
write in full or return error on disk full., 2007-01-11).
So checking anything except "was the return value negative"
is pointless. And there are a couple of reasons not to do
so:
1. It can do a funny signed/unsigned comparison. If your
"len" is signed (e.g., a size_t) then the compiler will
promote the "-1" to its unsigned variant.
This works out for "!= len" (unless you really were
trying to write the maximum size_t bytes), but is a
bug if you check "< len" (an example of which was fixed
recently in config.c).
We should avoid promoting the mental model that you
need to check the length at all, so that new sites are
not tempted to copy us.
2. Checking for a negative value is shorter to type,
especially when the length is an expression.
3. Linus says so. In d34cf19b89 (Clean up write_in_full()
users, 2007-01-11), right after the write_in_full()
semantics were changed, he wrote:
I really wish every "write_in_full()" user would just
check against "<0" now, but this fixes the nasty and
stupid ones.
Appeals to authority aside, this makes it clear that
writing it this way does not have an intentional
benefit. It's a historical curiosity that we never
bothered to clean up (and which was undoubtedly
cargo-culted into new sites).
So let's convert these obviously-correct cases (this
includes write_str_in_full(), which is just a wrapper for
write_in_full()).
[1] A careful reader may notice there is one way that
write_in_full() can return a different value. If we ask
write() to write N bytes and get a return value that is
_larger_ than N, we could return a larger total. But
besides the fact that this would imply a totally broken
version of write(), it would already invoke undefined
behavior. Our internal remaining counter is an unsigned
size_t, which means that subtracting too many byte will
wrap it around to a very large number. So we'll instantly
begin reading off the end of the buffer, trying to write
gigabytes (or petabytes) of data.
Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-13 19:16:03 +02:00
|
|
|
if (write_in_full(fd, buf, len) < 0)
|
2017-11-01 15:44:44 +01:00
|
|
|
die_errno(_("could not write to '%s'"), path);
|
write_file: add pointer+len variant
There are many callsites which could use write_file, but for
which it is a little awkward because they have a strbuf or
other pointer/len combo. Specifically:
1. write_file() takes a format string, so we have to use
"%s" or "%.*s", which are ugly.
2. Using any form of "%s" does not handle embedded NULs in
the output. That probably doesn't matter for our
call-sites, but it's nicer not to have to worry.
3. It's less efficient; we format into another strbuf
just to do the write. That's probably not measurably
slow for our uses, but it's simply inelegant.
We can fix this by providing a helper to write out the
formatted buffer, and just calling it from write_file().
Note that we don't do the usual "complete with a newline"
that write_file does. If the caller has their own buffer,
there's a reasonable chance they're doing something more
complicated than a single line, and they can call
strbuf_complete_line() themselves.
We could go even further and add strbuf_write_file(), but it
doesn't save much:
- write_file_buf(path, sb.buf, sb.len);
+ strbuf_write_file(&sb, path);
It would also be somewhat asymmetric with strbuf_read_file,
which actually returns errors rather than dying (and the
error handling is most of the benefit of write_file() in the
first place).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-08 11:12:22 +02:00
|
|
|
if (close(fd))
|
2017-11-01 15:44:44 +01:00
|
|
|
die_errno(_("could not close '%s'"), path);
|
2014-11-30 09:24:45 +01:00
|
|
|
}
|
2015-06-05 21:45:05 +02:00
|
|
|
|
2016-07-08 11:09:34 +02:00
|
|
|
void write_file(const char *path, const char *fmt, ...)
|
2015-08-24 22:03:07 +02:00
|
|
|
{
|
|
|
|
va_list params;
|
2014-11-30 09:24:45 +01:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2015-08-24 22:03:07 +02:00
|
|
|
|
|
|
|
va_start(params, fmt);
|
2016-07-08 11:09:34 +02:00
|
|
|
strbuf_vaddf(&sb, fmt, params);
|
2015-08-24 22:03:07 +02:00
|
|
|
va_end(params);
|
|
|
|
|
2016-07-08 11:09:34 +02:00
|
|
|
strbuf_complete_line(&sb);
|
2015-08-24 22:03:07 +02:00
|
|
|
|
write_file: add pointer+len variant
There are many callsites which could use write_file, but for
which it is a little awkward because they have a strbuf or
other pointer/len combo. Specifically:
1. write_file() takes a format string, so we have to use
"%s" or "%.*s", which are ugly.
2. Using any form of "%s" does not handle embedded NULs in
the output. That probably doesn't matter for our
call-sites, but it's nicer not to have to worry.
3. It's less efficient; we format into another strbuf
just to do the write. That's probably not measurably
slow for our uses, but it's simply inelegant.
We can fix this by providing a helper to write out the
formatted buffer, and just calling it from write_file().
Note that we don't do the usual "complete with a newline"
that write_file does. If the caller has their own buffer,
there's a reasonable chance they're doing something more
complicated than a single line, and they can call
strbuf_complete_line() themselves.
We could go even further and add strbuf_write_file(), but it
doesn't save much:
- write_file_buf(path, sb.buf, sb.len);
+ strbuf_write_file(&sb, path);
It would also be somewhat asymmetric with strbuf_read_file,
which actually returns errors rather than dying (and the
error handling is most of the benefit of write_file() in the
first place).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-08 11:12:22 +02:00
|
|
|
write_file_buf(path, sb.buf, sb.len);
|
2016-07-08 11:09:34 +02:00
|
|
|
strbuf_release(&sb);
|
2015-08-24 22:03:07 +02:00
|
|
|
}
|
|
|
|
|
2015-06-05 21:45:05 +02:00
|
|
|
void sleep_millisec(int millisec)
|
|
|
|
{
|
|
|
|
poll(NULL, 0, millisec);
|
|
|
|
}
|
2017-04-18 23:57:43 +02:00
|
|
|
|
|
|
|
int xgethostname(char *buf, size_t len)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If the full hostname doesn't fit in buf, POSIX does not
|
|
|
|
* specify whether the buffer will be null-terminated, so to
|
|
|
|
* be safe, do it ourselves.
|
|
|
|
*/
|
|
|
|
int ret = gethostname(buf, len);
|
|
|
|
if (!ret)
|
|
|
|
buf[len - 1] = 0;
|
|
|
|
return ret;
|
|
|
|
}
|
2019-01-02 16:38:32 +01:00
|
|
|
|
|
|
|
int is_empty_or_missing_file(const char *filename)
|
|
|
|
{
|
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
if (stat(filename, &st) < 0) {
|
|
|
|
if (errno == ENOENT)
|
|
|
|
return 1;
|
|
|
|
die_errno(_("could not stat %s"), filename);
|
|
|
|
}
|
|
|
|
|
|
|
|
return !st.st_size;
|
|
|
|
}
|
2021-02-16 15:44:22 +01:00
|
|
|
|
|
|
|
int open_nofollow(const char *path, int flags)
|
|
|
|
{
|
|
|
|
#ifdef O_NOFOLLOW
|
|
|
|
return open(path, flags | O_NOFOLLOW);
|
|
|
|
#else
|
|
|
|
struct stat st;
|
|
|
|
if (lstat(path, &st) < 0)
|
|
|
|
return -1;
|
|
|
|
if (S_ISLNK(st.st_mode)) {
|
|
|
|
errno = ELOOP;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return open(path, flags);
|
|
|
|
#endif
|
|
|
|
}
|
wrapper: add a helper to generate numbers from a CSPRNG
There are many situations in which having access to a cryptographically
secure pseudorandom number generator (CSPRNG) is helpful. In the
future, we'll encounter one of these when dealing with temporary files.
To make this possible, let's add a function which reads from a system
CSPRNG and returns some bytes.
We know that all systems will have such an interface. A CSPRNG is
required for a secure TLS or SSH implementation and a Git implementation
which provided neither would be of little practical use. In addition,
POSIX is set to standardize getentropy(2) in the next version, so in the
(potentially distant) future we can rely on that.
For systems which lack one of the other interfaces, we provide the
ability to use OpenSSL's CSPRNG. OpenSSL is highly portable and
functions on practically every known OS, and we know it will have access
to some source of cryptographically secure randomness. We also provide
support for the arc4random in libbsd for folks who would prefer to use
that.
Because this is a security sensitive interface, we take some
precautions. We either succeed by filling the buffer completely as we
requested, or we fail. We don't return partial data because the caller
will almost never find that to be a useful behavior.
Specify a makefile knob which users can use to specify one or more
suitable CSPRNGs, and turn the multiple string options into a set of
defines, since we cannot match on strings in the preprocessor. We allow
multiple options to make the job of handling this in autoconf easier.
The order of options is important here. On systems with arc4random,
which is most of the BSDs, we use that, since, except on MirBSD and
macOS, it uses ChaCha20, which is extremely fast, and sits entirely in
userspace, avoiding a system call. We then prefer getrandom over
getentropy, because the former has been available longer on Linux, and
then OpenSSL. Finally, if none of those are available, we use
/dev/urandom, because most Unix-like operating systems provide that API.
We prefer options that don't involve device files when possible because
those work in some restricted environments where device files may not be
available.
Set the configuration variables appropriately for Linux and the BSDs,
including macOS, as well as Windows and NonStop. We specifically only
consider versions which receive publicly available security support
here. For the same reason, we don't specify getrandom(2) on Linux,
because CentOS 7 doesn't support it in glibc (although its kernel does)
and we don't want to resort to making syscalls.
Finally, add a test helper to allow this to be tested by hand and in
tests. We don't add any tests, since invoking the CSPRNG is not likely
to produce interesting, reproducible results.
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-17 22:56:16 +01:00
|
|
|
|
|
|
|
int csprng_bytes(void *buf, size_t len)
|
|
|
|
{
|
|
|
|
#if defined(HAVE_ARC4RANDOM) || defined(HAVE_ARC4RANDOM_LIBBSD)
|
|
|
|
/* This function never returns an error. */
|
|
|
|
arc4random_buf(buf, len);
|
|
|
|
return 0;
|
|
|
|
#elif defined(HAVE_GETRANDOM)
|
|
|
|
ssize_t res;
|
|
|
|
char *p = buf;
|
|
|
|
while (len) {
|
|
|
|
res = getrandom(p, len, 0);
|
|
|
|
if (res < 0)
|
|
|
|
return -1;
|
|
|
|
len -= res;
|
|
|
|
p += res;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
#elif defined(HAVE_GETENTROPY)
|
|
|
|
int res;
|
|
|
|
char *p = buf;
|
|
|
|
while (len) {
|
|
|
|
/* getentropy has a maximum size of 256 bytes. */
|
|
|
|
size_t chunk = len < 256 ? len : 256;
|
|
|
|
res = getentropy(p, chunk);
|
|
|
|
if (res < 0)
|
|
|
|
return -1;
|
|
|
|
len -= chunk;
|
|
|
|
p += chunk;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
#elif defined(HAVE_RTLGENRANDOM)
|
|
|
|
if (!RtlGenRandom(buf, len))
|
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
#elif defined(HAVE_OPENSSL_CSPRNG)
|
|
|
|
int res = RAND_bytes(buf, len);
|
|
|
|
if (res == 1)
|
|
|
|
return 0;
|
|
|
|
if (res == -1)
|
|
|
|
errno = ENOTSUP;
|
|
|
|
else
|
|
|
|
errno = EIO;
|
|
|
|
return -1;
|
|
|
|
#else
|
|
|
|
ssize_t res;
|
|
|
|
char *p = buf;
|
|
|
|
int fd, err;
|
|
|
|
fd = open("/dev/urandom", O_RDONLY);
|
|
|
|
if (fd < 0)
|
|
|
|
return -1;
|
|
|
|
while (len) {
|
|
|
|
res = xread(fd, p, len);
|
|
|
|
if (res < 0) {
|
|
|
|
err = errno;
|
|
|
|
close(fd);
|
|
|
|
errno = err;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
len -= res;
|
|
|
|
p += res;
|
|
|
|
}
|
|
|
|
close(fd);
|
|
|
|
return 0;
|
|
|
|
#endif
|
|
|
|
}
|