2005-12-05 20:54:29 +01:00
|
|
|
#ifndef GIT_COMPAT_UTIL_H
|
|
|
|
#define GIT_COMPAT_UTIL_H
|
|
|
|
|
2019-06-25 16:49:40 +02:00
|
|
|
#ifdef USE_MSVC_CRTDBG
|
|
|
|
/*
|
|
|
|
* For these to work they must appear very early in each
|
|
|
|
* file -- before most of the standard header files.
|
|
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <crtdbg.h>
|
|
|
|
#endif
|
|
|
|
|
2007-02-17 10:13:10 +01:00
|
|
|
#define _FILE_OFFSET_BITS 64
|
|
|
|
|
2015-04-30 14:44:14 +02:00
|
|
|
|
|
|
|
/* Derived from Linux "Features Test Macro" header
|
|
|
|
* Convenience macros to test the versions of gcc (or
|
|
|
|
* a compatible compiler).
|
|
|
|
* Use them like this:
|
|
|
|
* #if GIT_GNUC_PREREQ (2,8)
|
|
|
|
* ... code requiring gcc 2.8 or later ...
|
|
|
|
* #endif
|
|
|
|
*/
|
|
|
|
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
|
|
|
|
# define GIT_GNUC_PREREQ(maj, min) \
|
|
|
|
((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
|
|
|
|
#else
|
|
|
|
#define GIT_GNUC_PREREQ(maj, min) 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
2006-01-07 10:33:54 +01:00
|
|
|
#ifndef FLEX_ARRAY
|
2007-11-20 21:08:06 +01:00
|
|
|
/*
|
|
|
|
* See if our compiler is known to support flexible array members.
|
|
|
|
*/
|
git-compat-util.h: avoid using c99 flex array feature with Sun compiler 5.8
The Sun c99 compiler as recent as version 5.8 Patch 121016-06 2007/08/01
produces an error when compiling diff-delta.c. This source file #includes
the delta.h header file which pre-declares a struct which is later defined
to contain a flex array member. The Sun c99 compiler fails to compile
diff-delta.c and gives the following error:
"diff-delta.c", line 314: identifier redeclared: create_delta
current : function(pointer to const struct delta_index {unsigned long memsize, pointer to const void src_buf, unsigned long src_size, unsigned int hash_mask, array[-1] of pointer to struct index_entry {..} hash}, pointer to const void, unsigned long, pointer to unsigned long, unsigned long) returning pointer to void
previous: function(pointer to const struct delta_index {unsigned long memsize, pointer to const void src_buf, unsigned long src_size, unsigned int hash_mask, array[-1] of pointer to struct index_entry {..} hash}, pointer to const void, unsigned long, pointer to unsigned long, unsigned long) returning pointer to void : "delta.h", line 44
c99: acomp failed for diff-delta.c
So, avoid using this c99 feature when compiling with the Sun c compilers
version 5.8 and older (the most recent version tested).
Signed-off-by: Brandon Casey <drafnel@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-06-09 01:53:48 +02:00
|
|
|
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && (!defined(__SUNPRO_C) || (__SUNPRO_C > 0x580))
|
2007-11-20 21:08:06 +01:00
|
|
|
# define FLEX_ARRAY /* empty */
|
|
|
|
#elif defined(__GNUC__)
|
|
|
|
# if (__GNUC__ >= 3)
|
|
|
|
# define FLEX_ARRAY /* empty */
|
|
|
|
# else
|
|
|
|
# define FLEX_ARRAY 0 /* older GNU extension */
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Otherwise, default to safer but a bit wasteful traditional style
|
|
|
|
*/
|
|
|
|
#ifndef FLEX_ARRAY
|
|
|
|
# define FLEX_ARRAY 1
|
2006-01-07 10:33:54 +01:00
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
2015-04-30 14:44:14 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* BUILD_ASSERT_OR_ZERO - assert a build-time dependency, as an expression.
|
|
|
|
* @cond: the compile-time condition which must be true.
|
|
|
|
*
|
|
|
|
* Your compile will fail if the condition isn't true, or can't be evaluated
|
|
|
|
* by the compiler. This can be used in an expression: its value is "0".
|
|
|
|
*
|
|
|
|
* Example:
|
|
|
|
* #define foo_to_char(foo) \
|
|
|
|
* ((char *)(foo) \
|
|
|
|
* + BUILD_ASSERT_OR_ZERO(offsetof(struct foo, string) == 0))
|
|
|
|
*/
|
|
|
|
#define BUILD_ASSERT_OR_ZERO(cond) \
|
|
|
|
(sizeof(char [1 - 2*!(cond)]) - 1)
|
|
|
|
|
2015-06-25 00:12:07 +02:00
|
|
|
#if GIT_GNUC_PREREQ(3, 1)
|
2015-04-30 14:44:14 +02:00
|
|
|
/* &arr[0] degrades to a pointer: a different type from an array */
|
|
|
|
# define BARF_UNLESS_AN_ARRAY(arr) \
|
|
|
|
BUILD_ASSERT_OR_ZERO(!__builtin_types_compatible_p(__typeof__(arr), \
|
|
|
|
__typeof__(&(arr)[0])))
|
2015-06-25 00:12:07 +02:00
|
|
|
#else
|
|
|
|
# define BARF_UNLESS_AN_ARRAY(arr) 0
|
2015-04-30 14:44:14 +02:00
|
|
|
#endif
|
|
|
|
/*
|
|
|
|
* ARRAY_SIZE - get the number of elements in a visible array
|
2019-10-11 20:24:54 +02:00
|
|
|
* @x: the array whose size you want.
|
2015-04-30 14:44:14 +02:00
|
|
|
*
|
|
|
|
* This does not work on pointers, or arrays declared as [], or
|
|
|
|
* function parameters. With correct compiler support, such usage
|
|
|
|
* will cause a build error (see the build_assert_or_zero macro).
|
|
|
|
*/
|
|
|
|
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]) + BARF_UNLESS_AN_ARRAY(x))
|
|
|
|
|
2009-07-22 23:34:34 +02:00
|
|
|
#define bitsizeof(x) (CHAR_BIT * sizeof(x))
|
2006-03-09 20:58:05 +01:00
|
|
|
|
2010-10-05 09:24:10 +02:00
|
|
|
#define maximum_signed_value_of_type(a) \
|
|
|
|
(INTMAX_MAX >> (bitsizeof(intmax_t) - bitsizeof(a)))
|
|
|
|
|
2010-10-11 04:59:26 +02:00
|
|
|
#define maximum_unsigned_value_of_type(a) \
|
|
|
|
(UINTMAX_MAX >> (bitsizeof(uintmax_t) - bitsizeof(a)))
|
|
|
|
|
2010-10-05 09:24:10 +02:00
|
|
|
/*
|
|
|
|
* Signed integer overflow is undefined in C, so here's a helper macro
|
|
|
|
* to detect if the sum of two integers will overflow.
|
|
|
|
*
|
|
|
|
* Requires: a >= 0, typeof(a) equals typeof(b)
|
|
|
|
*/
|
|
|
|
#define signed_add_overflows(a, b) \
|
|
|
|
((b) > maximum_signed_value_of_type(a) - (a))
|
|
|
|
|
2010-10-11 04:59:26 +02:00
|
|
|
#define unsigned_add_overflows(a, b) \
|
|
|
|
((b) > maximum_unsigned_value_of_type(a) - (a))
|
|
|
|
|
add helpers for detecting size_t overflow
Performing computations on size_t variables that we feed to
xmalloc and friends can be dangerous, as an integer overflow
can cause us to allocate a much smaller chunk than we
realized.
We already have unsigned_add_overflows(), but let's add
unsigned_mult_overflows() to that. Furthermore, rather than
have each site manually check and die on overflow, we can
provide some helpers that will:
- promote the arguments to size_t, so that we know we are
doing our computation in the same size of integer that
will ultimately be fed to xmalloc
- check and die on overflow
- return the result so that computations can be done in
the parameter list of xmalloc.
These functions are a lot uglier to use than normal
arithmetic operators (you have to do "st_add(foo, bar)"
instead of "foo + bar"). To at least limit the damage, we
also provide multi-valued versions. So rather than:
st_add(st_add(a, b), st_add(c, d));
you can write:
st_add4(a, b, c, d);
This isn't nearly as elegant as a varargs function, but it's
a lot harder to get it wrong. You don't have to remember to
add a sentinel value at the end, and the compiler will
complain if you get the number of arguments wrong. This
patch adds only the numbered variants required to convert
the current code base; we can easily add more later if
needed.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-02-19 12:21:19 +01:00
|
|
|
/*
|
|
|
|
* Returns true if the multiplication of "a" and "b" will
|
|
|
|
* overflow. The types of "a" and "b" must match and must be unsigned.
|
|
|
|
* Note that this macro evaluates "a" twice!
|
|
|
|
*/
|
|
|
|
#define unsigned_mult_overflows(a, b) \
|
|
|
|
((a) && (b) > maximum_unsigned_value_of_type(a) / (a))
|
|
|
|
|
2007-04-09 07:06:29 +02:00
|
|
|
#ifdef __GNUC__
|
|
|
|
#define TYPEOF(x) (__typeof__(x))
|
|
|
|
#else
|
|
|
|
#define TYPEOF(x)
|
|
|
|
#endif
|
|
|
|
|
2009-07-22 23:34:34 +02:00
|
|
|
#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (bitsizeof(x) - (bits))))
|
2007-11-07 11:20:27 +01:00
|
|
|
#define HAS_MULTI_BITS(i) ((i) & ((i) - 1)) /* checks if an integer has more than 1 bit set */
|
2007-04-09 07:06:29 +02:00
|
|
|
|
2009-07-22 23:34:35 +02:00
|
|
|
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
|
|
|
|
|
2007-05-15 18:33:25 +02:00
|
|
|
/* Approximation of the length of the decimal representation of this type. */
|
|
|
|
#define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1)
|
|
|
|
|
2009-06-06 01:36:13 +02:00
|
|
|
#if defined(__sun__)
|
|
|
|
/*
|
|
|
|
* On Solaris, when _XOPEN_EXTENDED is set, its header file
|
|
|
|
* forces the programs to be XPG4v2, defeating any _XOPEN_SOURCE
|
|
|
|
* setting to say we are XPG5 or XPG6. Also on Solaris,
|
|
|
|
* XPG6 programs must be compiled with a c99 compiler, while
|
|
|
|
* non XPG6 programs must be compiled with a pre-c99 compiler.
|
|
|
|
*/
|
|
|
|
# if __STDC_VERSION__ - 0 >= 199901L
|
|
|
|
# define _XOPEN_SOURCE 600
|
|
|
|
# else
|
|
|
|
# define _XOPEN_SOURCE 500
|
|
|
|
# endif
|
2010-04-02 09:52:09 +02:00
|
|
|
#elif !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && \
|
2012-09-19 12:03:30 +02:00
|
|
|
!defined(_M_UNIX) && !defined(__sgi) && !defined(__DragonFly__) && \
|
2014-11-23 18:14:55 +01:00
|
|
|
!defined(__TANDEM) && !defined(__QNX__) && !defined(__MirBSD__) && \
|
|
|
|
!defined(__CYGWIN__)
|
2006-12-19 23:34:12 +01:00
|
|
|
#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */
|
|
|
|
#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */
|
2006-12-21 02:32:21 +01:00
|
|
|
#endif
|
2007-01-16 02:34:49 +01:00
|
|
|
#define _ALL_SOURCE 1
|
|
|
|
#define _GNU_SOURCE 1
|
|
|
|
#define _BSD_SOURCE 1
|
2014-09-14 07:33:35 +02:00
|
|
|
#define _DEFAULT_SOURCE 1
|
2009-04-26 15:49:00 +02:00
|
|
|
#define _NETBSD_SOURCE 1
|
git-compat-util.h: adjust for SGI IRIX 6.5
Don't define _XOPEN_SOURCE
Do define _SGI_SOURCE
Defining _XOPEN_SOURCE prevents many of the common functions and macros
from being defined. _Not_ setting _XOPEN_SOURCE, and instead setting
_SGI_SOURCE, provides all of the XPG4, XPG5, BSD, POSIX functions and
declarations, _BUT_ provides a horribly broken snprintf(). SGI does have
a working snprintf(), but it is only provided when _NO_XOPEN5 evaluates
to zero, and this only happens if _XOPEN_SOURCE is defined which, as
mentioned above, prevents many other common functions and defines.
The broken snprintf will be worked around with SNPRINTF_RETURNS_BOGUS in
the Makefile in a later patch.
Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-10 19:10:44 +02:00
|
|
|
#define _SGI_SOURCE 1
|
2006-12-19 23:34:12 +01:00
|
|
|
|
2013-05-02 21:26:08 +02:00
|
|
|
#if defined(WIN32) && !defined(__CYGWIN__) /* Both MinGW and MSVC */
|
2018-10-03 21:43:43 +02:00
|
|
|
# if !defined(_WIN32_WINNT)
|
2018-10-03 21:43:44 +02:00
|
|
|
# define _WIN32_WINNT 0x0600
|
2013-01-31 19:28:35 +01:00
|
|
|
# endif
|
2009-09-16 10:20:26 +02:00
|
|
|
#define WIN32_LEAN_AND_MEAN /* stops windows.h including winsock.h */
|
|
|
|
#include <winsock2.h>
|
|
|
|
#include <windows.h>
|
2013-05-02 21:26:08 +02:00
|
|
|
#define GIT_WINDOWS_NATIVE
|
2009-09-16 10:20:26 +02:00
|
|
|
#endif
|
|
|
|
|
2005-12-05 20:54:29 +01:00
|
|
|
#include <unistd.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <string.h>
|
2012-12-14 20:57:01 +01:00
|
|
|
#ifdef HAVE_STRINGS_H
|
2012-09-19 12:03:30 +02:00
|
|
|
#include <strings.h> /* for strcasecmp() */
|
|
|
|
#endif
|
2005-12-05 20:54:29 +01:00
|
|
|
#include <errno.h>
|
|
|
|
#include <limits.h>
|
git-compat-util.h: do not #include <sys/param.h> by default
Earlier we allowed platforms that lack <sys/param.h> not to include
the header file from git-compat-util.h; we have included this header
file since the early days back when we used MAXPATHLEN (which we no
longer use) and also depended on it slurping ULONG_MAX (which we get
by including stdint.h or inttypes.h these days).
It turns out that we can compile our modern codebase just file
without including it on many platforms (so far, Fedora, Debian,
Ubuntu, MinGW, Mac OS X, Cygwin, HP-Nonstop, QNX and z/OS are
reported to be OK).
Let's stop including it by default, and on platforms that need it to
be included, leave "make NEEDS_SYS_PARAM_H=YesPlease" as an escape
hatch and ask them to report to us, so that we can find out about
the real dependency and fix it in a more platform agnostic way.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-12-18 18:35:33 +01:00
|
|
|
#ifdef NEEDS_SYS_PARAM_H
|
2005-12-05 20:54:29 +01:00
|
|
|
#include <sys/param.h>
|
2012-12-14 20:56:58 +01:00
|
|
|
#endif
|
2005-12-05 20:54:29 +01:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <dirent.h>
|
2006-12-19 23:34:12 +01:00
|
|
|
#include <sys/time.h>
|
|
|
|
#include <time.h>
|
|
|
|
#include <signal.h>
|
2007-12-01 21:24:59 +01:00
|
|
|
#include <assert.h>
|
|
|
|
#include <regex.h>
|
|
|
|
#include <utime.h>
|
2010-11-04 02:35:10 +01:00
|
|
|
#include <syslog.h>
|
2018-11-14 02:10:43 +01:00
|
|
|
#if !defined(NO_POLL_H)
|
|
|
|
#include <poll.h>
|
|
|
|
#elif !defined(NO_SYS_POLL_H)
|
2010-11-04 02:35:21 +01:00
|
|
|
#include <sys/poll.h>
|
2010-10-27 10:39:52 +02:00
|
|
|
#else
|
2018-11-14 02:10:43 +01:00
|
|
|
/* Pull the compat stuff */
|
2010-10-27 10:39:52 +02:00
|
|
|
#include <poll.h>
|
|
|
|
#endif
|
2015-03-08 08:14:36 +01:00
|
|
|
#ifdef HAVE_BSD_SYSCTL
|
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#endif
|
2013-05-30 01:53:28 +02:00
|
|
|
|
2017-07-03 16:41:37 +02:00
|
|
|
#if defined(__CYGWIN__)
|
2018-12-15 05:33:30 +01:00
|
|
|
#include "compat/win32/path-utils.h"
|
2017-07-03 16:41:37 +02:00
|
|
|
#endif
|
2011-10-31 20:12:42 +01:00
|
|
|
#if defined(__MINGW32__)
|
|
|
|
/* pull in Windows compatibility stuff */
|
2018-12-15 05:33:30 +01:00
|
|
|
#include "compat/win32/path-utils.h"
|
2011-10-31 20:12:42 +01:00
|
|
|
#include "compat/mingw.h"
|
|
|
|
#elif defined(_MSC_VER)
|
2019-04-08 13:26:16 +02:00
|
|
|
#include "compat/win32/path-utils.h"
|
2011-10-31 20:12:42 +01:00
|
|
|
#include "compat/msvc.h"
|
|
|
|
#else
|
2015-03-08 11:12:46 +01:00
|
|
|
#include <sys/utsname.h>
|
2007-12-01 21:24:59 +01:00
|
|
|
#include <sys/wait.h>
|
2011-03-18 21:23:52 +01:00
|
|
|
#include <sys/resource.h>
|
2006-12-19 23:34:12 +01:00
|
|
|
#include <sys/socket.h>
|
2007-11-13 21:05:01 +01:00
|
|
|
#include <sys/ioctl.h>
|
2010-01-11 11:41:01 +01:00
|
|
|
#include <termios.h>
|
2008-01-24 19:34:46 +01:00
|
|
|
#ifndef NO_SYS_SELECT_H
|
2007-11-13 21:05:01 +01:00
|
|
|
#include <sys/select.h>
|
2008-01-24 19:34:46 +01:00
|
|
|
#endif
|
2006-12-19 23:34:12 +01:00
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/tcp.h>
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
#include <netdb.h>
|
|
|
|
#include <pwd.h>
|
2011-12-10 11:34:14 +01:00
|
|
|
#include <sys/un.h>
|
2010-10-27 10:39:52 +02:00
|
|
|
#ifndef NO_INTTYPES_H
|
2007-01-25 22:11:40 +01:00
|
|
|
#include <inttypes.h>
|
2010-10-27 10:39:52 +02:00
|
|
|
#else
|
|
|
|
#include <stdint.h>
|
|
|
|
#endif
|
2012-09-19 12:03:30 +02:00
|
|
|
#ifdef NO_INTPTR_T
|
|
|
|
/*
|
2018-08-08 13:49:58 +02:00
|
|
|
* On I16LP32, ILP32 and LP64 "long" is the safe bet, however
|
2012-09-19 12:03:30 +02:00
|
|
|
* on LLP86, IL33LLP64 and P64 it needs to be "long long",
|
|
|
|
* while on IP16 and IP16L32 it is "int" (resp. "short")
|
|
|
|
* Size needs to match (or exceed) 'sizeof(void *)'.
|
|
|
|
* We can't take "long long" here as not everybody has it.
|
|
|
|
*/
|
|
|
|
typedef long intptr_t;
|
|
|
|
typedef unsigned long uintptr_t;
|
|
|
|
#endif
|
2007-01-16 02:34:49 +01:00
|
|
|
#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */
|
2006-12-19 23:34:12 +01:00
|
|
|
#include <grp.h>
|
2007-01-16 02:34:49 +01:00
|
|
|
#define _ALL_SOURCE 1
|
2007-03-03 19:28:52 +01:00
|
|
|
#endif
|
2006-12-19 23:34:12 +01:00
|
|
|
|
git on Mac OS and precomposed unicode
Mac OS X mangles file names containing unicode on file systems HFS+,
VFAT or SAMBA. When a file using unicode code points outside ASCII
is created on a HFS+ drive, the file name is converted into
decomposed unicode and written to disk. No conversion is done if
the file name is already decomposed unicode.
Calling open("\xc3\x84", ...) with a precomposed "Ä" yields the same
result as open("\x41\xcc\x88",...) with a decomposed "Ä".
As a consequence, readdir() returns the file names in decomposed
unicode, even if the user expects precomposed unicode. Unlike on
HFS+, Mac OS X stores files on a VFAT drive (e.g. an USB drive) in
precomposed unicode, but readdir() still returns file names in
decomposed unicode. When a git repository is stored on a network
share using SAMBA, file names are send over the wire and written to
disk on the remote system in precomposed unicode, but Mac OS X
readdir() returns decomposed unicode to be compatible with its
behaviour on HFS+ and VFAT.
The unicode decomposition causes many problems:
- The names "git add" and other commands get from the end user may
often be precomposed form (the decomposed form is not easily input
from the keyboard), but when the commands read from the filesystem
to see what it is going to update the index with already is on the
filesystem, readdir() will give decomposed form, which is different.
- Similarly "git log", "git mv" and all other commands that need to
compare pathnames found on the command line (often but not always
precomposed form; a command line input resulting from globbing may
be in decomposed) with pathnames found in the tree objects (should
be precomposed form to be compatible with other systems and for
consistency in general).
- The same for names stored in the index, which should be
precomposed, that may need to be compared with the names read from
readdir().
NFS mounted from Linux is fully transparent and does not suffer from
the above.
As Mac OS X treats precomposed and decomposed file names as equal,
we can
- wrap readdir() on Mac OS X to return the precomposed form, and
- normalize decomposed form given from the command line also to the
precomposed form,
to ensure that all pathnames used in Git are always in the
precomposed form. This behaviour can be requested by setting
"core.precomposedunicode" configuration variable to true.
The code in compat/precomposed_utf8.c implements basically 4 new
functions: precomposed_utf8_opendir(), precomposed_utf8_readdir(),
precomposed_utf8_closedir() and precompose_argv(). The first three
are to wrap opendir(3), readdir(3), and closedir(3) functions.
The argv[] conversion allows to use the TAB filename completion done
by the shell on command line. It tolerates other tools which use
readdir() to feed decomposed file names into git.
When creating a new git repository with "git init" or "git clone",
"core.precomposedunicode" will be set "false".
The user needs to activate this feature manually. She typically
sets core.precomposedunicode to "true" on HFS and VFAT, or file
systems mounted via SAMBA.
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-08 15:50:25 +02:00
|
|
|
/* used on Mac OS X */
|
|
|
|
#ifdef PRECOMPOSE_UNICODE
|
|
|
|
#include "compat/precompose_utf8.h"
|
|
|
|
#else
|
MacOS: precompose_argv_prefix()
The following sequence leads to a "BUG" assertion running under MacOS:
DIR=git-test-restore-p
Adiarnfd=$(printf 'A\314\210')
DIRNAME=xx${Adiarnfd}yy
mkdir $DIR &&
cd $DIR &&
git init &&
mkdir $DIRNAME &&
cd $DIRNAME &&
echo "Initial" >file &&
git add file &&
echo "One more line" >>file &&
echo y | git restore -p .
Initialized empty Git repository in /tmp/git-test-restore-p/.git/
BUG: pathspec.c:495: error initializing pathspec_item
Cannot close git diff-index --cached --numstat
[snip]
The command `git restore` is run from a directory inside a Git repo.
Git needs to split the $CWD into 2 parts:
The path to the repo and "the rest", if any.
"The rest" becomes a "prefix" later used inside the pathspec code.
As an example, "/path/to/repo/dir-inside-repå" would determine
"/path/to/repo" as the root of the repo, the place where the
configuration file .git/config is found.
The rest becomes the prefix ("dir-inside-repå"), from where the
pathspec machinery expands the ".", more about this later.
If there is a decomposed form, (making the decomposing visible like this),
"dir-inside-rep°a" doesn't match "dir-inside-repå".
Git commands need to:
(a) read the configuration variable "core.precomposeunicode"
(b) precocompose argv[]
(c) precompose the prefix, if there was any
The first commit,
76759c7dff53 "git on Mac OS and precomposed unicode"
addressed (a) and (b).
The call to precompose_argv() was added into parse-options.c,
because that seemed to be a good place when the patch was written.
Commands that don't use parse-options need to do (a) and (b) themselfs.
The commands `diff-files`, `diff-index`, `diff-tree` and `diff`
learned (a) and (b) in
commit 90a78b83e0b8 "diff: run arguments through precompose_argv"
Branch names (or refs in general) using decomposed code points
resulting in decomposed file names had been fixed in
commit 8e712ef6fc97 "Honor core.precomposeUnicode in more places"
The bug report from above shows 2 things:
- more commands need to handle precomposed unicode
- (c) should be implemented for all commands using pathspecs
Solution:
precompose_argv() now handles the prefix (if needed), and is renamed into
precompose_argv_prefix().
Inside this function the config variable core.precomposeunicode is read
into the global variable precomposed_unicode, as before.
This reading is skipped if precomposed_unicode had been read before.
The original patch for preocomposed unicode, 76759c7dff53, placed
precompose_argv() into parse-options.c
Now add it into git.c::run_builtin() as well. Existing precompose
calls in diff-files.c and others may become redundant, and if we
audit the callflows that reach these places to make sure that they
can never be reached without going through the new call added to
run_builtin(), we might be able to remove these existing ones.
But in this commit, we do not bother to do so and leave these
precompose callsites as they are. Because precompose() is
idempotent and can be called on an already precomposed string
safely, this is safer than removing existing calls without fully
vetting the callflows.
There is certainly room for cleanups - this change intends to be a bug fix.
Cleanups needs more tests in e.g. t/t3910-mac-os-precompose.sh, and should
be done in future commits.
[1] git-bugreport-2021-01-06-1209.txt (git can't deal with special characters)
[2] https://lore.kernel.org/git/A102844A-9501-4A86-854D-E3B387D378AA@icloud.com/
Reported-by: Daniel Troger <random_n0body@icloud.com>
Helped-By: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-03 17:28:23 +01:00
|
|
|
static inline const char *precompose_argv_prefix(int argc, const char **argv, const char *prefix)
|
compat-util: type-check parameters of no-op replacement functions
When there is no need to run a specific function on certain platforms,
we often #define an empty function to swallow its parameters and
make it into a no-op, e.g.
#define precompose_argv(c,v) /* no-op */
While this guarantees that no unneeded code is generated, it also
discards type and other checks on these parameters, e.g. a new code
written with the argv-array API (diff_args is of type "struct
argv_array" that has .argc and .argv members):
precompose_argv(diff_args.argc, diff_args.argv);
must be updated to use "struct strvec diff_args" with .nr and .v
members, like so:
precompose_argv(diff_args.nr, diff_args.v);
after the argv-array API has been updated to the strvec API.
However, the "no oop" C preprocessor macro is too aggressive to
discard what is unused, and did not catch such a call that was left
unconverted.
Using a "static inline" function whose body is a no-op should still
result in the same binary with decent compilers yet catch such a
reference to a missing field or passing a value of a wrong type.
While at it, I notice that precompute_str() has never been used
anywhere in the code, since it was introduced at 76759c7d (git on
Mac OS and precomposed unicode, 2012-07-08). Instead of turning it
into a static inline, just remove it.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-07 02:25:37 +02:00
|
|
|
{
|
MacOS: precompose_argv_prefix()
The following sequence leads to a "BUG" assertion running under MacOS:
DIR=git-test-restore-p
Adiarnfd=$(printf 'A\314\210')
DIRNAME=xx${Adiarnfd}yy
mkdir $DIR &&
cd $DIR &&
git init &&
mkdir $DIRNAME &&
cd $DIRNAME &&
echo "Initial" >file &&
git add file &&
echo "One more line" >>file &&
echo y | git restore -p .
Initialized empty Git repository in /tmp/git-test-restore-p/.git/
BUG: pathspec.c:495: error initializing pathspec_item
Cannot close git diff-index --cached --numstat
[snip]
The command `git restore` is run from a directory inside a Git repo.
Git needs to split the $CWD into 2 parts:
The path to the repo and "the rest", if any.
"The rest" becomes a "prefix" later used inside the pathspec code.
As an example, "/path/to/repo/dir-inside-repå" would determine
"/path/to/repo" as the root of the repo, the place where the
configuration file .git/config is found.
The rest becomes the prefix ("dir-inside-repå"), from where the
pathspec machinery expands the ".", more about this later.
If there is a decomposed form, (making the decomposing visible like this),
"dir-inside-rep°a" doesn't match "dir-inside-repå".
Git commands need to:
(a) read the configuration variable "core.precomposeunicode"
(b) precocompose argv[]
(c) precompose the prefix, if there was any
The first commit,
76759c7dff53 "git on Mac OS and precomposed unicode"
addressed (a) and (b).
The call to precompose_argv() was added into parse-options.c,
because that seemed to be a good place when the patch was written.
Commands that don't use parse-options need to do (a) and (b) themselfs.
The commands `diff-files`, `diff-index`, `diff-tree` and `diff`
learned (a) and (b) in
commit 90a78b83e0b8 "diff: run arguments through precompose_argv"
Branch names (or refs in general) using decomposed code points
resulting in decomposed file names had been fixed in
commit 8e712ef6fc97 "Honor core.precomposeUnicode in more places"
The bug report from above shows 2 things:
- more commands need to handle precomposed unicode
- (c) should be implemented for all commands using pathspecs
Solution:
precompose_argv() now handles the prefix (if needed), and is renamed into
precompose_argv_prefix().
Inside this function the config variable core.precomposeunicode is read
into the global variable precomposed_unicode, as before.
This reading is skipped if precomposed_unicode had been read before.
The original patch for preocomposed unicode, 76759c7dff53, placed
precompose_argv() into parse-options.c
Now add it into git.c::run_builtin() as well. Existing precompose
calls in diff-files.c and others may become redundant, and if we
audit the callflows that reach these places to make sure that they
can never be reached without going through the new call added to
run_builtin(), we might be able to remove these existing ones.
But in this commit, we do not bother to do so and leave these
precompose callsites as they are. Because precompose() is
idempotent and can be called on an already precomposed string
safely, this is safer than removing existing calls without fully
vetting the callflows.
There is certainly room for cleanups - this change intends to be a bug fix.
Cleanups needs more tests in e.g. t/t3910-mac-os-precompose.sh, and should
be done in future commits.
[1] git-bugreport-2021-01-06-1209.txt (git can't deal with special characters)
[2] https://lore.kernel.org/git/A102844A-9501-4A86-854D-E3B387D378AA@icloud.com/
Reported-by: Daniel Troger <random_n0body@icloud.com>
Helped-By: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-03 17:28:23 +01:00
|
|
|
return prefix;
|
compat-util: type-check parameters of no-op replacement functions
When there is no need to run a specific function on certain platforms,
we often #define an empty function to swallow its parameters and
make it into a no-op, e.g.
#define precompose_argv(c,v) /* no-op */
While this guarantees that no unneeded code is generated, it also
discards type and other checks on these parameters, e.g. a new code
written with the argv-array API (diff_args is of type "struct
argv_array" that has .argc and .argv members):
precompose_argv(diff_args.argc, diff_args.argv);
must be updated to use "struct strvec diff_args" with .nr and .v
members, like so:
precompose_argv(diff_args.nr, diff_args.v);
after the argv-array API has been updated to the strvec API.
However, the "no oop" C preprocessor macro is too aggressive to
discard what is unused, and did not catch such a call that was left
unconverted.
Using a "static inline" function whose body is a no-op should still
result in the same binary with decent compilers yet catch such a
reference to a missing field or passing a value of a wrong type.
While at it, I notice that precompute_str() has never been used
anywhere in the code, since it was introduced at 76759c7d (git on
Mac OS and precomposed unicode, 2012-07-08). Instead of turning it
into a static inline, just remove it.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-07 02:25:37 +02:00
|
|
|
}
|
2021-04-04 08:17:45 +02:00
|
|
|
static inline const char *precompose_string_if_needed(const char *in)
|
|
|
|
{
|
|
|
|
return in;
|
|
|
|
}
|
|
|
|
|
2015-10-05 05:45:26 +02:00
|
|
|
#define probe_utf8_pathname_composition()
|
git on Mac OS and precomposed unicode
Mac OS X mangles file names containing unicode on file systems HFS+,
VFAT or SAMBA. When a file using unicode code points outside ASCII
is created on a HFS+ drive, the file name is converted into
decomposed unicode and written to disk. No conversion is done if
the file name is already decomposed unicode.
Calling open("\xc3\x84", ...) with a precomposed "Ä" yields the same
result as open("\x41\xcc\x88",...) with a decomposed "Ä".
As a consequence, readdir() returns the file names in decomposed
unicode, even if the user expects precomposed unicode. Unlike on
HFS+, Mac OS X stores files on a VFAT drive (e.g. an USB drive) in
precomposed unicode, but readdir() still returns file names in
decomposed unicode. When a git repository is stored on a network
share using SAMBA, file names are send over the wire and written to
disk on the remote system in precomposed unicode, but Mac OS X
readdir() returns decomposed unicode to be compatible with its
behaviour on HFS+ and VFAT.
The unicode decomposition causes many problems:
- The names "git add" and other commands get from the end user may
often be precomposed form (the decomposed form is not easily input
from the keyboard), but when the commands read from the filesystem
to see what it is going to update the index with already is on the
filesystem, readdir() will give decomposed form, which is different.
- Similarly "git log", "git mv" and all other commands that need to
compare pathnames found on the command line (often but not always
precomposed form; a command line input resulting from globbing may
be in decomposed) with pathnames found in the tree objects (should
be precomposed form to be compatible with other systems and for
consistency in general).
- The same for names stored in the index, which should be
precomposed, that may need to be compared with the names read from
readdir().
NFS mounted from Linux is fully transparent and does not suffer from
the above.
As Mac OS X treats precomposed and decomposed file names as equal,
we can
- wrap readdir() on Mac OS X to return the precomposed form, and
- normalize decomposed form given from the command line also to the
precomposed form,
to ensure that all pathnames used in Git are always in the
precomposed form. This behaviour can be requested by setting
"core.precomposedunicode" configuration variable to true.
The code in compat/precomposed_utf8.c implements basically 4 new
functions: precomposed_utf8_opendir(), precomposed_utf8_readdir(),
precomposed_utf8_closedir() and precompose_argv(). The first three
are to wrap opendir(3), readdir(3), and closedir(3) functions.
The argv[] conversion allows to use the TAB filename completion done
by the shell on command line. It tolerates other tools which use
readdir() to feed decomposed file names into git.
When creating a new git repository with "git init" or "git clone",
"core.precomposedunicode" will be set "false".
The user needs to activate this feature manually. She typically
sets core.precomposedunicode to "true" on HFS and VFAT, or file
systems mounted via SAMBA.
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-08 15:50:25 +02:00
|
|
|
#endif
|
|
|
|
|
2012-08-24 12:31:03 +02:00
|
|
|
#ifdef MKDIR_WO_TRAILING_SLASH
|
|
|
|
#define mkdir(a,b) compat_mkdir_wo_trailing_slash((a),(b))
|
2019-04-29 10:28:14 +02:00
|
|
|
int compat_mkdir_wo_trailing_slash(const char*, mode_t);
|
2012-08-24 12:31:03 +02:00
|
|
|
#endif
|
|
|
|
|
2012-09-08 18:54:34 +02:00
|
|
|
#ifdef NO_STRUCT_ITIMERVAL
|
|
|
|
struct itimerval {
|
|
|
|
struct timeval it_interval;
|
|
|
|
struct timeval it_value;
|
2014-08-29 18:42:33 +02:00
|
|
|
};
|
2012-09-08 18:54:34 +02:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef NO_SETITIMER
|
compat-util: type-check parameters of no-op replacement functions
When there is no need to run a specific function on certain platforms,
we often #define an empty function to swallow its parameters and
make it into a no-op, e.g.
#define precompose_argv(c,v) /* no-op */
While this guarantees that no unneeded code is generated, it also
discards type and other checks on these parameters, e.g. a new code
written with the argv-array API (diff_args is of type "struct
argv_array" that has .argc and .argv members):
precompose_argv(diff_args.argc, diff_args.argv);
must be updated to use "struct strvec diff_args" with .nr and .v
members, like so:
precompose_argv(diff_args.nr, diff_args.v);
after the argv-array API has been updated to the strvec API.
However, the "no oop" C preprocessor macro is too aggressive to
discard what is unused, and did not catch such a call that was left
unconverted.
Using a "static inline" function whose body is a no-op should still
result in the same binary with decent compilers yet catch such a
reference to a missing field or passing a value of a wrong type.
While at it, I notice that precompute_str() has never been used
anywhere in the code, since it was introduced at 76759c7d (git on
Mac OS and precomposed unicode, 2012-07-08). Instead of turning it
into a static inline, just remove it.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-07 02:25:37 +02:00
|
|
|
static inline int setitimer(int which, const struct itimerval *value, struct itimerval *newvalue) {
|
2020-12-15 22:26:17 +01:00
|
|
|
return 0; /* pretend success */
|
compat-util: type-check parameters of no-op replacement functions
When there is no need to run a specific function on certain platforms,
we often #define an empty function to swallow its parameters and
make it into a no-op, e.g.
#define precompose_argv(c,v) /* no-op */
While this guarantees that no unneeded code is generated, it also
discards type and other checks on these parameters, e.g. a new code
written with the argv-array API (diff_args is of type "struct
argv_array" that has .argc and .argv members):
precompose_argv(diff_args.argc, diff_args.argv);
must be updated to use "struct strvec diff_args" with .nr and .v
members, like so:
precompose_argv(diff_args.nr, diff_args.v);
after the argv-array API has been updated to the strvec API.
However, the "no oop" C preprocessor macro is too aggressive to
discard what is unused, and did not catch such a call that was left
unconverted.
Using a "static inline" function whose body is a no-op should still
result in the same binary with decent compilers yet catch such a
reference to a missing field or passing a value of a wrong type.
While at it, I notice that precompute_str() has never been used
anywhere in the code, since it was introduced at 76759c7d (git on
Mac OS and precomposed unicode, 2012-07-08). Instead of turning it
into a static inline, just remove it.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-07 02:25:37 +02:00
|
|
|
}
|
2012-09-08 18:54:34 +02:00
|
|
|
#endif
|
|
|
|
|
2009-05-31 10:35:51 +02:00
|
|
|
#ifndef NO_LIBGEN_H
|
|
|
|
#include <libgen.h>
|
|
|
|
#else
|
|
|
|
#define basename gitbasename
|
2019-04-29 10:28:14 +02:00
|
|
|
char *gitbasename(char *);
|
2016-01-12 08:57:36 +01:00
|
|
|
#define dirname gitdirname
|
2019-04-29 10:28:14 +02:00
|
|
|
char *gitdirname(char *);
|
2009-05-31 10:35:51 +02:00
|
|
|
#endif
|
|
|
|
|
2006-12-19 23:34:12 +01:00
|
|
|
#ifndef NO_ICONV
|
|
|
|
#include <iconv.h>
|
|
|
|
#endif
|
2005-12-05 20:54:29 +01:00
|
|
|
|
2008-07-09 23:29:00 +02:00
|
|
|
#ifndef NO_OPENSSL
|
2015-02-06 10:35:31 +01:00
|
|
|
#ifdef __APPLE__
|
git-compat-util: suppress unavoidable Apple-specific deprecation warnings
With the release of Mac OS X 10.7 in July 2011, Apple deprecated all
openssl.h functionality due to OpenSSL ABI (application binary
interface) instability, resulting in an explosion of compilation
warnings about deprecated SSL, SHA1, and X509 functions (among others).
61067954ce (cache.h: eliminate SHA-1 deprecation warnings on Mac OS X;
2013-05-19) and be4c828b76 (imap-send: eliminate HMAC deprecation
warnings on Mac OS X; 2013-05-19) attempted to ameliorate the situation
by taking advantage of drop-in replacement functionality provided by
Apple's (ABI-stable) CommonCrypto facility, however CommonCrypto
supplies only a subset of deprecated OpenSSL functionality, thus a host
of warnings remain.
Despite this shortcoming, it was hoped that Apple would ultimately
provide CommonCrypto replacements for all deprecated OpenSSL
functionality, and that the effort started by 61067954ce and be4c828b76
would be continued and eventually eliminate all deprecation warnings.
However, now 3.5 years later, and with Mac OS X at 10.10, the hoped-for
CommonCrypto replacements have not yet materialized, nor is there any
indication that they will be forthcoming.
These Apple-specific warnings are pure noise: they don't tell us
anything useful and we have no control over them, nor is Apple likely to
provide replacements any time soon. Such noise may obscure other
legitimate warnings, therefore silence them.
Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-17 00:19:36 +01:00
|
|
|
#define __AVAILABILITY_MACROS_USES_AVAILABILITY 0
|
2015-02-06 10:35:31 +01:00
|
|
|
#include <AvailabilityMacros.h>
|
|
|
|
#undef DEPRECATED_ATTRIBUTE
|
|
|
|
#define DEPRECATED_ATTRIBUTE
|
|
|
|
#undef __AVAILABILITY_MACROS_USES_AVAILABILITY
|
|
|
|
#endif
|
2008-07-09 23:29:00 +02:00
|
|
|
#include <openssl/ssl.h>
|
|
|
|
#include <openssl/err.h>
|
|
|
|
#endif
|
|
|
|
|
2018-04-15 17:36:17 +02:00
|
|
|
#ifdef HAVE_SYSINFO
|
|
|
|
# include <sys/sysinfo.h>
|
|
|
|
#endif
|
|
|
|
|
2013-02-25 20:30:19 +01:00
|
|
|
/* On most systems <netdb.h> would have given us this, but
|
|
|
|
* not on some systems (e.g. z/OS).
|
|
|
|
*/
|
|
|
|
#ifndef NI_MAXHOST
|
|
|
|
#define NI_MAXHOST 1025
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef NI_MAXSERV
|
|
|
|
#define NI_MAXSERV 32
|
|
|
|
#endif
|
|
|
|
|
2006-09-16 07:47:21 +02:00
|
|
|
/* On most systems <limits.h> would have given us this, but
|
|
|
|
* not on some systems (e.g. GNU/Hurd).
|
|
|
|
*/
|
|
|
|
#ifndef PATH_MAX
|
|
|
|
#define PATH_MAX 4096
|
|
|
|
#endif
|
|
|
|
|
use uintmax_t for timestamps
Previously, we used `unsigned long` for timestamps. This was only a good
choice on Linux, where we know implicitly that `unsigned long` is what is
used for `time_t`.
However, we want to use a different data type for timestamps for two
reasons:
- there is nothing that says that `unsigned long` should be the same data
type as `time_t`, and indeed, on 64-bit Windows for example, it is not:
`unsigned long` is 32-bit but `time_t` is 64-bit.
- even on 32-bit Linux, where `unsigned long` (and thereby `time_t`) is
32-bit, we *want* to be able to encode timestamps in Git that are
currently absurdly far in the future, *even if* the system library is
not able to format those timestamps into date strings.
So let's just switch to the maximal integer type available, which should
be at least 64-bit for all practical purposes these days. It certainly
cannot be worse than `unsigned long`, so...
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-04-26 21:29:42 +02:00
|
|
|
typedef uintmax_t timestamp_t;
|
|
|
|
#define PRItime PRIuMAX
|
|
|
|
#define parse_timestamp strtoumax
|
|
|
|
#define TIME_MAX UINTMAX_MAX
|
name-rev: avoid cutoff timestamp underflow
When 'git name-rev' is invoked with commit-ish parameters, it tries to
save some work, and doesn't visit commits older than the committer
date of the oldest given commit minus a one day worth of slop. Since
our 'timestamp_t' is an unsigned type, this leads to a timestamp
underflow when the committer date of the oldest given commit is within
a day of the UNIX epoch. As a result the cutoff timestamp ends up
far-far in the future, and 'git name-rev' doesn't visit any commits,
and names each given commit as 'undefined'.
Check whether subtracting the slop from the oldest committer date
would lead to an underflow, and use no cutoff in that case. We don't
have a TIME_MIN constant, dddbad728c (timestamp_t: a new data type for
timestamps, 2017-04-26) didn't add one, so do it now.
Note that the type of the cutoff timestamp variable used to be signed
before 5589e87fd8 (name-rev: change a "long" variable to timestamp_t,
2017-05-20). The behavior was still the same even back then, but the
underflow didn't happen when substracting the slop from the oldest
committer date, but when comparing the signed cutoff timestamp with
unsigned committer dates in name_rev(). IOW, this underflow bug is as
old as 'git name-rev' itself.
Helped-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-24 09:32:13 +02:00
|
|
|
#define TIME_MIN 0
|
2017-04-21 12:45:44 +02:00
|
|
|
|
2007-12-03 21:55:57 +01:00
|
|
|
#ifndef PATH_SEP
|
|
|
|
#define PATH_SEP ':'
|
|
|
|
#endif
|
|
|
|
|
2010-04-13 11:07:13 +02:00
|
|
|
#ifdef HAVE_PATHS_H
|
|
|
|
#include <paths.h>
|
|
|
|
#endif
|
|
|
|
#ifndef _PATH_DEFPATH
|
|
|
|
#define _PATH_DEFPATH "/usr/local/bin:/usr/bin:/bin"
|
|
|
|
#endif
|
|
|
|
|
2018-10-30 19:40:04 +01:00
|
|
|
#ifndef platform_core_config
|
|
|
|
static inline int noop_core_config(const char *var, const char *value, void *cb)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#define platform_core_config noop_core_config
|
|
|
|
#endif
|
|
|
|
|
checkout: fix bug that makes checkout follow symlinks in leading path
Before checking out a file, we have to confirm that all of its leading
components are real existing directories. And to reduce the number of
lstat() calls in this process, we cache the last leading path known to
contain only directories. However, when a path collision occurs (e.g.
when checking out case-sensitive files in case-insensitive file
systems), a cached path might have its file type changed on disk,
leaving the cache on an invalid state. Normally, this doesn't bring
any bad consequences as we usually check out files in index order, and
therefore, by the time the cached path becomes outdated, we no longer
need it anyway (because all files in that directory would have already
been written).
But, there are some users of the checkout machinery that do not always
follow the index order. In particular: checkout-index writes the paths
in the same order that they appear on the CLI (or stdin); and the
delayed checkout feature -- used when a long-running filter process
replies with "status=delayed" -- postpones the checkout of some entries,
thus modifying the checkout order.
When we have to check out an out-of-order entry and the lstat() cache is
invalid (due to a previous path collision), checkout_entry() may end up
using the invalid data and thrusting that the leading components are
real directories when, in reality, they are not. In the best case
scenario, where the directory was replaced by a regular file, the user
will get an error: "fatal: unable to create file 'foo/bar': Not a
directory". But if the directory was replaced by a symlink, checkout
could actually end up following the symlink and writing the file at a
wrong place, even outside the repository. Since delayed checkout is
affected by this bug, it could be used by an attacker to write
arbitrary files during the clone of a maliciously crafted repository.
Some candidate solutions considered were to disable the lstat() cache
during unordered checkouts or sort the entries before passing them to
the checkout machinery. But both ideas include some performance penalty
and they don't future-proof the code against new unordered use cases.
Instead, we now manually reset the lstat cache whenever we successfully
remove a directory. Note: We are not even checking whether the directory
was the same as the lstat cache points to because we might face a
scenario where the paths refer to the same location but differ due to
case folding, precomposed UTF-8 issues, or the presence of `..`
components in the path. Two regression tests, with case-collisions and
utf8-collisions, are also added for both checkout-index and delayed
checkout.
Note: to make the previously mentioned clone attack unfeasible, it would
be sufficient to reset the lstat cache only after the remove_subtree()
call inside checkout_entry(). This is the place where we would remove a
directory whose path collides with the path of another entry that we are
currently trying to check out (possibly a symlink). However, in the
interest of a thorough fix that does not leave Git open to
similar-but-not-identical attack vectors, we decided to intercept
all `rmdir()` calls in one fell swoop.
This addresses CVE-2021-21300.
Co-authored-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
2020-12-10 14:27:55 +01:00
|
|
|
int lstat_cache_aware_rmdir(const char *path);
|
|
|
|
#if !defined(__MINGW32__) && !defined(_MSC_VER)
|
|
|
|
#define rmdir lstat_cache_aware_rmdir
|
|
|
|
#endif
|
|
|
|
|
2008-03-05 21:51:27 +01:00
|
|
|
#ifndef has_dos_drive_prefix
|
2014-08-16 23:48:33 +02:00
|
|
|
static inline int git_has_dos_drive_prefix(const char *path)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#define has_dos_drive_prefix git_has_dos_drive_prefix
|
2008-03-05 21:51:27 +01:00
|
|
|
#endif
|
|
|
|
|
2016-01-12 08:57:22 +01:00
|
|
|
#ifndef skip_dos_drive_prefix
|
|
|
|
static inline int git_skip_dos_drive_prefix(char **path)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#define skip_dos_drive_prefix git_skip_dos_drive_prefix
|
|
|
|
#endif
|
|
|
|
|
2014-08-16 23:48:33 +02:00
|
|
|
#ifndef is_dir_sep
|
|
|
|
static inline int git_is_dir_sep(int c)
|
|
|
|
{
|
|
|
|
return c == '/';
|
|
|
|
}
|
|
|
|
#define is_dir_sep git_is_dir_sep
|
2010-07-13 16:17:43 +02:00
|
|
|
#endif
|
|
|
|
|
2014-08-16 23:48:33 +02:00
|
|
|
#ifndef offset_1st_component
|
|
|
|
static inline int git_offset_1st_component(const char *path)
|
|
|
|
{
|
|
|
|
return is_dir_sep(path[0]);
|
|
|
|
}
|
|
|
|
#define offset_1st_component git_offset_1st_component
|
2008-03-05 21:51:27 +01:00
|
|
|
#endif
|
|
|
|
|
mingw: refuse to access paths with trailing spaces or periods
When creating a directory on Windows whose path ends in a space or a
period (or chains thereof), the Win32 API "helpfully" trims those. For
example, `mkdir("abc ");` will return success, but actually create a
directory called `abc` instead.
This stems back to the DOS days, when all file names had exactly 8
characters plus exactly 3 characters for the file extension, and the
only way to have shorter names was by padding with spaces.
Sadly, this "helpful" behavior is a bit inconsistent: after a successful
`mkdir("abc ");`, a `mkdir("abc /def")` will actually _fail_ (because
the directory `abc ` does not actually exist).
Even if it would work, we now have a serious problem because a Git
repository could contain directories `abc` and `abc `, and on Windows,
they would be "merged" unintentionally.
As these paths are illegal on Windows, anyway, let's disallow any
accesses to such paths on that Operating System.
For practical reasons, this behavior is still guarded by the
config setting `core.protectNTFS`: it is possible (and at least two
regression tests make use of it) to create commits without involving the
worktree. In such a scenario, it is of course possible -- even on
Windows -- to create such file names.
Among other consequences, this patch disallows submodules' paths to end
in spaces on Windows (which would formerly have confused Git enough to
try to write into incorrect paths, anyway).
While this patch does not fix a vulnerability on its own, it prevents an
attack vector that was exploited in demonstrations of a number of
recently-fixed security bugs.
The regression test added to `t/t7417-submodule-path-url.sh` reflects
that attack vector.
Note that we have to adjust the test case "prevent git~1 squatting on
Windows" in `t/t7415-submodule-names.sh` because of a very subtle issue.
It tries to clone two submodules whose names differ only in a trailing
period character, and as a consequence their git directories differ in
the same way. Previously, when Git tried to clone the second submodule,
it thought that the git directory already existed (because on Windows,
when you create a directory with the name `b.` it actually creates `b`),
but with this patch, the first submodule's clone will fail because of
the illegal name of the git directory. Therefore, when cloning the
second submodule, Git will take a different code path: a fresh clone
(without an existing git directory). Both code paths fail to clone the
second submodule, both because the the corresponding worktree directory
exists and is not empty, but the error messages are worded differently.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
2019-09-05 13:27:53 +02:00
|
|
|
#ifndef is_valid_path
|
|
|
|
#define is_valid_path(path) 1
|
|
|
|
#endif
|
|
|
|
|
2011-05-27 18:00:39 +02:00
|
|
|
#ifndef find_last_dir_sep
|
2014-08-16 23:48:33 +02:00
|
|
|
static inline char *git_find_last_dir_sep(const char *path)
|
|
|
|
{
|
|
|
|
return strrchr(path, '/');
|
|
|
|
}
|
|
|
|
#define find_last_dir_sep git_find_last_dir_sep
|
2011-05-27 18:00:39 +02:00
|
|
|
#endif
|
|
|
|
|
2020-03-27 01:36:43 +01:00
|
|
|
#ifndef has_dir_sep
|
|
|
|
static inline int git_has_dir_sep(const char *path)
|
|
|
|
{
|
|
|
|
return !!strchr(path, '/');
|
|
|
|
}
|
|
|
|
#define has_dir_sep(path) git_has_dir_sep(path)
|
|
|
|
#endif
|
|
|
|
|
2018-10-15 11:47:08 +02:00
|
|
|
#ifndef query_user_email
|
|
|
|
#define query_user_email() NULL
|
|
|
|
#endif
|
|
|
|
|
2019-01-03 22:03:50 +01:00
|
|
|
#ifdef __TANDEM
|
|
|
|
#include <floss.h(floss_execl,floss_execlp,floss_execv,floss_execvp)>
|
|
|
|
#include <floss.h(floss_getpwuid)>
|
|
|
|
#ifndef NSIG
|
|
|
|
/*
|
|
|
|
* NonStop NSE and NSX do not provide NSIG. SIGGUARDIAN(99) is the highest
|
|
|
|
* known, by detective work using kill -l as a list is all signals
|
|
|
|
* instead of signal.h where it should be.
|
|
|
|
*/
|
|
|
|
# define NSIG 100
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
2011-11-15 18:31:09 +01:00
|
|
|
#if defined(__HP_cc) && (__HP_cc >= 61000)
|
2011-03-07 13:13:15 +01:00
|
|
|
#define NORETURN __attribute__((noreturn))
|
|
|
|
#define NORETURN_PTR
|
2011-06-19 03:07:03 +02:00
|
|
|
#elif defined(__GNUC__) && !defined(NO_NORETURN)
|
2005-12-05 20:54:29 +01:00
|
|
|
#define NORETURN __attribute__((__noreturn__))
|
2009-09-30 20:05:50 +02:00
|
|
|
#define NORETURN_PTR __attribute__((__noreturn__))
|
2010-01-20 20:45:12 +01:00
|
|
|
#elif defined(_MSC_VER)
|
|
|
|
#define NORETURN __declspec(noreturn)
|
|
|
|
#define NORETURN_PTR
|
2005-12-05 20:54:29 +01:00
|
|
|
#else
|
|
|
|
#define NORETURN
|
2009-09-30 20:05:50 +02:00
|
|
|
#define NORETURN_PTR
|
2014-07-05 01:43:49 +02:00
|
|
|
#ifndef __GNUC__
|
2005-12-05 20:54:29 +01:00
|
|
|
#ifndef __attribute__
|
|
|
|
#define __attribute__(x)
|
|
|
|
#endif
|
|
|
|
#endif
|
2014-07-05 01:43:49 +02:00
|
|
|
#endif
|
2005-12-05 20:54:29 +01:00
|
|
|
|
2013-07-18 22:02:12 +02:00
|
|
|
/* The sentinel attribute is valid from gcc version 4.0 */
|
|
|
|
#if defined(__GNUC__) && (__GNUC__ >= 4)
|
|
|
|
#define LAST_ARG_MUST_BE_NULL __attribute__((sentinel))
|
|
|
|
#else
|
|
|
|
#define LAST_ARG_MUST_BE_NULL
|
|
|
|
#endif
|
|
|
|
|
2018-10-23 23:50:19 +02:00
|
|
|
#define MAYBE_UNUSED __attribute__((__unused__))
|
|
|
|
|
2009-08-18 21:26:55 +02:00
|
|
|
#include "compat/bswap.h"
|
|
|
|
|
2013-01-01 03:44:11 +01:00
|
|
|
#include "wildmatch.h"
|
|
|
|
|
2014-07-16 20:20:36 +02:00
|
|
|
struct strbuf;
|
|
|
|
|
2005-12-05 20:54:29 +01:00
|
|
|
/* General helper functions */
|
2019-04-29 10:28:14 +02:00
|
|
|
void vreportf(const char *prefix, const char *err, va_list params);
|
|
|
|
NORETURN void usage(const char *err);
|
2019-04-29 10:28:20 +02:00
|
|
|
NORETURN void usagef(const char *err, ...) __attribute__((format (printf, 1, 2)));
|
|
|
|
NORETURN void die(const char *err, ...) __attribute__((format (printf, 1, 2)));
|
|
|
|
NORETURN void die_errno(const char *err, ...) __attribute__((format (printf, 1, 2)));
|
|
|
|
int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
|
|
|
|
int error_errno(const char *err, ...) __attribute__((format (printf, 1, 2)));
|
|
|
|
void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
|
|
|
|
void warning_errno(const char *err, ...) __attribute__((format (printf, 1, 2)));
|
2005-12-05 20:54:29 +01:00
|
|
|
|
2013-08-05 17:59:23 +02:00
|
|
|
#ifndef NO_OPENSSL
|
|
|
|
#ifdef APPLE_COMMON_CRYPTO
|
|
|
|
#include "compat/apple-common-crypto.h"
|
|
|
|
#else
|
|
|
|
#include <openssl/evp.h>
|
|
|
|
#include <openssl/hmac.h>
|
|
|
|
#endif /* APPLE_COMMON_CRYPTO */
|
|
|
|
#include <openssl/x509v3.h>
|
|
|
|
#endif /* NO_OPENSSL */
|
|
|
|
|
make error()'s constant return value more visible
When git is compiled with "gcc -Wuninitialized -O3", some
inlined calls provide an additional opportunity for the
compiler to do static analysis on variable initialization.
For example, with two functions like this:
int get_foo(int *foo)
{
if (something_that_might_fail() < 0)
return error("unable to get foo");
*foo = 0;
return 0;
}
void some_fun(void)
{
int foo;
if (get_foo(&foo) < 0)
return -1;
printf("foo is %d\n", foo);
}
If get_foo() is not inlined, then when compiling some_fun,
gcc sees only that a pointer to the local variable is
passed, and must assume that it is an out parameter that
is initialized after get_foo returns.
However, when get_foo() is inlined, the compiler may look at
all of the code together and see that some code paths in
get_foo() do not initialize the variable. As a result, it
prints a warning. But what the compiler can't see is that
error() always returns -1, and therefore we know that either
we return early from some_fun, or foo ends up initialized,
and the code is safe. The warning is a false positive.
If we can make the compiler aware that error() will always
return -1, it can do a better job of analysis. The simplest
method would be to inline the error() function. However,
this doesn't work, because gcc will not inline a variadc
function. We can work around this by defining a macro. This
relies on two gcc extensions:
1. Variadic macros (these are present in C99, but we do
not rely on that).
2. Gcc treats the "##" paste operator specially between a
comma and __VA_ARGS__, which lets our variadic macro
work even if no format parameters are passed to
error().
Since we are using these extra features, we hide the macro
behind an #ifdef. This is OK, though, because our goal was
just to help gcc.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-12-15 18:37:36 +01:00
|
|
|
/*
|
|
|
|
* Let callers be aware of the constant return value; this can help
|
2013-02-08 16:09:28 +01:00
|
|
|
* gcc with -Wuninitialized analysis. We restrict this trick to gcc, though,
|
|
|
|
* because some compilers may not support variadic macros. Since we're only
|
|
|
|
* trying to help gcc, anyway, it's OK; other compilers will fall back to
|
|
|
|
* using the function as usual.
|
make error()'s constant return value more visible
When git is compiled with "gcc -Wuninitialized -O3", some
inlined calls provide an additional opportunity for the
compiler to do static analysis on variable initialization.
For example, with two functions like this:
int get_foo(int *foo)
{
if (something_that_might_fail() < 0)
return error("unable to get foo");
*foo = 0;
return 0;
}
void some_fun(void)
{
int foo;
if (get_foo(&foo) < 0)
return -1;
printf("foo is %d\n", foo);
}
If get_foo() is not inlined, then when compiling some_fun,
gcc sees only that a pointer to the local variable is
passed, and must assume that it is an out parameter that
is initialized after get_foo returns.
However, when get_foo() is inlined, the compiler may look at
all of the code together and see that some code paths in
get_foo() do not initialize the variable. As a result, it
prints a warning. But what the compiler can't see is that
error() always returns -1, and therefore we know that either
we return early from some_fun, or foo ends up initialized,
and the code is safe. The warning is a false positive.
If we can make the compiler aware that error() will always
return -1, it can do a better job of analysis. The simplest
method would be to inline the error() function. However,
this doesn't work, because gcc will not inline a variadc
function. We can work around this by defining a macro. This
relies on two gcc extensions:
1. Variadic macros (these are present in C99, but we do
not rely on that).
2. Gcc treats the "##" paste operator specially between a
comma and __VA_ARGS__, which lets our variadic macro
work even if no format parameters are passed to
error().
Since we are using these extra features, we hide the macro
behind an #ifdef. This is OK, though, because our goal was
just to help gcc.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-12-15 18:37:36 +01:00
|
|
|
*/
|
2014-05-06 17:17:50 +02:00
|
|
|
#if defined(__GNUC__)
|
2014-05-06 17:14:42 +02:00
|
|
|
static inline int const_error(void)
|
|
|
|
{
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
#define error(...) (error(__VA_ARGS__), const_error())
|
2016-08-31 05:41:22 +02:00
|
|
|
#define error_errno(...) (error_errno(__VA_ARGS__), const_error())
|
make error()'s constant return value more visible
When git is compiled with "gcc -Wuninitialized -O3", some
inlined calls provide an additional opportunity for the
compiler to do static analysis on variable initialization.
For example, with two functions like this:
int get_foo(int *foo)
{
if (something_that_might_fail() < 0)
return error("unable to get foo");
*foo = 0;
return 0;
}
void some_fun(void)
{
int foo;
if (get_foo(&foo) < 0)
return -1;
printf("foo is %d\n", foo);
}
If get_foo() is not inlined, then when compiling some_fun,
gcc sees only that a pointer to the local variable is
passed, and must assume that it is an out parameter that
is initialized after get_foo returns.
However, when get_foo() is inlined, the compiler may look at
all of the code together and see that some code paths in
get_foo() do not initialize the variable. As a result, it
prints a warning. But what the compiler can't see is that
error() always returns -1, and therefore we know that either
we return early from some_fun, or foo ends up initialized,
and the code is safe. The warning is a false positive.
If we can make the compiler aware that error() will always
return -1, it can do a better job of analysis. The simplest
method would be to inline the error() function. However,
this doesn't work, because gcc will not inline a variadc
function. We can work around this by defining a macro. This
relies on two gcc extensions:
1. Variadic macros (these are present in C99, but we do
not rely on that).
2. Gcc treats the "##" paste operator specially between a
comma and __VA_ARGS__, which lets our variadic macro
work even if no format parameters are passed to
error().
Since we are using these extra features, we hide the macro
behind an #ifdef. This is OK, though, because our goal was
just to help gcc.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-12-15 18:37:36 +01:00
|
|
|
#endif
|
|
|
|
|
2020-10-15 21:30:04 +02:00
|
|
|
typedef void (*report_fn)(const char *, va_list params);
|
|
|
|
|
|
|
|
void set_die_routine(NORETURN_PTR report_fn routine);
|
|
|
|
void set_error_routine(report_fn routine);
|
|
|
|
report_fn get_error_routine(void);
|
|
|
|
void set_warn_routine(report_fn routine);
|
|
|
|
report_fn get_warn_routine(void);
|
2019-04-29 10:28:14 +02:00
|
|
|
void set_die_is_recursing_routine(int (*routine)(void));
|
2006-06-24 04:34:38 +02:00
|
|
|
|
2019-04-29 10:28:14 +02:00
|
|
|
int starts_with(const char *str, const char *prefix);
|
|
|
|
int istarts_with(const char *str, const char *prefix);
|
2008-01-03 10:23:12 +01:00
|
|
|
|
refactor skip_prefix to return a boolean
The skip_prefix() function returns a pointer to the content
past the prefix, or NULL if the prefix was not found. While
this is nice and simple, in practice it makes it hard to use
for two reasons:
1. When you want to conditionally skip or keep the string
as-is, you have to introduce a temporary variable.
For example:
tmp = skip_prefix(buf, "foo");
if (tmp)
buf = tmp;
2. It is verbose to check the outcome in a conditional, as
you need extra parentheses to silence compiler
warnings. For example:
if ((cp = skip_prefix(buf, "foo"))
/* do something with cp */
Both of these make it harder to use for long if-chains, and
we tend to use starts_with() instead. However, the first line
of "do something" is often to then skip forward in buf past
the prefix, either using a magic constant or with an extra
strlen(3) (which is generally computed at compile time, but
means we are repeating ourselves).
This patch refactors skip_prefix() to return a simple boolean,
and to provide the pointer value as an out-parameter. If the
prefix is not found, the out-parameter is untouched. This
lets you write:
if (skip_prefix(arg, "foo ", &arg))
do_foo(arg);
else if (skip_prefix(arg, "bar ", &arg))
do_bar(arg);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-06-18 21:44:19 +02:00
|
|
|
/*
|
|
|
|
* If the string "str" begins with the string found in "prefix", return 1.
|
|
|
|
* The "out" parameter is set to "str + strlen(prefix)" (i.e., to the point in
|
|
|
|
* the string right after the prefix).
|
|
|
|
*
|
|
|
|
* Otherwise, return 0 and leave "out" untouched.
|
|
|
|
*
|
|
|
|
* Examples:
|
|
|
|
*
|
|
|
|
* [extract branch name, fail if not a branch]
|
|
|
|
* if (!skip_prefix(ref, "refs/heads/", &branch)
|
|
|
|
* return -1;
|
|
|
|
*
|
|
|
|
* [skip prefix if present, otherwise use whole string]
|
|
|
|
* skip_prefix(name, "refs/heads/", &name);
|
|
|
|
*/
|
|
|
|
static inline int skip_prefix(const char *str, const char *prefix,
|
|
|
|
const char **out)
|
2008-06-27 18:21:56 +02:00
|
|
|
{
|
2014-03-04 00:22:15 +01:00
|
|
|
do {
|
refactor skip_prefix to return a boolean
The skip_prefix() function returns a pointer to the content
past the prefix, or NULL if the prefix was not found. While
this is nice and simple, in practice it makes it hard to use
for two reasons:
1. When you want to conditionally skip or keep the string
as-is, you have to introduce a temporary variable.
For example:
tmp = skip_prefix(buf, "foo");
if (tmp)
buf = tmp;
2. It is verbose to check the outcome in a conditional, as
you need extra parentheses to silence compiler
warnings. For example:
if ((cp = skip_prefix(buf, "foo"))
/* do something with cp */
Both of these make it harder to use for long if-chains, and
we tend to use starts_with() instead. However, the first line
of "do something" is often to then skip forward in buf past
the prefix, either using a magic constant or with an extra
strlen(3) (which is generally computed at compile time, but
means we are repeating ourselves).
This patch refactors skip_prefix() to return a simple boolean,
and to provide the pointer value as an out-parameter. If the
prefix is not found, the out-parameter is untouched. This
lets you write:
if (skip_prefix(arg, "foo ", &arg))
do_foo(arg);
else if (skip_prefix(arg, "bar ", &arg))
do_bar(arg);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-06-18 21:44:19 +02:00
|
|
|
if (!*prefix) {
|
|
|
|
*out = str;
|
|
|
|
return 1;
|
|
|
|
}
|
2014-03-04 00:22:15 +01:00
|
|
|
} while (*str++ == *prefix++);
|
refactor skip_prefix to return a boolean
The skip_prefix() function returns a pointer to the content
past the prefix, or NULL if the prefix was not found. While
this is nice and simple, in practice it makes it hard to use
for two reasons:
1. When you want to conditionally skip or keep the string
as-is, you have to introduce a temporary variable.
For example:
tmp = skip_prefix(buf, "foo");
if (tmp)
buf = tmp;
2. It is verbose to check the outcome in a conditional, as
you need extra parentheses to silence compiler
warnings. For example:
if ((cp = skip_prefix(buf, "foo"))
/* do something with cp */
Both of these make it harder to use for long if-chains, and
we tend to use starts_with() instead. However, the first line
of "do something" is often to then skip forward in buf past
the prefix, either using a magic constant or with an extra
strlen(3) (which is generally computed at compile time, but
means we are repeating ourselves).
This patch refactors skip_prefix() to return a simple boolean,
and to provide the pointer value as an out-parameter. If the
prefix is not found, the out-parameter is untouched. This
lets you write:
if (skip_prefix(arg, "foo ", &arg))
do_foo(arg);
else if (skip_prefix(arg, "bar ", &arg))
do_bar(arg);
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-06-18 21:44:19 +02:00
|
|
|
return 0;
|
2008-06-27 18:21:56 +02:00
|
|
|
}
|
|
|
|
|
2017-12-09 21:40:07 +01:00
|
|
|
/*
|
|
|
|
* If the string "str" is the same as the string in "prefix", then the "arg"
|
|
|
|
* parameter is set to the "def" parameter and 1 is returned.
|
|
|
|
* If the string "str" begins with the string found in "prefix" and then a
|
|
|
|
* "=" sign, then the "arg" parameter is set to "str + strlen(prefix) + 1"
|
|
|
|
* (i.e., to the point in the string right after the prefix and the "=" sign),
|
|
|
|
* and 1 is returned.
|
|
|
|
*
|
|
|
|
* Otherwise, return 0 and leave "arg" untouched.
|
|
|
|
*
|
|
|
|
* When we accept both a "--key" and a "--key=<val>" option, this function
|
|
|
|
* can be used instead of !strcmp(arg, "--key") and then
|
|
|
|
* skip_prefix(arg, "--key=", &arg) to parse such an option.
|
|
|
|
*/
|
|
|
|
int skip_to_optional_arg_default(const char *str, const char *prefix,
|
|
|
|
const char **arg, const char *def);
|
|
|
|
|
|
|
|
static inline int skip_to_optional_arg(const char *str, const char *prefix,
|
|
|
|
const char **arg)
|
|
|
|
{
|
|
|
|
return skip_to_optional_arg_default(str, prefix, arg, "");
|
|
|
|
}
|
|
|
|
|
2016-06-23 19:33:57 +02:00
|
|
|
/*
|
|
|
|
* Like skip_prefix, but promises never to read past "len" bytes of the input
|
|
|
|
* buffer, and returns the remaining number of bytes in "out" via "outlen".
|
|
|
|
*/
|
|
|
|
static inline int skip_prefix_mem(const char *buf, size_t len,
|
|
|
|
const char *prefix,
|
|
|
|
const char **out, size_t *outlen)
|
|
|
|
{
|
|
|
|
size_t prefix_len = strlen(prefix);
|
|
|
|
if (prefix_len <= len && !memcmp(buf, prefix, prefix_len)) {
|
|
|
|
*out = buf + prefix_len;
|
|
|
|
*outlen = len - prefix_len;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
add strip_suffix function
Many callers of ends_with want to not only find out whether
a string has a suffix, but want to also strip it off. Doing
that separately has two minor problems:
1. We often run over the string twice (once to find
the suffix, and then once more to find its length to
subtract the suffix length).
2. We have to specify the suffix length again, which means
either a magic number, or repeating ourselves with
strlen("suffix").
Just as we have skip_prefix to avoid these cases with
starts_with, we can add a strip_suffix to avoid them with
ends_with.
Note that we add two forms of strip_suffix here: one that
takes a string, with the resulting length as an
out-parameter; and one that takes a pointer/length pair, and
reuses the length as an out-parameter. The latter is more
efficient when the caller already has the length (e.g., when
using strbufs), but it can be easy to confuse the two, as
they take the same number and types of parameters.
For that reason, the "mem" form puts its length parameter
next to the buffer (since they are a pair), and the string
form puts it at the end (since it is an out-parameter). The
compiler can notice when you get the order wrong, which
should help prevent writing one when you meant the other.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-06-30 18:57:51 +02:00
|
|
|
/*
|
|
|
|
* If buf ends with suffix, return 1 and subtract the length of the suffix
|
|
|
|
* from *len. Otherwise, return 0 and leave *len untouched.
|
|
|
|
*/
|
|
|
|
static inline int strip_suffix_mem(const char *buf, size_t *len,
|
|
|
|
const char *suffix)
|
|
|
|
{
|
|
|
|
size_t suflen = strlen(suffix);
|
|
|
|
if (*len < suflen || memcmp(buf + (*len - suflen), suffix, suflen))
|
|
|
|
return 0;
|
|
|
|
*len -= suflen;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If str ends with suffix, return 1 and set *len to the size of the string
|
|
|
|
* without the suffix. Otherwise, return 0 and set *len to the size of the
|
|
|
|
* string.
|
|
|
|
*
|
|
|
|
* Note that we do _not_ NUL-terminate str to the new length.
|
|
|
|
*/
|
|
|
|
static inline int strip_suffix(const char *str, const char *suffix, size_t *len)
|
|
|
|
{
|
|
|
|
*len = strlen(str);
|
|
|
|
return strip_suffix_mem(str, len, suffix);
|
|
|
|
}
|
|
|
|
|
2014-06-30 18:58:08 +02:00
|
|
|
static inline int ends_with(const char *str, const char *suffix)
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
return strip_suffix(str, suffix, &len);
|
|
|
|
}
|
|
|
|
|
2017-01-28 22:38:21 +01:00
|
|
|
#define SWAP(a, b) do { \
|
|
|
|
void *_swap_a_ptr = &(a); \
|
|
|
|
void *_swap_b_ptr = &(b); \
|
|
|
|
unsigned char _swap_buffer[sizeof(a)]; \
|
|
|
|
memcpy(_swap_buffer, _swap_a_ptr, sizeof(a)); \
|
|
|
|
memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) + \
|
|
|
|
BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b))); \
|
|
|
|
memcpy(_swap_b_ptr, _swap_buffer, sizeof(a)); \
|
|
|
|
} while (0)
|
|
|
|
|
2009-03-13 16:50:45 +01:00
|
|
|
#if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
|
2005-12-05 20:54:29 +01:00
|
|
|
|
|
|
|
#ifndef PROT_READ
|
|
|
|
#define PROT_READ 1
|
|
|
|
#define PROT_WRITE 2
|
|
|
|
#define MAP_PRIVATE 1
|
|
|
|
#endif
|
|
|
|
|
2006-12-24 06:45:37 +01:00
|
|
|
#define mmap git_mmap
|
|
|
|
#define munmap git_munmap
|
2019-04-29 10:28:14 +02:00
|
|
|
void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
|
|
|
|
int git_munmap(void *start, size_t length);
|
2005-12-05 20:54:29 +01:00
|
|
|
|
2009-03-13 16:50:45 +01:00
|
|
|
#else /* NO_MMAP || USE_WIN32_MMAP */
|
|
|
|
|
|
|
|
#include <sys/mman.h>
|
|
|
|
|
|
|
|
#endif /* NO_MMAP || USE_WIN32_MMAP */
|
|
|
|
|
|
|
|
#ifdef NO_MMAP
|
|
|
|
|
2007-02-14 22:20:41 +01:00
|
|
|
/* This value must be multiple of (pagesize * 2) */
|
2006-12-24 06:46:13 +01:00
|
|
|
#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024)
|
|
|
|
|
2005-12-05 20:54:29 +01:00
|
|
|
#else /* NO_MMAP */
|
|
|
|
|
2007-02-14 22:20:41 +01:00
|
|
|
/* This value must be multiple of (pagesize * 2) */
|
2007-01-05 04:28:08 +01:00
|
|
|
#define DEFAULT_PACKED_GIT_WINDOW_SIZE \
|
|
|
|
(sizeof(void*) >= 8 \
|
|
|
|
? 1 * 1024 * 1024 * 1024 \
|
|
|
|
: 32 * 1024 * 1024)
|
2005-12-05 20:54:29 +01:00
|
|
|
|
|
|
|
#endif /* NO_MMAP */
|
|
|
|
|
2010-05-14 11:31:39 +02:00
|
|
|
#ifndef MAP_FAILED
|
|
|
|
#define MAP_FAILED ((void *)-1)
|
|
|
|
#endif
|
|
|
|
|
2008-08-18 21:57:16 +02:00
|
|
|
#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
|
|
|
|
#define on_disk_bytes(st) ((st).st_size)
|
|
|
|
#else
|
|
|
|
#define on_disk_bytes(st) ((st).st_blocks * 512)
|
|
|
|
#endif
|
|
|
|
|
2014-12-04 03:24:17 +01:00
|
|
|
#ifdef NEEDS_MODE_TRANSLATION
|
|
|
|
#undef S_IFMT
|
|
|
|
#undef S_IFREG
|
|
|
|
#undef S_IFDIR
|
|
|
|
#undef S_IFLNK
|
|
|
|
#undef S_IFBLK
|
|
|
|
#undef S_IFCHR
|
|
|
|
#undef S_IFIFO
|
|
|
|
#undef S_IFSOCK
|
|
|
|
#define S_IFMT 0170000
|
|
|
|
#define S_IFREG 0100000
|
|
|
|
#define S_IFDIR 0040000
|
|
|
|
#define S_IFLNK 0120000
|
|
|
|
#define S_IFBLK 0060000
|
|
|
|
#define S_IFCHR 0020000
|
|
|
|
#define S_IFIFO 0010000
|
|
|
|
#define S_IFSOCK 0140000
|
|
|
|
#ifdef stat
|
|
|
|
#undef stat
|
|
|
|
#endif
|
|
|
|
#define stat(path, buf) git_stat(path, buf)
|
2019-04-29 10:28:14 +02:00
|
|
|
int git_stat(const char *, struct stat *);
|
2014-12-04 03:24:17 +01:00
|
|
|
#ifdef fstat
|
|
|
|
#undef fstat
|
|
|
|
#endif
|
|
|
|
#define fstat(fd, buf) git_fstat(fd, buf)
|
2019-04-29 10:28:14 +02:00
|
|
|
int git_fstat(int, struct stat *);
|
2014-12-04 03:24:17 +01:00
|
|
|
#ifdef lstat
|
|
|
|
#undef lstat
|
|
|
|
#endif
|
|
|
|
#define lstat(path, buf) git_lstat(path, buf)
|
2019-04-29 10:28:14 +02:00
|
|
|
int git_lstat(const char *, struct stat *);
|
2014-12-04 03:24:17 +01:00
|
|
|
#endif
|
|
|
|
|
2007-01-05 04:28:08 +01:00
|
|
|
#define DEFAULT_PACKED_GIT_LIMIT \
|
Increase core.packedGitLimit
When core.packedGitLimit is exceeded, git will close packs. If there
is a repack operation going on in parallel with a fetch, the fetch
might open a pack, and then be forced to close it due to
packedGitLimit being hit. The repack could then delete the pack
out from under the fetch, causing the fetch to fail.
Increase core.packedGitLimit's default value to prevent
this.
On current 64-bit x86_64 machines, 48 bits of address space are
available. It appears that 64-bit ARM machines have no standard
amount of address space (that is, it varies by manufacturer), and IA64
and POWER machines have the full 64 bits. So 48 bits is the only
limit that we can reasonably care about. We reserve a few bits of the
48-bit address space for the kernel's use (this is not strictly
necessary, but it's better to be safe), and use up to the remaining
45. No git repository will be anywhere near this large any time soon,
so this should prevent the failure.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: David Turner <dturner@twosigma.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-04-20 22:41:18 +02:00
|
|
|
((1024L * 1024L) * (size_t)(sizeof(void*) >= 8 ? (32 * 1024L * 1024L) : 256))
|
2006-12-24 06:46:13 +01:00
|
|
|
|
2007-01-09 22:04:12 +01:00
|
|
|
#ifdef NO_PREAD
|
|
|
|
#define pread git_pread
|
2019-04-29 10:28:14 +02:00
|
|
|
ssize_t git_pread(int fd, void *buf, size_t count, off_t offset);
|
2007-01-09 22:04:12 +01:00
|
|
|
#endif
|
2007-11-17 20:48:14 +01:00
|
|
|
/*
|
|
|
|
* Forward decl that will remind us if its twin in cache.h changes.
|
|
|
|
* This function is used in compat/pread.c. But we can't include
|
|
|
|
* cache.h there.
|
|
|
|
*/
|
2019-04-29 10:28:14 +02:00
|
|
|
ssize_t read_in_full(int fd, void *buf, size_t count);
|
2007-01-09 22:04:12 +01:00
|
|
|
|
2005-12-05 20:54:29 +01:00
|
|
|
#ifdef NO_SETENV
|
|
|
|
#define setenv gitsetenv
|
2019-04-29 10:28:14 +02:00
|
|
|
int gitsetenv(const char *, const char *, int);
|
2005-12-05 20:54:29 +01:00
|
|
|
#endif
|
|
|
|
|
2007-10-20 22:03:49 +02:00
|
|
|
#ifdef NO_MKDTEMP
|
|
|
|
#define mkdtemp gitmkdtemp
|
2019-04-29 10:28:14 +02:00
|
|
|
char *gitmkdtemp(char *);
|
2007-10-20 22:03:49 +02:00
|
|
|
#endif
|
2009-05-31 10:35:50 +02:00
|
|
|
|
2006-01-25 21:38:36 +01:00
|
|
|
#ifdef NO_UNSETENV
|
|
|
|
#define unsetenv gitunsetenv
|
2019-04-29 10:28:14 +02:00
|
|
|
void gitunsetenv(const char *);
|
2006-01-25 21:38:36 +01:00
|
|
|
#endif
|
|
|
|
|
2005-12-05 20:54:29 +01:00
|
|
|
#ifdef NO_STRCASESTR
|
|
|
|
#define strcasestr gitstrcasestr
|
2019-04-29 10:28:14 +02:00
|
|
|
char *gitstrcasestr(const char *haystack, const char *needle);
|
2005-12-05 20:54:29 +01:00
|
|
|
#endif
|
|
|
|
|
2006-06-24 16:01:25 +02:00
|
|
|
#ifdef NO_STRLCPY
|
|
|
|
#define strlcpy gitstrlcpy
|
2019-04-29 10:28:14 +02:00
|
|
|
size_t gitstrlcpy(char *, const char *, size_t);
|
2006-06-24 16:01:25 +02:00
|
|
|
#endif
|
|
|
|
|
2007-02-20 01:22:56 +01:00
|
|
|
#ifdef NO_STRTOUMAX
|
|
|
|
#define strtoumax gitstrtoumax
|
2019-04-29 10:28:14 +02:00
|
|
|
uintmax_t gitstrtoumax(const char *, char **, int);
|
2011-11-05 16:37:34 +01:00
|
|
|
#define strtoimax gitstrtoimax
|
2019-04-29 10:28:14 +02:00
|
|
|
intmax_t gitstrtoimax(const char *, char **, int);
|
2007-02-20 01:22:56 +01:00
|
|
|
#endif
|
|
|
|
|
2007-06-13 20:54:32 +02:00
|
|
|
#ifdef NO_HSTRERROR
|
|
|
|
#define hstrerror githstrerror
|
2019-04-29 10:28:14 +02:00
|
|
|
const char *githstrerror(int herror);
|
2007-06-13 20:54:32 +02:00
|
|
|
#endif
|
|
|
|
|
2007-09-07 00:32:54 +02:00
|
|
|
#ifdef NO_MEMMEM
|
|
|
|
#define memmem gitmemmem
|
|
|
|
void *gitmemmem(const void *haystack, size_t haystacklen,
|
2018-12-06 16:42:06 +01:00
|
|
|
const void *needle, size_t needlelen);
|
2007-09-07 00:32:54 +02:00
|
|
|
#endif
|
|
|
|
|
2016-09-03 17:59:15 +02:00
|
|
|
#ifdef OVERRIDE_STRDUP
|
|
|
|
#ifdef strdup
|
|
|
|
#undef strdup
|
|
|
|
#endif
|
|
|
|
#define strdup gitstrdup
|
|
|
|
char *gitstrdup(const char *s);
|
|
|
|
#endif
|
2007-09-07 00:32:54 +02:00
|
|
|
|
2012-12-18 23:03:55 +01:00
|
|
|
#ifdef NO_GETPAGESIZE
|
|
|
|
#define getpagesize() sysconf(_SC_PAGESIZE)
|
|
|
|
#endif
|
|
|
|
|
2016-08-22 14:47:55 +02:00
|
|
|
#ifndef O_CLOEXEC
|
|
|
|
#define O_CLOEXEC 0
|
|
|
|
#endif
|
|
|
|
|
2008-02-09 03:32:47 +01:00
|
|
|
#ifdef FREAD_READS_DIRECTORIES
|
2017-05-08 22:45:56 +02:00
|
|
|
# if !defined(SUPPRESS_FOPEN_REDEFINITION)
|
|
|
|
# ifdef fopen
|
|
|
|
# undef fopen
|
|
|
|
# endif
|
|
|
|
# define fopen(a,b) git_fopen(a,b)
|
|
|
|
# endif
|
2019-04-29 10:28:14 +02:00
|
|
|
FILE *git_fopen(const char*, const char*);
|
2008-02-09 03:32:47 +01:00
|
|
|
#endif
|
|
|
|
|
2008-03-05 16:46:13 +01:00
|
|
|
#ifdef SNPRINTF_RETURNS_BOGUS
|
2014-01-31 07:25:12 +01:00
|
|
|
#ifdef snprintf
|
|
|
|
#undef snprintf
|
|
|
|
#endif
|
2008-03-05 16:46:13 +01:00
|
|
|
#define snprintf git_snprintf
|
2019-04-29 10:28:20 +02:00
|
|
|
int git_snprintf(char *str, size_t maxsize,
|
2019-04-29 10:28:23 +02:00
|
|
|
const char *format, ...);
|
2014-01-31 07:25:12 +01:00
|
|
|
#ifdef vsnprintf
|
|
|
|
#undef vsnprintf
|
|
|
|
#endif
|
2008-03-05 16:46:13 +01:00
|
|
|
#define vsnprintf git_vsnprintf
|
2019-04-29 10:28:14 +02:00
|
|
|
int git_vsnprintf(char *str, size_t maxsize,
|
2019-04-29 10:28:23 +02:00
|
|
|
const char *format, va_list ap);
|
2008-03-05 16:46:13 +01:00
|
|
|
#endif
|
|
|
|
|
Makefile: add OPEN_RETURNS_EINTR knob
On some platforms, open() reportedly returns EINTR when opening regular
files and we receive a signal (usually SIGALRM from our progress meter).
This shouldn't happen, as open() should be a restartable syscall, and we
specify SA_RESTART when setting up the alarm handler. So it may actually
be a kernel or libc bug for this to happen. But it has been reported on
at least one version of Linux (on a network filesystem):
https://lore.kernel.org/git/c8061cce-71e4-17bd-a56a-a5fed93804da@neanderfunk.de/
as well as on macOS starting with Big Sur even on a regular filesystem.
We can work around it by retrying open() calls that get EINTR, just as
we do for read(), etc. Since we don't ever _want_ to interrupt an open()
call, we can get away with just redefining open, rather than insisting
all callsites use xopen().
We actually do have an xopen() wrapper already (and it even does this
retry, though there's no indication of it being an observed problem back
then; it seems simply to have been lifted from xread(), etc). But it is
used hardly anywhere, and isn't suitable for general use because it will
die() on error. In theory we could combine the two, but it's awkward to
do so because of the variable-args interface of open().
This patch adds a Makefile knob for enabling the workaround. It's not
enabled by default for any platforms in config.mak.uname yet, as we
don't have enough data to decide how common this is (I have not been
able to reproduce on either Linux or Big Sur myself). It may be worth
enabling preemptively anyway, since the cost is pretty low (if we don't
see an EINTR, it's just an extra conditional).
However, note that we must not enable this on Windows. It doesn't do
anything there, and the macro overrides the existing mingw_open()
redirection. I've added a preemptive #undef here in the mingw header
(which is processed first) to just quietly disable it (we could also
make it an #error, but there is little point in being so aggressive).
Reported-by: Aleksey Kliger <alklig@microsoft.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-02-26 07:14:35 +01:00
|
|
|
#ifdef OPEN_RETURNS_EINTR
|
|
|
|
#undef open
|
|
|
|
#define open git_open_with_retry
|
|
|
|
int git_open_with_retry(const char *path, int flag, ...);
|
|
|
|
#endif
|
|
|
|
|
2007-11-12 11:09:05 +01:00
|
|
|
#ifdef __GLIBC_PREREQ
|
|
|
|
#if __GLIBC_PREREQ(2, 1)
|
|
|
|
#define HAVE_STRCHRNUL
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef HAVE_STRCHRNUL
|
2007-11-09 01:49:36 +01:00
|
|
|
#define strchrnul gitstrchrnul
|
2007-11-10 12:55:48 +01:00
|
|
|
static inline char *gitstrchrnul(const char *s, int c)
|
|
|
|
{
|
|
|
|
while (*s && *s != c)
|
|
|
|
s++;
|
|
|
|
return (char *)s;
|
|
|
|
}
|
2007-11-09 01:49:36 +01:00
|
|
|
#endif
|
|
|
|
|
2010-11-04 02:35:11 +01:00
|
|
|
#ifdef NO_INET_PTON
|
|
|
|
int inet_pton(int af, const char *src, void *dst);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef NO_INET_NTOP
|
|
|
|
const char *inet_ntop(int af, const void *src, char *dst, size_t size);
|
|
|
|
#endif
|
|
|
|
|
2014-10-18 14:31:15 +02:00
|
|
|
#ifdef NO_PTHREADS
|
|
|
|
#define atexit git_atexit
|
2019-04-29 10:28:14 +02:00
|
|
|
int git_atexit(void (*handler)(void));
|
2014-10-18 14:31:15 +02:00
|
|
|
#endif
|
|
|
|
|
add helpers for detecting size_t overflow
Performing computations on size_t variables that we feed to
xmalloc and friends can be dangerous, as an integer overflow
can cause us to allocate a much smaller chunk than we
realized.
We already have unsigned_add_overflows(), but let's add
unsigned_mult_overflows() to that. Furthermore, rather than
have each site manually check and die on overflow, we can
provide some helpers that will:
- promote the arguments to size_t, so that we know we are
doing our computation in the same size of integer that
will ultimately be fed to xmalloc
- check and die on overflow
- return the result so that computations can be done in
the parameter list of xmalloc.
These functions are a lot uglier to use than normal
arithmetic operators (you have to do "st_add(foo, bar)"
instead of "foo + bar"). To at least limit the damage, we
also provide multi-valued versions. So rather than:
st_add(st_add(a, b), st_add(c, d));
you can write:
st_add4(a, b, c, d);
This isn't nearly as elegant as a varargs function, but it's
a lot harder to get it wrong. You don't have to remember to
add a sentinel value at the end, and the compiler will
complain if you get the number of arguments wrong. This
patch adds only the numbered variants required to convert
the current code base; we can easily add more later if
needed.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-02-19 12:21:19 +01:00
|
|
|
static inline size_t st_add(size_t a, size_t b)
|
|
|
|
{
|
|
|
|
if (unsigned_add_overflows(a, b))
|
|
|
|
die("size_t overflow: %"PRIuMAX" + %"PRIuMAX,
|
|
|
|
(uintmax_t)a, (uintmax_t)b);
|
|
|
|
return a + b;
|
|
|
|
}
|
2016-03-21 05:35:57 +01:00
|
|
|
#define st_add3(a,b,c) st_add(st_add((a),(b)),(c))
|
|
|
|
#define st_add4(a,b,c,d) st_add(st_add3((a),(b),(c)),(d))
|
add helpers for detecting size_t overflow
Performing computations on size_t variables that we feed to
xmalloc and friends can be dangerous, as an integer overflow
can cause us to allocate a much smaller chunk than we
realized.
We already have unsigned_add_overflows(), but let's add
unsigned_mult_overflows() to that. Furthermore, rather than
have each site manually check and die on overflow, we can
provide some helpers that will:
- promote the arguments to size_t, so that we know we are
doing our computation in the same size of integer that
will ultimately be fed to xmalloc
- check and die on overflow
- return the result so that computations can be done in
the parameter list of xmalloc.
These functions are a lot uglier to use than normal
arithmetic operators (you have to do "st_add(foo, bar)"
instead of "foo + bar"). To at least limit the damage, we
also provide multi-valued versions. So rather than:
st_add(st_add(a, b), st_add(c, d));
you can write:
st_add4(a, b, c, d);
This isn't nearly as elegant as a varargs function, but it's
a lot harder to get it wrong. You don't have to remember to
add a sentinel value at the end, and the compiler will
complain if you get the number of arguments wrong. This
patch adds only the numbered variants required to convert
the current code base; we can easily add more later if
needed.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-02-19 12:21:19 +01:00
|
|
|
|
|
|
|
static inline size_t st_mult(size_t a, size_t b)
|
|
|
|
{
|
|
|
|
if (unsigned_mult_overflows(a, b))
|
|
|
|
die("size_t overflow: %"PRIuMAX" * %"PRIuMAX,
|
|
|
|
(uintmax_t)a, (uintmax_t)b);
|
|
|
|
return a * b;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline size_t st_sub(size_t a, size_t b)
|
|
|
|
{
|
|
|
|
if (a < b)
|
|
|
|
die("size_t underflow: %"PRIuMAX" - %"PRIuMAX,
|
|
|
|
(uintmax_t)a, (uintmax_t)b);
|
|
|
|
return a - b;
|
|
|
|
}
|
2010-03-24 21:22:34 +01:00
|
|
|
|
Portable alloca for Git
In the next patch we'll have to use alloca() for performance reasons,
but since alloca is non-standardized and is not portable, let's have a
trick with compatibility wrappers:
1. at configure time, determine, do we have working alloca() through
alloca.h, and define
#define HAVE_ALLOCA_H
if yes.
2. in code
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
# define xalloca(size) (alloca(size))
# define xalloca_free(p) do {} while(0)
#else
# define xalloca(size) (xmalloc(size))
# define xalloca_free(p) (free(p))
#endif
and use it like
func() {
p = xalloca(size);
...
xalloca_free(p);
}
This way, for systems, where alloca is available, we'll have optimal
on-stack allocations with fast executions. On the other hand, on
systems, where alloca is not available, this gracefully fallbacks to
xmalloc/free.
Both autoconf and config.mak.uname configurations were updated. For
autoconf, we are not bothering considering cases, when no alloca.h is
available, but alloca() works some other way - its simply alloca.h is
available and works or not, everything else is deep legacy.
For config.mak.uname, I've tried to make my almost-sure guess for where
alloca() is available, but since I only have access to Linux it is the
only change I can be sure about myself, with relevant to other changed
systems people Cc'ed.
NOTE
SunOS and Windows had explicit -DHAVE_ALLOCA_H in their configurations.
I've changed that to now-common HAVE_ALLOCA_H=YesPlease which should be
correct.
Cc: Brandon Casey <drafnel@gmail.com>
Cc: Marius Storm-Olsen <mstormo@gmail.com>
Cc: Johannes Sixt <j6t@kdbg.org>
Cc: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Cc: Gerrit Pape <pape@smarden.org>
Cc: Petr Salinger <Petr.Salinger@seznam.cz>
Cc: Jonathan Nieder <jrnieder@gmail.com>
Acked-by: Thomas Schwinge <thomas@codesourcery.com> (GNU Hurd changes)
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-03-27 15:22:50 +01:00
|
|
|
#ifdef HAVE_ALLOCA_H
|
|
|
|
# include <alloca.h>
|
|
|
|
# define xalloca(size) (alloca(size))
|
|
|
|
# define xalloca_free(p) do {} while (0)
|
|
|
|
#else
|
|
|
|
# define xalloca(size) (xmalloc(size))
|
|
|
|
# define xalloca_free(p) (free(p))
|
|
|
|
#endif
|
2019-04-29 10:28:14 +02:00
|
|
|
char *xstrdup(const char *str);
|
|
|
|
void *xmalloc(size_t size);
|
|
|
|
void *xmallocz(size_t size);
|
|
|
|
void *xmallocz_gently(size_t size);
|
|
|
|
void *xmemdupz(const void *data, size_t len);
|
|
|
|
char *xstrndup(const char *str, size_t len);
|
|
|
|
void *xrealloc(void *ptr, size_t size);
|
|
|
|
void *xcalloc(size_t nmemb, size_t size);
|
|
|
|
void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
|
2021-06-30 02:01:32 +02:00
|
|
|
const char *mmap_os_err(void);
|
2019-04-29 10:28:14 +02:00
|
|
|
void *xmmap_gently(void *start, size_t length, int prot, int flags, int fd, off_t offset);
|
2019-04-29 10:28:20 +02:00
|
|
|
int xopen(const char *path, int flags, ...);
|
2019-04-29 10:28:14 +02:00
|
|
|
ssize_t xread(int fd, void *buf, size_t len);
|
|
|
|
ssize_t xwrite(int fd, const void *buf, size_t len);
|
|
|
|
ssize_t xpread(int fd, void *buf, size_t len, off_t offset);
|
|
|
|
int xdup(int fd);
|
|
|
|
FILE *xfopen(const char *path, const char *mode);
|
|
|
|
FILE *xfdopen(int fd, const char *mode);
|
|
|
|
int xmkstemp(char *temp_filename);
|
|
|
|
int xmkstemp_mode(char *temp_filename, int mode);
|
|
|
|
char *xgetcwd(void);
|
|
|
|
FILE *fopen_for_writing(const char *path);
|
|
|
|
FILE *fopen_or_warn(const char *path, const char *mode);
|
2007-08-14 21:44:53 +02:00
|
|
|
|
wrapper: add function to compare strings with different NUL termination
When parsing capabilities for the pack protocol, there are times we'll
want to compare the value of a capability to a NUL-terminated string.
Since the data we're reading will be space-terminated, not
NUL-terminated, we need a function that compares the two strings, but
also checks that they're the same length. Otherwise, if we used strncmp
to compare these strings, we might accidentally accept a parameter that
was a prefix of the expected value.
Add a function, xstrncmpz, that takes a NUL-terminated string and a
non-NUL-terminated string, plus a length, and compares them, ensuring
that they are the same length.
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-25 21:58:50 +02:00
|
|
|
/*
|
|
|
|
* Like strncmp, but only return zero if s is NUL-terminated and exactly len
|
|
|
|
* characters long. If it is not, consider it greater than t.
|
|
|
|
*/
|
|
|
|
int xstrncmpz(const char *s, const char *t, size_t len);
|
|
|
|
|
2017-06-15 23:06:59 +02:00
|
|
|
/*
|
|
|
|
* FREE_AND_NULL(ptr) is like free(ptr) followed by ptr = NULL. Note
|
|
|
|
* that ptr is used twice, so don't pass e.g. ptr++.
|
|
|
|
*/
|
|
|
|
#define FREE_AND_NULL(p) do { free(p); (p) = NULL; } while (0)
|
|
|
|
|
2016-02-22 23:43:18 +01:00
|
|
|
#define ALLOC_ARRAY(x, alloc) (x) = xmalloc(st_mult(sizeof(*(x)), (alloc)))
|
2021-03-13 17:10:47 +01:00
|
|
|
#define CALLOC_ARRAY(x, alloc) (x) = xcalloc((alloc), sizeof(*(x)))
|
2016-02-22 23:43:18 +01:00
|
|
|
#define REALLOC_ARRAY(x, alloc) (x) = xrealloc((x), st_mult(sizeof(*(x)), (alloc)))
|
2014-09-16 20:56:48 +02:00
|
|
|
|
2016-09-25 09:15:42 +02:00
|
|
|
#define COPY_ARRAY(dst, src, n) copy_array((dst), (src), (n), sizeof(*(dst)) + \
|
|
|
|
BUILD_ASSERT_OR_ZERO(sizeof(*(dst)) == sizeof(*(src))))
|
|
|
|
static inline void copy_array(void *dst, const void *src, size_t n, size_t size)
|
|
|
|
{
|
|
|
|
if (n)
|
|
|
|
memcpy(dst, src, st_mult(size, n));
|
|
|
|
}
|
|
|
|
|
2017-07-15 21:36:20 +02:00
|
|
|
#define MOVE_ARRAY(dst, src, n) move_array((dst), (src), (n), sizeof(*(dst)) + \
|
|
|
|
BUILD_ASSERT_OR_ZERO(sizeof(*(dst)) == sizeof(*(src))))
|
|
|
|
static inline void move_array(void *dst, const void *src, size_t n, size_t size)
|
|
|
|
{
|
|
|
|
if (n)
|
|
|
|
memmove(dst, src, st_mult(size, n));
|
|
|
|
}
|
|
|
|
|
2016-02-22 23:43:25 +01:00
|
|
|
/*
|
|
|
|
* These functions help you allocate structs with flex arrays, and copy
|
|
|
|
* the data directly into the array. For example, if you had:
|
|
|
|
*
|
|
|
|
* struct foo {
|
|
|
|
* int bar;
|
|
|
|
* char name[FLEX_ARRAY];
|
|
|
|
* };
|
|
|
|
*
|
|
|
|
* you can do:
|
|
|
|
*
|
|
|
|
* struct foo *f;
|
|
|
|
* FLEX_ALLOC_MEM(f, name, src, len);
|
|
|
|
*
|
|
|
|
* to allocate a "foo" with the contents of "src" in the "name" field.
|
|
|
|
* The resulting struct is automatically zero'd, and the flex-array field
|
|
|
|
* is NUL-terminated (whether the incoming src buffer was or not).
|
|
|
|
*
|
|
|
|
* The FLEXPTR_* variants operate on structs that don't use flex-arrays,
|
|
|
|
* but do want to store a pointer to some extra data in the same allocated
|
|
|
|
* block. For example, if you have:
|
|
|
|
*
|
|
|
|
* struct foo {
|
|
|
|
* char *name;
|
|
|
|
* int bar;
|
|
|
|
* };
|
|
|
|
*
|
|
|
|
* you can do:
|
|
|
|
*
|
|
|
|
* struct foo *f;
|
2016-08-13 11:01:21 +02:00
|
|
|
* FLEXPTR_ALLOC_STR(f, name, src);
|
2016-02-22 23:43:25 +01:00
|
|
|
*
|
|
|
|
* and "name" will point to a block of memory after the struct, which will be
|
|
|
|
* freed along with the struct (but the pointer can be repointed anywhere).
|
|
|
|
*
|
|
|
|
* The *_STR variants accept a string parameter rather than a ptr/len
|
|
|
|
* combination.
|
|
|
|
*
|
|
|
|
* Note that these macros will evaluate the first parameter multiple
|
|
|
|
* times, and it must be assignable as an lvalue.
|
|
|
|
*/
|
|
|
|
#define FLEX_ALLOC_MEM(x, flexname, buf, len) do { \
|
2016-10-15 18:23:11 +02:00
|
|
|
size_t flex_array_len_ = (len); \
|
|
|
|
(x) = xcalloc(1, st_add3(sizeof(*(x)), flex_array_len_, 1)); \
|
|
|
|
memcpy((void *)(x)->flexname, (buf), flex_array_len_); \
|
2016-02-22 23:43:25 +01:00
|
|
|
} while (0)
|
|
|
|
#define FLEXPTR_ALLOC_MEM(x, ptrname, buf, len) do { \
|
2016-10-16 12:06:02 +02:00
|
|
|
size_t flex_array_len_ = (len); \
|
|
|
|
(x) = xcalloc(1, st_add3(sizeof(*(x)), flex_array_len_, 1)); \
|
|
|
|
memcpy((x) + 1, (buf), flex_array_len_); \
|
2016-02-22 23:43:25 +01:00
|
|
|
(x)->ptrname = (void *)((x)+1); \
|
|
|
|
} while(0)
|
|
|
|
#define FLEX_ALLOC_STR(x, flexname, str) \
|
|
|
|
FLEX_ALLOC_MEM((x), flexname, (str), strlen(str))
|
|
|
|
#define FLEXPTR_ALLOC_STR(x, ptrname, str) \
|
|
|
|
FLEXPTR_ALLOC_MEM((x), ptrname, (str), strlen(str))
|
|
|
|
|
2015-01-13 02:57:37 +01:00
|
|
|
static inline char *xstrdup_or_null(const char *str)
|
|
|
|
{
|
|
|
|
return str ? xstrdup(str) : NULL;
|
|
|
|
}
|
|
|
|
|
2007-03-07 02:44:37 +01:00
|
|
|
static inline size_t xsize_t(off_t len)
|
|
|
|
{
|
xsize_t: avoid implementation defined behavior when len < 0
The xsize_t helper aims to safely convert an off_t to a size_t,
erroring out when a file offset is too large to fit into a memory
address. It does this by using two casts:
size_t size = (size_t) len;
if (len != (off_t) size)
... error out ...
On a platform with sizeof(size_t) < sizeof(off_t), this check is safe
and correct. The first cast truncates to a size_t by finding the
remainder modulo SIZE_MAX+1 (see C99 section 6.3.1.3 Signed and
unsigned integers) and the second promotes to an off_t, meaning the
result is true if and only if len is representable as a size_t.
On other platforms, this two-casts strategy still works well (always
succeeds) for len >= 0. But for len < 0, when the first cast succeeds
and produces SIZE_MAX + 1 + len, the resulting value is too large to
be represented as an off_t, so the second cast produces implementation
defined behavior. In practice, it is likely to produce a result of
true despite len not being representable as size_t.
Simplify by replacing with a more straightforward check: compare len
to the relevant bounds and then cast it. (To avoid a -Wsign-compare
warning, after checking that len >= 0, we explicitly convert to a
sufficiently-large unsigned type before comparing to SIZE_MAX.)
In practice, this is not likely to come up since typical callers use
nonnegative len. Still, it's helpful to handle this case to make the
behavior easy to reason about.
Historical note: the original bounds-checking in 46be82dfd0 (xsize_t:
check whether we lose bits, 2010-07-28) did not produce this
implementation-defined behavior, though it still did not handle
negative offsets. It was not until 73560c793a (git-compat-util.h:
xsize_t() - avoid -Wsign-compare warnings, 2017-09-21) introduced the
double cast that the implementation-defined behavior was triggered.
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-19 03:52:56 +02:00
|
|
|
if (len < 0 || (uintmax_t) len > SIZE_MAX)
|
2010-07-28 18:36:31 +02:00
|
|
|
die("Cannot handle files this big");
|
xsize_t: avoid implementation defined behavior when len < 0
The xsize_t helper aims to safely convert an off_t to a size_t,
erroring out when a file offset is too large to fit into a memory
address. It does this by using two casts:
size_t size = (size_t) len;
if (len != (off_t) size)
... error out ...
On a platform with sizeof(size_t) < sizeof(off_t), this check is safe
and correct. The first cast truncates to a size_t by finding the
remainder modulo SIZE_MAX+1 (see C99 section 6.3.1.3 Signed and
unsigned integers) and the second promotes to an off_t, meaning the
result is true if and only if len is representable as a size_t.
On other platforms, this two-casts strategy still works well (always
succeeds) for len >= 0. But for len < 0, when the first cast succeeds
and produces SIZE_MAX + 1 + len, the resulting value is too large to
be represented as an off_t, so the second cast produces implementation
defined behavior. In practice, it is likely to produce a result of
true despite len not being representable as size_t.
Simplify by replacing with a more straightforward check: compare len
to the relevant bounds and then cast it. (To avoid a -Wsign-compare
warning, after checking that len >= 0, we explicitly convert to a
sufficiently-large unsigned type before comparing to SIZE_MAX.)
In practice, this is not likely to come up since typical callers use
nonnegative len. Still, it's helpful to handle this case to make the
behavior easy to reason about.
Historical note: the original bounds-checking in 46be82dfd0 (xsize_t:
check whether we lose bits, 2010-07-28) did not produce this
implementation-defined behavior, though it still did not handle
negative offsets. It was not until 73560c793a (git-compat-util.h:
xsize_t() - avoid -Wsign-compare warnings, 2017-09-21) introduced the
double cast that the implementation-defined behavior was triggered.
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-19 03:52:56 +02:00
|
|
|
return (size_t) len;
|
2007-03-07 02:44:37 +01:00
|
|
|
}
|
|
|
|
|
2015-09-24 23:05:37 +02:00
|
|
|
__attribute__((format (printf, 3, 4)))
|
2019-04-29 10:28:20 +02:00
|
|
|
int xsnprintf(char *dst, size_t max, const char *fmt, ...);
|
2015-09-24 23:05:37 +02:00
|
|
|
|
2017-04-18 23:57:42 +02:00
|
|
|
#ifndef HOST_NAME_MAX
|
|
|
|
#define HOST_NAME_MAX 256
|
|
|
|
#endif
|
|
|
|
|
2019-04-29 10:28:14 +02:00
|
|
|
int xgethostname(char *buf, size_t len);
|
2017-04-18 23:57:43 +02:00
|
|
|
|
2012-03-04 20:10:57 +01:00
|
|
|
/* in ctype.c, for kwset users */
|
2015-03-02 20:22:31 +01:00
|
|
|
extern const unsigned char tolower_trans_tbl[256];
|
2012-03-04 20:10:57 +01:00
|
|
|
|
2005-12-05 20:54:29 +01:00
|
|
|
/* Sane ctype - no locale, and works with signed chars */
|
2009-03-07 14:06:49 +01:00
|
|
|
#undef isascii
|
2005-12-05 20:54:29 +01:00
|
|
|
#undef isspace
|
|
|
|
#undef isdigit
|
|
|
|
#undef isalpha
|
|
|
|
#undef isalnum
|
2012-10-18 16:43:32 +02:00
|
|
|
#undef isprint
|
2012-02-10 03:13:31 +01:00
|
|
|
#undef islower
|
|
|
|
#undef isupper
|
2005-12-05 20:54:29 +01:00
|
|
|
#undef tolower
|
|
|
|
#undef toupper
|
2012-10-15 08:25:51 +02:00
|
|
|
#undef iscntrl
|
|
|
|
#undef ispunct
|
|
|
|
#undef isxdigit
|
2013-01-10 22:47:15 +01:00
|
|
|
|
2012-10-15 08:25:50 +02:00
|
|
|
extern const unsigned char sane_ctype[256];
|
2005-12-05 20:54:29 +01:00
|
|
|
#define GIT_SPACE 0x01
|
|
|
|
#define GIT_DIGIT 0x02
|
|
|
|
#define GIT_ALPHA 0x04
|
2009-01-17 16:50:34 +01:00
|
|
|
#define GIT_GLOB_SPECIAL 0x08
|
2009-01-17 16:50:37 +01:00
|
|
|
#define GIT_REGEX_SPECIAL 0x10
|
2011-04-09 01:18:46 +02:00
|
|
|
#define GIT_PATHSPEC_MAGIC 0x20
|
2012-10-15 08:25:51 +02:00
|
|
|
#define GIT_CNTRL 0x40
|
|
|
|
#define GIT_PUNCT 0x80
|
2005-12-05 20:54:29 +01:00
|
|
|
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
|
2009-03-07 14:06:49 +01:00
|
|
|
#define isascii(x) (((x) & ~0x7f) == 0)
|
2005-12-05 20:54:29 +01:00
|
|
|
#define isspace(x) sane_istest(x,GIT_SPACE)
|
|
|
|
#define isdigit(x) sane_istest(x,GIT_DIGIT)
|
|
|
|
#define isalpha(x) sane_istest(x,GIT_ALPHA)
|
|
|
|
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
|
2012-10-18 16:43:32 +02:00
|
|
|
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
|
2012-02-10 03:13:31 +01:00
|
|
|
#define islower(x) sane_iscase(x, 1)
|
|
|
|
#define isupper(x) sane_iscase(x, 0)
|
2009-01-17 16:50:34 +01:00
|
|
|
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
|
2009-01-17 16:50:37 +01:00
|
|
|
#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
|
2012-10-15 08:25:51 +02:00
|
|
|
#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
|
|
|
|
#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
|
|
|
|
GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
|
2014-10-16 00:34:05 +02:00
|
|
|
#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
|
2005-12-05 20:54:29 +01:00
|
|
|
#define tolower(x) sane_case((unsigned char)(x), 0x20)
|
|
|
|
#define toupper(x) sane_case((unsigned char)(x), 0)
|
2011-04-09 01:18:46 +02:00
|
|
|
#define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC)
|
2005-12-05 20:54:29 +01:00
|
|
|
|
|
|
|
static inline int sane_case(int x, int high)
|
|
|
|
{
|
|
|
|
if (sane_istest(x, GIT_ALPHA))
|
|
|
|
x = (x & ~0x20) | high;
|
|
|
|
return x;
|
|
|
|
}
|
|
|
|
|
2012-02-10 03:13:31 +01:00
|
|
|
static inline int sane_iscase(int x, int is_lower)
|
|
|
|
{
|
|
|
|
if (!sane_istest(x, GIT_ALPHA))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (is_lower)
|
|
|
|
return (x & 0x20) != 0;
|
|
|
|
else
|
|
|
|
return (x & 0x20) == 0;
|
|
|
|
}
|
|
|
|
|
2018-05-13 18:57:14 +02:00
|
|
|
/*
|
|
|
|
* Like skip_prefix, but compare case-insensitively. Note that the comparison
|
|
|
|
* is done via tolower(), so it is strictly ASCII (no multi-byte characters or
|
|
|
|
* locale-specific conversions).
|
|
|
|
*/
|
|
|
|
static inline int skip_iprefix(const char *str, const char *prefix,
|
|
|
|
const char **out)
|
|
|
|
{
|
|
|
|
do {
|
|
|
|
if (!*prefix) {
|
|
|
|
*out = str;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
} while (tolower(*str++) == tolower(*prefix++));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-04-10 01:01:44 +02:00
|
|
|
static inline int strtoul_ui(char const *s, int base, unsigned int *result)
|
|
|
|
{
|
|
|
|
unsigned long ul;
|
|
|
|
char *p;
|
|
|
|
|
|
|
|
errno = 0;
|
2015-09-17 18:28:33 +02:00
|
|
|
/* negative values would be accepted by strtoul */
|
|
|
|
if (strchr(s, '-'))
|
|
|
|
return -1;
|
2007-04-10 01:01:44 +02:00
|
|
|
ul = strtoul(s, &p, base);
|
|
|
|
if (errno || *p || p == s || (unsigned int) ul != ul)
|
|
|
|
return -1;
|
|
|
|
*result = ul;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-10-23 22:33:26 +02:00
|
|
|
static inline int strtol_i(char const *s, int base, int *result)
|
|
|
|
{
|
|
|
|
long ul;
|
|
|
|
char *p;
|
|
|
|
|
|
|
|
errno = 0;
|
|
|
|
ul = strtol(s, &p, base);
|
|
|
|
if (errno || *p || p == s || (int) ul != ul)
|
|
|
|
return -1;
|
|
|
|
*result = ul;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-09-30 19:21:54 +02:00
|
|
|
void git_stable_qsort(void *base, size_t nmemb, size_t size,
|
|
|
|
int(*compar)(const void *, const void *));
|
2008-02-05 22:10:44 +01:00
|
|
|
#ifdef INTERNAL_QSORT
|
2019-09-30 19:21:54 +02:00
|
|
|
#define qsort git_stable_qsort
|
2008-02-05 22:10:44 +01:00
|
|
|
#endif
|
|
|
|
|
2016-09-29 17:23:43 +02:00
|
|
|
#define QSORT(base, n, compar) sane_qsort((base), (n), sizeof(*(base)), compar)
|
|
|
|
static inline void sane_qsort(void *base, size_t nmemb, size_t size,
|
|
|
|
int(*compar)(const void *, const void *))
|
|
|
|
{
|
|
|
|
if (nmemb > 1)
|
|
|
|
qsort(base, nmemb, size, compar);
|
|
|
|
}
|
|
|
|
|
2019-09-30 19:21:54 +02:00
|
|
|
#define STABLE_QSORT(base, n, compar) \
|
|
|
|
git_stable_qsort((base), (n), sizeof(*(base)), compar)
|
|
|
|
|
2017-01-22 18:51:11 +01:00
|
|
|
#ifndef HAVE_ISO_QSORT_S
|
|
|
|
int git_qsort_s(void *base, size_t nmemb, size_t size,
|
|
|
|
int (*compar)(const void *, const void *, void *), void *ctx);
|
|
|
|
#define qsort_s git_qsort_s
|
|
|
|
#endif
|
|
|
|
|
2017-01-22 18:52:13 +01:00
|
|
|
#define QSORT_S(base, n, compar, ctx) do { \
|
|
|
|
if (qsort_s((base), (n), sizeof(*(base)), compar, ctx)) \
|
2018-05-02 11:38:41 +02:00
|
|
|
BUG("qsort_s() failed"); \
|
2017-01-22 18:52:13 +01:00
|
|
|
} while (0)
|
|
|
|
|
2016-09-21 20:24:04 +02:00
|
|
|
#ifndef REG_STARTEND
|
|
|
|
#error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size,
|
|
|
|
size_t nmatch, regmatch_t pmatch[], int eflags)
|
|
|
|
{
|
|
|
|
assert(nmatch > 0 && pmatch);
|
|
|
|
pmatch[0].rm_so = 0;
|
|
|
|
pmatch[0].rm_eo = size;
|
|
|
|
return regexec(preg, buf, nmatch, pmatch, eflags | REG_STARTEND);
|
|
|
|
}
|
|
|
|
|
2008-03-05 00:15:39 +01:00
|
|
|
#ifndef DIR_HAS_BSD_GROUP_SEMANTICS
|
|
|
|
# define FORCE_DIR_SET_GID S_ISGID
|
|
|
|
#else
|
|
|
|
# define FORCE_DIR_SET_GID 0
|
|
|
|
#endif
|
|
|
|
|
2009-03-04 18:47:40 +01:00
|
|
|
#ifdef NO_NSEC
|
|
|
|
#undef USE_NSEC
|
|
|
|
#define ST_CTIME_NSEC(st) 0
|
|
|
|
#define ST_MTIME_NSEC(st) 0
|
|
|
|
#else
|
2009-03-08 21:04:28 +01:00
|
|
|
#ifdef USE_ST_TIMESPEC
|
|
|
|
#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec))
|
|
|
|
#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec))
|
|
|
|
#else
|
2009-03-04 18:47:40 +01:00
|
|
|
#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec))
|
|
|
|
#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec))
|
|
|
|
#endif
|
2009-03-08 21:04:28 +01:00
|
|
|
#endif
|
2009-03-04 18:47:40 +01:00
|
|
|
|
2009-04-20 10:17:00 +02:00
|
|
|
#ifdef UNRELIABLE_FSTAT
|
|
|
|
#define fstat_is_reliable() 0
|
|
|
|
#else
|
|
|
|
#define fstat_is_reliable() 1
|
|
|
|
#endif
|
|
|
|
|
2011-02-26 06:08:25 +01:00
|
|
|
#ifndef va_copy
|
2011-03-08 09:33:44 +01:00
|
|
|
/*
|
|
|
|
* Since an obvious implementation of va_list would be to make it a
|
|
|
|
* pointer into the stack frame, a simple assignment will work on
|
|
|
|
* many systems. But let's try to be more portable.
|
|
|
|
*/
|
|
|
|
#ifdef __va_copy
|
|
|
|
#define va_copy(dst, src) __va_copy(dst, src)
|
|
|
|
#else
|
|
|
|
#define va_copy(dst, src) ((dst) = (src))
|
|
|
|
#endif
|
2011-02-26 06:08:25 +01:00
|
|
|
#endif
|
|
|
|
|
git-compat-util: always enable variadic macros
We allow variadic macros in the code base, but only if there is fallback
code for platforms that lack it. This leads to some annoyances:
- the code is more complicated because of the fallbacks (e.g.,
trace_printf(), etc, is implemented twice with a set of parallel
wrappers).
- some constructs are just impossible and we've had to live without
them (e.g., a cross between FLEX_ALLOC and xstrfmt)
Since this feature is present in C99, we may be able to start counting
on it being available everywhere. Let's start with a weather balloon
patch to find out.
This patch makes the absolute minimal change by always setting
HAVE_VARIADIC_MACROS. If somebody runs into a platform where it's a
problem, they can undo it by commenting out the define. Likewise, if we
have to revert this, it would be quite unlikely to cause conflicts.
Once we feel comfortable that this is the right direction, then we can
start ripping out all the spots that actually look at the flag, and
removing the dead code.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-28 06:28:33 +01:00
|
|
|
/*
|
|
|
|
* This is always defined as a first step towards making the use of variadic
|
|
|
|
* macros unconditional. If it causes compilation problems on your platform,
|
|
|
|
* please report it to the Git mailing list at git@vger.kernel.org.
|
|
|
|
*/
|
2014-07-12 02:05:03 +02:00
|
|
|
#define HAVE_VARIADIC_MACROS 1
|
|
|
|
|
2018-05-09 19:04:06 +02:00
|
|
|
/* usage.c: only to be used for testing BUG() implementation (see test-tool) */
|
|
|
|
extern int BUG_exit_code;
|
|
|
|
|
usage.c: add BUG() function
There's a convention in Git's code base to write assertions
as:
if (...some_bad_thing...)
die("BUG: the terrible thing happened");
with the idea that users should never see a "BUG:" message
(but if they, it at least gives a clue what happened). We
use die() here because it's convenient, but there are a few
draw-backs:
1. Without parsing the messages, it's hard for callers to
distinguish BUG assertions from regular errors.
For instance, it would be nice if the test suite could
check that we don't hit any assertions, but
test_must_fail will pass BUG deaths as OK.
2. It would be useful to add more debugging features to
BUG assertions, like file/line numbers or dumping core.
3. The die() handler can be replaced, and might not
actually exit the whole program (e.g., it may just
pthread_exit()). This is convenient for normal errors,
but for an assertion failure (which is supposed to
never happen), we're probably better off taking down
the whole process as quickly and cleanly as possible.
We could address these by checking in die() whether the
error message starts with "BUG", and behaving appropriately.
But there's little advantage at that point to sharing the
die() code, and only downsides (e.g., we can't change the
BUG() interface independently). Moreover, converting all of
the existing BUG calls reveals that the test suite does
indeed trigger a few of them.
Instead, this patch introduces a new BUG() function, which
prints an error before dying via SIGABRT. This gives us test
suite checking and core dumps. The function is actually a
macro (when supported) so that we can show the file/line
number.
We can convert die("BUG") invocations to BUG() in further
patches, dealing with any test fallouts individually.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-13 05:28:50 +02:00
|
|
|
#ifdef HAVE_VARIADIC_MACROS
|
|
|
|
__attribute__((format (printf, 3, 4))) NORETURN
|
|
|
|
void BUG_fl(const char *file, int line, const char *fmt, ...);
|
|
|
|
#define BUG(...) BUG_fl(__FILE__, __LINE__, __VA_ARGS__)
|
|
|
|
#else
|
|
|
|
__attribute__((format (printf, 1, 2))) NORETURN
|
|
|
|
void BUG(const char *fmt, ...);
|
|
|
|
#endif
|
|
|
|
|
2009-04-29 23:21:46 +02:00
|
|
|
/*
|
|
|
|
* Preserves errno, prints a message, but gives no warning for ENOENT.
|
2014-07-16 20:01:18 +02:00
|
|
|
* Returns 0 on success, which includes trying to unlink an object that does
|
|
|
|
* not exist.
|
2009-04-29 23:21:46 +02:00
|
|
|
*/
|
|
|
|
int unlink_or_warn(const char *path);
|
2014-07-16 20:20:36 +02:00
|
|
|
/*
|
|
|
|
* Tries to unlink file. Returns 0 if unlink succeeded
|
|
|
|
* or the file already didn't exist. Returns -1 and
|
|
|
|
* appends a message to err suitable for
|
|
|
|
* 'error("%s", err->buf)' on error.
|
|
|
|
*/
|
|
|
|
int unlink_or_msg(const char *file, struct strbuf *err);
|
2010-03-26 16:25:33 +01:00
|
|
|
/*
|
2014-07-16 20:01:18 +02:00
|
|
|
* Preserves errno, prints a message, but gives no warning for ENOENT.
|
|
|
|
* Returns 0 on success, which includes trying to remove a directory that does
|
|
|
|
* not exist.
|
2010-03-26 16:25:33 +01:00
|
|
|
*/
|
|
|
|
int rmdir_or_warn(const char *path);
|
2010-03-26 16:25:34 +01:00
|
|
|
/*
|
|
|
|
* Calls the correct function out of {unlink,rmdir}_or_warn based on
|
|
|
|
* the supplied file mode.
|
|
|
|
*/
|
|
|
|
int remove_or_warn(unsigned int mode, const char *path);
|
2009-04-29 23:21:46 +02:00
|
|
|
|
2012-10-14 02:03:07 +02:00
|
|
|
/*
|
|
|
|
* Call access(2), but warn for any error except "missing file"
|
|
|
|
* (ENOENT or ENOTDIR).
|
|
|
|
*/
|
config: allow inaccessible configuration under $HOME
The changes v1.7.12.1~2^2~4 (config: warn on inaccessible files,
2012-08-21) and v1.8.1.1~22^2~2 (config: treat user and xdg config
permission problems as errors, 2012-10-13) were intended to prevent
important configuration (think "[transfer] fsckobjects") from being
ignored when the configuration is unintentionally unreadable (for
example with EIO on a flaky filesystem, or with ENOMEM due to a DoS
attack). Usually ~/.gitconfig and ~/.config/git are readable by the
current user, and if they aren't then it would be easy to fix those
permissions, so the damage from adding this check should have been
minimal.
Unfortunately the access() check often trips when git is being run as
a server. A daemon (such as inetd or git-daemon) starts as "root",
creates a listening socket, and then drops privileges, meaning that
when git commands are invoked they cannot access $HOME and die with
fatal: unable to access '/root/.config/git/config': Permission denied
Any patch to fix this would have one of three problems:
1. We annoy sysadmins who need to take an extra step to handle HOME
when dropping privileges (the current behavior, or any other
proposal that they have to opt into).
2. We annoy sysadmins who want to set HOME when dropping privileges,
either by making what they want to do impossible, or making them
set an extra variable or option to accomplish what used to work
(e.g., a patch to git-daemon to set HOME when --user is passed).
3. We loosen the check, so some cases which might be noteworthy are
not caught.
This patch is of type (3).
Treat user and xdg configuration that are inaccessible due to
permissions (EACCES) as though no user configuration was provided at
all.
An alternative method would be to check if $HOME is readable, but that
would not help in cases where the user who dropped privileges had a
globally readable HOME with only .config or .gitconfig being private.
This does not change the behavior when /etc/gitconfig or .git/config
is unreadable (since those are more serious configuration errors),
nor when ~/.gitconfig or ~/.config/git is unreadable due to problems
other than permissions.
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Improved-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-12 23:03:18 +02:00
|
|
|
#define ACCESS_EACCES_OK (1U << 0)
|
|
|
|
int access_or_warn(const char *path, int mode, unsigned flag);
|
|
|
|
int access_or_die(const char *path, int mode, unsigned flag);
|
2012-08-21 08:10:59 +02:00
|
|
|
|
2017-05-03 12:16:49 +02:00
|
|
|
/* Warn on an inaccessible file if errno indicates this is an error */
|
|
|
|
int warn_on_fopen_errors(const char *path);
|
2012-08-21 23:52:07 +02:00
|
|
|
|
2021-02-16 15:44:22 +01:00
|
|
|
/*
|
|
|
|
* Open with O_NOFOLLOW, or equivalent. Note that the fallback equivalent
|
|
|
|
* may be racy. Do not use this as protection against an attacker who can
|
|
|
|
* simultaneously create paths.
|
|
|
|
*/
|
|
|
|
int open_nofollow(const char *path, int flags);
|
|
|
|
|
2015-03-08 06:07:59 +01:00
|
|
|
#ifndef SHELL_PATH
|
|
|
|
# define SHELL_PATH "/bin/sh"
|
|
|
|
#endif
|
|
|
|
|
2015-04-16 10:48:45 +02:00
|
|
|
#ifndef _POSIX_THREAD_SAFE_FUNCTIONS
|
compat-util: type-check parameters of no-op replacement functions
When there is no need to run a specific function on certain platforms,
we often #define an empty function to swallow its parameters and
make it into a no-op, e.g.
#define precompose_argv(c,v) /* no-op */
While this guarantees that no unneeded code is generated, it also
discards type and other checks on these parameters, e.g. a new code
written with the argv-array API (diff_args is of type "struct
argv_array" that has .argc and .argv members):
precompose_argv(diff_args.argc, diff_args.argv);
must be updated to use "struct strvec diff_args" with .nr and .v
members, like so:
precompose_argv(diff_args.nr, diff_args.v);
after the argv-array API has been updated to the strvec API.
However, the "no oop" C preprocessor macro is too aggressive to
discard what is unused, and did not catch such a call that was left
unconverted.
Using a "static inline" function whose body is a no-op should still
result in the same binary with decent compilers yet catch such a
reference to a missing field or passing a value of a wrong type.
While at it, I notice that precompute_str() has never been used
anywhere in the code, since it was introduced at 76759c7d (git on
Mac OS and precomposed unicode, 2012-07-08). Instead of turning it
into a static inline, just remove it.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-07 02:25:37 +02:00
|
|
|
static inline void flockfile(FILE *fh)
|
|
|
|
{
|
|
|
|
; /* nothing */
|
|
|
|
}
|
|
|
|
static inline void funlockfile(FILE *fh)
|
|
|
|
{
|
|
|
|
; /* nothing */
|
|
|
|
}
|
2015-04-16 10:48:45 +02:00
|
|
|
#define getc_unlocked(fh) getc(fh)
|
|
|
|
#endif
|
|
|
|
|
2019-02-12 15:14:41 +01:00
|
|
|
#ifdef FILENO_IS_A_MACRO
|
|
|
|
int git_fileno(FILE *stream);
|
2019-04-25 09:01:56 +02:00
|
|
|
# ifndef COMPAT_CODE_FILENO
|
2019-02-12 15:14:41 +01:00
|
|
|
# undef fileno
|
|
|
|
# define fileno(p) git_fileno(p)
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
2019-04-25 09:01:56 +02:00
|
|
|
#ifdef NEED_ACCESS_ROOT_HANDLER
|
|
|
|
int git_access(const char *path, int mode);
|
|
|
|
# ifndef COMPAT_CODE_ACCESS
|
|
|
|
# ifdef access
|
|
|
|
# undef access
|
|
|
|
# endif
|
|
|
|
# define access(path, mode) git_access(path, mode)
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
2017-05-26 05:09:01 +02:00
|
|
|
/*
|
|
|
|
* Our code often opens a path to an optional file, to work on its
|
|
|
|
* contents when we can successfully open it. We can ignore a failure
|
|
|
|
* to open if such an optional file does not exist, but we do want to
|
|
|
|
* report a failure in opening for other reasons (e.g. we got an I/O
|
|
|
|
* error, or the file is there, but we lack the permission to open).
|
|
|
|
*
|
|
|
|
* Call this function after seeing an error from open() or fopen() to
|
|
|
|
* see if the errno indicates a missing file that we can safely ignore.
|
|
|
|
*/
|
|
|
|
static inline int is_missing_file_error(int errno_)
|
|
|
|
{
|
|
|
|
return (errno_ == ENOENT || errno_ == ENOTDIR);
|
|
|
|
}
|
|
|
|
|
2019-04-29 10:28:14 +02:00
|
|
|
int cmd_main(int, const char **);
|
2016-10-27 19:30:30 +02:00
|
|
|
|
2019-02-22 23:25:01 +01:00
|
|
|
/*
|
|
|
|
* Intercept all calls to exit() and route them to trace2 to
|
|
|
|
* optionally emit a message before calling the real exit().
|
|
|
|
*/
|
|
|
|
int trace2_cmd_exit_fl(const char *file, int line, int code);
|
|
|
|
#define exit(code) exit(trace2_cmd_exit_fl(__FILE__, __LINE__, (code)))
|
|
|
|
|
add UNLEAK annotation for reducing leak false positives
It's a common pattern in git commands to allocate some
memory that should last for the lifetime of the program and
then not bother to free it, relying on the OS to throw it
away.
This keeps the code simple, and it's fast (we don't waste
time traversing structures or calling free at the end of the
program). But it also triggers warnings from memory-leak
checkers like valgrind or LSAN. They know that the memory
was still allocated at program exit, but they don't know
_when_ the leaked memory stopped being useful. If it was
early in the program, then it's probably a real and
important leak. But if it was used right up until program
exit, it's not an interesting leak and we'd like to suppress
it so that we can see the real leaks.
This patch introduces an UNLEAK() macro that lets us do so.
To understand its design, let's first look at some of the
alternatives.
Unfortunately the suppression systems offered by
leak-checking tools don't quite do what we want. A
leak-checker basically knows two things:
1. Which blocks were allocated via malloc, and the
callstack during the allocation.
2. Which blocks were left un-freed at the end of the
program (and which are unreachable, but more on that
later).
Their suppressions work by mentioning the function or
callstack of a particular allocation, and marking it as OK
to leak. So imagine you have code like this:
int cmd_foo(...)
{
/* this allocates some memory */
char *p = some_function();
printf("%s", p);
return 0;
}
You can say "ignore allocations from some_function(),
they're not leaks". But that's not right. That function may
be called elsewhere, too, and we would potentially want to
know about those leaks.
So you can say "ignore the callstack when main calls
some_function". That works, but your annotations are
brittle. In this case it's only two functions, but you can
imagine that the actual allocation is much deeper. If any of
the intermediate code changes, you have to update the
suppression.
What we _really_ want to say is that "the value assigned to
p at the end of the function is not a real leak". But
leak-checkers can't understand that; they don't know about
"p" in the first place.
However, we can do something a little bit tricky if we make
some assumptions about how leak-checkers work. They
generally don't just report all un-freed blocks. That would
report even globals which are still accessible when the
leak-check is run. Instead they take some set of memory
(like BSS) as a root and mark it as "reachable". Then they
scan the reachable blocks for anything that looks like a
pointer to a malloc'd block, and consider that block
reachable. And then they scan those blocks, and so on,
transitively marking anything reachable from a global as
"not leaked" (or at least leaked in a different category).
So we can mark the value of "p" as reachable by putting it
into a variable with program lifetime. One way to do that is
to just mark "p" as static. But that actually affects the
run-time behavior if the function is called twice (you
aren't likely to call main() twice, but some of our cmd_*()
functions are called from other commands).
Instead, we can trick the leak-checker by putting the value
into _any_ reachable bytes. This patch keeps a global
linked-list of bytes copied from "unleaked" variables. That
list is reachable even at program exit, which confers
recursive reachability on whatever values we unleak.
In other words, you can do:
int cmd_foo(...)
{
char *p = some_function();
printf("%s", p);
UNLEAK(p);
return 0;
}
to annotate "p" and suppress the leak report.
But wait, couldn't we just say "free(p)"? In this toy
example, yes. But UNLEAK()'s byte-copying strategy has
several advantages over actually freeing the memory:
1. It's recursive across structures. In many cases our "p"
is not just a pointer, but a complex struct whose
fields may have been allocated by a sub-function. And
in some cases (e.g., dir_struct) we don't even have a
function which knows how to free all of the struct
members.
By marking the struct itself as reachable, that confers
reachability on any pointers it contains (including those
found in embedded structs, or reachable by walking
heap blocks recursively.
2. It works on cases where we're not sure if the value is
allocated or not. For example:
char *p = argc > 1 ? argv[1] : some_function();
It's safe to use UNLEAK(p) here, because it's not
freeing any memory. In the case that we're pointing to
argv here, the reachability checker will just ignore
our bytes.
3. Likewise, it works even if the variable has _already_
been freed. We're just copying the pointer bytes. If
the block has been freed, the leak-checker will skip
over those bytes as uninteresting.
4. Because it's not actually freeing memory, you can
UNLEAK() before we are finished accessing the variable.
This is helpful in cases like this:
char *p = some_function();
return another_function(p);
Writing this with free() requires:
int ret;
char *p = some_function();
ret = another_function(p);
free(p);
return ret;
But with unleak we can just write:
char *p = some_function();
UNLEAK(p);
return another_function(p);
This patch adds the UNLEAK() macro and enables it
automatically when Git is compiled with SANITIZE=leak. In
normal builds it's a noop, so we pay no runtime cost.
It also adds some UNLEAK() annotations to show off how the
feature works. On top of other recent leak fixes, these are
enough to get t0000 and t0001 to pass when compiled with
LSAN.
Note the case in commit.c which actually converts a
strbuf_release() into an UNLEAK. This code was already
non-leaky, but the free didn't do anything useful, since
we're exiting. Converting it to an annotation means that
non-leak-checking builds pay no runtime cost. The cost is
minimal enough that it's probably not worth going on a
crusade to convert these kinds of frees to UNLEAKS. I did it
here for consistency with the "sb" leak (though it would
have been equally correct to go the other way, and turn them
both into strbuf_release() calls).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 08:38:41 +02:00
|
|
|
/*
|
|
|
|
* You can mark a stack variable with UNLEAK(var) to avoid it being
|
|
|
|
* reported as a leak by tools like LSAN or valgrind. The argument
|
|
|
|
* should generally be the variable itself (not its address and not what
|
|
|
|
* it points to). It's safe to use this on pointers which may already
|
|
|
|
* have been freed, or on pointers which may still be in use.
|
|
|
|
*
|
|
|
|
* Use this _only_ for a variable that leaks by going out of scope at
|
|
|
|
* program exit (so only from cmd_* functions or their direct helpers).
|
|
|
|
* Normal functions, especially those which may be called multiple
|
|
|
|
* times, should actually free their memory. This is only meant as
|
|
|
|
* an annotation, and does nothing in non-leak-checking builds.
|
|
|
|
*/
|
|
|
|
#ifdef SUPPRESS_ANNOTATED_LEAKS
|
2019-04-29 10:28:14 +02:00
|
|
|
void unleak_memory(const void *ptr, size_t len);
|
2017-09-20 00:10:04 +02:00
|
|
|
#define UNLEAK(var) unleak_memory(&(var), sizeof(var))
|
add UNLEAK annotation for reducing leak false positives
It's a common pattern in git commands to allocate some
memory that should last for the lifetime of the program and
then not bother to free it, relying on the OS to throw it
away.
This keeps the code simple, and it's fast (we don't waste
time traversing structures or calling free at the end of the
program). But it also triggers warnings from memory-leak
checkers like valgrind or LSAN. They know that the memory
was still allocated at program exit, but they don't know
_when_ the leaked memory stopped being useful. If it was
early in the program, then it's probably a real and
important leak. But if it was used right up until program
exit, it's not an interesting leak and we'd like to suppress
it so that we can see the real leaks.
This patch introduces an UNLEAK() macro that lets us do so.
To understand its design, let's first look at some of the
alternatives.
Unfortunately the suppression systems offered by
leak-checking tools don't quite do what we want. A
leak-checker basically knows two things:
1. Which blocks were allocated via malloc, and the
callstack during the allocation.
2. Which blocks were left un-freed at the end of the
program (and which are unreachable, but more on that
later).
Their suppressions work by mentioning the function or
callstack of a particular allocation, and marking it as OK
to leak. So imagine you have code like this:
int cmd_foo(...)
{
/* this allocates some memory */
char *p = some_function();
printf("%s", p);
return 0;
}
You can say "ignore allocations from some_function(),
they're not leaks". But that's not right. That function may
be called elsewhere, too, and we would potentially want to
know about those leaks.
So you can say "ignore the callstack when main calls
some_function". That works, but your annotations are
brittle. In this case it's only two functions, but you can
imagine that the actual allocation is much deeper. If any of
the intermediate code changes, you have to update the
suppression.
What we _really_ want to say is that "the value assigned to
p at the end of the function is not a real leak". But
leak-checkers can't understand that; they don't know about
"p" in the first place.
However, we can do something a little bit tricky if we make
some assumptions about how leak-checkers work. They
generally don't just report all un-freed blocks. That would
report even globals which are still accessible when the
leak-check is run. Instead they take some set of memory
(like BSS) as a root and mark it as "reachable". Then they
scan the reachable blocks for anything that looks like a
pointer to a malloc'd block, and consider that block
reachable. And then they scan those blocks, and so on,
transitively marking anything reachable from a global as
"not leaked" (or at least leaked in a different category).
So we can mark the value of "p" as reachable by putting it
into a variable with program lifetime. One way to do that is
to just mark "p" as static. But that actually affects the
run-time behavior if the function is called twice (you
aren't likely to call main() twice, but some of our cmd_*()
functions are called from other commands).
Instead, we can trick the leak-checker by putting the value
into _any_ reachable bytes. This patch keeps a global
linked-list of bytes copied from "unleaked" variables. That
list is reachable even at program exit, which confers
recursive reachability on whatever values we unleak.
In other words, you can do:
int cmd_foo(...)
{
char *p = some_function();
printf("%s", p);
UNLEAK(p);
return 0;
}
to annotate "p" and suppress the leak report.
But wait, couldn't we just say "free(p)"? In this toy
example, yes. But UNLEAK()'s byte-copying strategy has
several advantages over actually freeing the memory:
1. It's recursive across structures. In many cases our "p"
is not just a pointer, but a complex struct whose
fields may have been allocated by a sub-function. And
in some cases (e.g., dir_struct) we don't even have a
function which knows how to free all of the struct
members.
By marking the struct itself as reachable, that confers
reachability on any pointers it contains (including those
found in embedded structs, or reachable by walking
heap blocks recursively.
2. It works on cases where we're not sure if the value is
allocated or not. For example:
char *p = argc > 1 ? argv[1] : some_function();
It's safe to use UNLEAK(p) here, because it's not
freeing any memory. In the case that we're pointing to
argv here, the reachability checker will just ignore
our bytes.
3. Likewise, it works even if the variable has _already_
been freed. We're just copying the pointer bytes. If
the block has been freed, the leak-checker will skip
over those bytes as uninteresting.
4. Because it's not actually freeing memory, you can
UNLEAK() before we are finished accessing the variable.
This is helpful in cases like this:
char *p = some_function();
return another_function(p);
Writing this with free() requires:
int ret;
char *p = some_function();
ret = another_function(p);
free(p);
return ret;
But with unleak we can just write:
char *p = some_function();
UNLEAK(p);
return another_function(p);
This patch adds the UNLEAK() macro and enables it
automatically when Git is compiled with SANITIZE=leak. In
normal builds it's a noop, so we pay no runtime cost.
It also adds some UNLEAK() annotations to show off how the
feature works. On top of other recent leak fixes, these are
enough to get t0000 and t0001 to pass when compiled with
LSAN.
Note the case in commit.c which actually converts a
strbuf_release() into an UNLEAK. This code was already
non-leaky, but the free didn't do anything useful, since
we're exiting. Converting it to an annotation means that
non-leak-checking builds pay no runtime cost. The cost is
minimal enough that it's probably not worth going on a
crusade to convert these kinds of frees to UNLEAKS. I did it
here for consistency with the "sb" leak (though it would
have been equally correct to go the other way, and turn them
both into strbuf_release() calls).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 08:38:41 +02:00
|
|
|
#else
|
2017-09-20 00:10:04 +02:00
|
|
|
#define UNLEAK(var) do {} while (0)
|
add UNLEAK annotation for reducing leak false positives
It's a common pattern in git commands to allocate some
memory that should last for the lifetime of the program and
then not bother to free it, relying on the OS to throw it
away.
This keeps the code simple, and it's fast (we don't waste
time traversing structures or calling free at the end of the
program). But it also triggers warnings from memory-leak
checkers like valgrind or LSAN. They know that the memory
was still allocated at program exit, but they don't know
_when_ the leaked memory stopped being useful. If it was
early in the program, then it's probably a real and
important leak. But if it was used right up until program
exit, it's not an interesting leak and we'd like to suppress
it so that we can see the real leaks.
This patch introduces an UNLEAK() macro that lets us do so.
To understand its design, let's first look at some of the
alternatives.
Unfortunately the suppression systems offered by
leak-checking tools don't quite do what we want. A
leak-checker basically knows two things:
1. Which blocks were allocated via malloc, and the
callstack during the allocation.
2. Which blocks were left un-freed at the end of the
program (and which are unreachable, but more on that
later).
Their suppressions work by mentioning the function or
callstack of a particular allocation, and marking it as OK
to leak. So imagine you have code like this:
int cmd_foo(...)
{
/* this allocates some memory */
char *p = some_function();
printf("%s", p);
return 0;
}
You can say "ignore allocations from some_function(),
they're not leaks". But that's not right. That function may
be called elsewhere, too, and we would potentially want to
know about those leaks.
So you can say "ignore the callstack when main calls
some_function". That works, but your annotations are
brittle. In this case it's only two functions, but you can
imagine that the actual allocation is much deeper. If any of
the intermediate code changes, you have to update the
suppression.
What we _really_ want to say is that "the value assigned to
p at the end of the function is not a real leak". But
leak-checkers can't understand that; they don't know about
"p" in the first place.
However, we can do something a little bit tricky if we make
some assumptions about how leak-checkers work. They
generally don't just report all un-freed blocks. That would
report even globals which are still accessible when the
leak-check is run. Instead they take some set of memory
(like BSS) as a root and mark it as "reachable". Then they
scan the reachable blocks for anything that looks like a
pointer to a malloc'd block, and consider that block
reachable. And then they scan those blocks, and so on,
transitively marking anything reachable from a global as
"not leaked" (or at least leaked in a different category).
So we can mark the value of "p" as reachable by putting it
into a variable with program lifetime. One way to do that is
to just mark "p" as static. But that actually affects the
run-time behavior if the function is called twice (you
aren't likely to call main() twice, but some of our cmd_*()
functions are called from other commands).
Instead, we can trick the leak-checker by putting the value
into _any_ reachable bytes. This patch keeps a global
linked-list of bytes copied from "unleaked" variables. That
list is reachable even at program exit, which confers
recursive reachability on whatever values we unleak.
In other words, you can do:
int cmd_foo(...)
{
char *p = some_function();
printf("%s", p);
UNLEAK(p);
return 0;
}
to annotate "p" and suppress the leak report.
But wait, couldn't we just say "free(p)"? In this toy
example, yes. But UNLEAK()'s byte-copying strategy has
several advantages over actually freeing the memory:
1. It's recursive across structures. In many cases our "p"
is not just a pointer, but a complex struct whose
fields may have been allocated by a sub-function. And
in some cases (e.g., dir_struct) we don't even have a
function which knows how to free all of the struct
members.
By marking the struct itself as reachable, that confers
reachability on any pointers it contains (including those
found in embedded structs, or reachable by walking
heap blocks recursively.
2. It works on cases where we're not sure if the value is
allocated or not. For example:
char *p = argc > 1 ? argv[1] : some_function();
It's safe to use UNLEAK(p) here, because it's not
freeing any memory. In the case that we're pointing to
argv here, the reachability checker will just ignore
our bytes.
3. Likewise, it works even if the variable has _already_
been freed. We're just copying the pointer bytes. If
the block has been freed, the leak-checker will skip
over those bytes as uninteresting.
4. Because it's not actually freeing memory, you can
UNLEAK() before we are finished accessing the variable.
This is helpful in cases like this:
char *p = some_function();
return another_function(p);
Writing this with free() requires:
int ret;
char *p = some_function();
ret = another_function(p);
free(p);
return ret;
But with unleak we can just write:
char *p = some_function();
UNLEAK(p);
return another_function(p);
This patch adds the UNLEAK() macro and enables it
automatically when Git is compiled with SANITIZE=leak. In
normal builds it's a noop, so we pay no runtime cost.
It also adds some UNLEAK() annotations to show off how the
feature works. On top of other recent leak fixes, these are
enough to get t0000 and t0001 to pass when compiled with
LSAN.
Note the case in commit.c which actually converts a
strbuf_release() into an UNLEAK. This code was already
non-leaky, but the free didn't do anything useful, since
we're exiting. Converting it to an annotation means that
non-leak-checking builds pay no runtime cost. The cost is
minimal enough that it's probably not worth going on a
crusade to convert these kinds of frees to UNLEAKS. I did it
here for consistency with the "sb" leak (though it would
have been equally correct to go the other way, and turn them
both into strbuf_release() calls).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 08:38:41 +02:00
|
|
|
#endif
|
|
|
|
|
automatically ban strcpy()
There are a few standard C functions (like strcpy) which are
easy to misuse. E.g.:
char path[PATH_MAX];
strcpy(path, arg);
may overflow the "path" buffer. Sometimes there's an earlier
constraint on the size of "arg", but even in such a case
it's hard to verify that the code is correct. If the size
really is unbounded, you're better off using a dynamic
helper like strbuf:
struct strbuf path = STRBUF_INIT;
strbuf_addstr(path, arg);
or if it really is bounded, then use xsnprintf to show your
expectation (and get a run-time assertion):
char path[PATH_MAX];
xsnprintf(path, sizeof(path), "%s", arg);
which makes further auditing easier.
We'd usually catch undesirable code like this in a review,
but there's no automated enforcement. Adding that
enforcement can help us be more consistent and save effort
(and a round-trip) during review.
This patch teaches the compiler to report an error when it
sees strcpy (and will become a model for banning a few other
functions). This has a few advantages over a separate
linting tool:
1. We know it's run as part of a build cycle, so it's
hard to ignore. Whereas an external linter is an extra
step the developer needs to remember to do.
2. Likewise, it's basically free since the compiler is
parsing the code anyway.
3. We know it's robust against false positives (unlike a
grep-based linter).
The two big disadvantages are:
1. We'll only check code that is actually compiled, so it
may miss code that isn't triggered on your particular
system. But since presumably people don't add new code
without compiling it (and if they do, the banned
function list is the least of their worries), we really
only care about failing to clean up old code when
adding new functions to the list. And that's easy
enough to address with a manual audit when adding a new
function (which is what I did for the functions here).
2. If this ends up generating false positives, it's going
to be harder to disable (as opposed to a separate
linter, which may have mechanisms for overriding a
particular case).
But the intent is to only ban functions which are
obviously bad, and for which we accept using an
alternative even when this particular use isn't buggy
(e.g., the xsnprintf alternative above).
The implementation here is simple: we'll define a macro for
the banned function which replaces it with a reference to a
descriptively named but undeclared identifier. Replacing it
with any invalid code would work (since we just want to
break compilation). But ideally we'd meet these goals:
- it should be portable; ideally this would trigger
everywhere, and does not need to be part of a DEVELOPER=1
setup (because unlike warnings which may depend on the
compiler or system, this is a clear indicator of
something wrong in the code).
- it should generate a readable error that gives the
developer a clue what happened
- it should avoid generating too much other cruft that
makes it hard to see the actual error
- it should mention the original callsite in the error
The output with this patch looks like this (using gcc 7, on
a checkout with 022d2ac1f3 reverted, which removed the final
strcpy from blame.c):
CC builtin/blame.o
In file included from ./git-compat-util.h:1246,
from ./cache.h:4,
from builtin/blame.c:8:
builtin/blame.c: In function ‘cmd_blame’:
./banned.h:11:22: error: ‘sorry_strcpy_is_a_banned_function’ undeclared (first use in this function)
#define BANNED(func) sorry_##func##_is_a_banned_function
^~~~~~
./banned.h:14:21: note: in expansion of macro ‘BANNED’
#define strcpy(x,y) BANNED(strcpy)
^~~~~~
builtin/blame.c:1074:4: note: in expansion of macro ‘strcpy’
strcpy(repeated_meta_color, GIT_COLOR_CYAN);
^~~~~~
./banned.h:11:22: note: each undeclared identifier is reported only once for each function it appears in
#define BANNED(func) sorry_##func##_is_a_banned_function
^~~~~~
./banned.h:14:21: note: in expansion of macro ‘BANNED’
#define strcpy(x,y) BANNED(strcpy)
^~~~~~
builtin/blame.c:1074:4: note: in expansion of macro ‘strcpy’
strcpy(repeated_meta_color, GIT_COLOR_CYAN);
^~~~~~
This prominently shows the phrase "strcpy is a banned
function", along with the original callsite in blame.c and
the location of the ban code in banned.h. Which should be
enough to get even a developer seeing this for the first
time pointed in the right direction.
This doesn't match our ideals perfectly, but it's a pretty
good balance. A few alternatives I tried:
1. Instead of using an undeclared variable, using an
undeclared function. This shortens the message, because
the "each undeclared identifier" message is not needed
(and as you can see above, it triggers a separate
mention of each of the expansion points).
But it doesn't actually stop compilation unless you use
-Werror=implicit-function-declaration in your CFLAGS.
This is the case for DEVELOPER=1, but not for a default
build (on the other hand, we'd eventually produce a
link error pointing to the correct source line with the
descriptive name).
2. The linux kernel uses a similar mechanism in its
BUILD_BUG_ON_MSG(), where they actually declare the
function but do so with gcc's error attribute. But
that's not portable to other compilers (and it also
runs afoul of our error() macro).
We could make a gcc-specific technique and fallback on
other compilers, but it's probably not worth the
complexity. It also isn't significantly shorter than
the error message shown above.
3. We could drop the BANNED() macro, which would shorten
the number of lines in the error. But curiously,
removing it (and just expanding strcpy directly to the
bogus identifier) causes gcc _not_ to report the
original line of code.
So this strategy seems to be an acceptable mix of
information, portability, simplicity, and robustness,
without _too_ much extra clutter. I also tested it with
clang, and it looks as good (actually, slightly less
cluttered than with gcc).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-26 09:21:05 +02:00
|
|
|
/*
|
|
|
|
* This include must come after system headers, since it introduces macros that
|
|
|
|
* replace system names.
|
|
|
|
*/
|
|
|
|
#include "banned.h"
|
|
|
|
|
2019-10-07 01:30:33 +02:00
|
|
|
/*
|
|
|
|
* container_of - Get the address of an object containing a field.
|
|
|
|
*
|
|
|
|
* @ptr: pointer to the field.
|
|
|
|
* @type: type of the object.
|
|
|
|
* @member: name of the field within the object.
|
|
|
|
*/
|
|
|
|
#define container_of(ptr, type, member) \
|
|
|
|
((type *) ((char *)(ptr) - offsetof(type, member)))
|
|
|
|
|
2019-10-07 01:30:35 +02:00
|
|
|
/*
|
|
|
|
* helper function for `container_of_or_null' to avoid multiple
|
|
|
|
* evaluation of @ptr
|
|
|
|
*/
|
|
|
|
static inline void *container_of_or_null_offset(void *ptr, size_t offset)
|
|
|
|
{
|
|
|
|
return ptr ? (char *)ptr - offset : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* like `container_of', but allows returned value to be NULL
|
|
|
|
*/
|
|
|
|
#define container_of_or_null(ptr, type, member) \
|
|
|
|
(type *)container_of_or_null_offset(ptr, offsetof(type, member))
|
|
|
|
|
2019-10-07 01:30:41 +02:00
|
|
|
/*
|
2021-06-11 13:18:50 +02:00
|
|
|
* like offsetof(), but takes a pointer to a variable of type which
|
2019-10-07 01:30:41 +02:00
|
|
|
* contains @member, instead of a specified type.
|
|
|
|
* @ptr is subject to multiple evaluation since we can't rely on __typeof__
|
|
|
|
* everywhere.
|
|
|
|
*/
|
|
|
|
#if defined(__GNUC__) /* clang sets this, too */
|
|
|
|
#define OFFSETOF_VAR(ptr, member) offsetof(__typeof__(*ptr), member)
|
|
|
|
#else /* !__GNUC__ */
|
|
|
|
#define OFFSETOF_VAR(ptr, member) \
|
|
|
|
((uintptr_t)&(ptr)->member - (uintptr_t)(ptr))
|
|
|
|
#endif /* !__GNUC__ */
|
|
|
|
|
2020-11-24 20:10:11 +01:00
|
|
|
void sleep_millisec(int millisec);
|
|
|
|
|
2016-10-27 19:30:30 +02:00
|
|
|
#endif
|