git-commit-vandalism/git-compat-util.h

1271 lines
34 KiB
C
Raw Normal View History

#ifndef GIT_COMPAT_UTIL_H
#define GIT_COMPAT_UTIL_H
/*
 * Set before any system header is included further down in this file.
 * NOTE(review): presumably requests a 64-bit off_t from the C library on
 * platforms where that is optional -- confirm against the libc documentation.
 */
#define _FILE_OFFSET_BITS 64
/*
 * Compiler version probe, derived from the Linux "Features Test Macro"
 * header.
 *
 * GIT_GNUC_PREREQ(maj, min) is non-zero when the compiler identifies
 * itself as gcc (or a compatible compiler) of at least version maj.min,
 * and is the constant 0 for every other compiler.  Usage:
 *
 *	#if GIT_GNUC_PREREQ (2,8)
 *	... code requiring gcc 2.8 or later ...
 *	#endif
 */
#if !defined(__GNUC__) || !defined(__GNUC_MINOR__)
# define GIT_GNUC_PREREQ(maj, min) 0
#else
/* Major goes in the high half, minor in the low half, then one compare. */
# define GIT_GNUC_PREREQ(maj, min) \
	(((maj) << 16) + (min) <= (__GNUC__ << 16) + __GNUC_MINOR__)
#endif
/*
 * FLEX_ARRAY is what goes between the brackets of a trailing
 * variable-length struct member, e.g. "char data[FLEX_ARRAY];".
 * A value supplied by the build is left alone; otherwise pick one
 * according to what the compiler is known to support.
 */
#ifndef FLEX_ARRAY
# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && (!defined(__SUNPRO_C) || (__SUNPRO_C > 0x580))
   /* C99 flexible array member (Sun compilers only above 0x580) */
#  define FLEX_ARRAY /* empty */
# elif defined(__GNUC__) && (__GNUC__ >= 3)
#  define FLEX_ARRAY /* empty */
# elif defined(__GNUC__)
#  define FLEX_ARRAY 0 /* older GNU extension */
# else
   /* Otherwise, default to safer but a bit wasteful traditional style */
#  define FLEX_ARRAY 1
# endif
#endif
/*
 * BUILD_ASSERT_OR_ZERO - compile-time assertion usable inside an expression.
 * @cond: the compile-time condition which must be true.
 *
 * When @cond holds, the macro evaluates to 0 (of type size_t).  When it
 * does not hold, or cannot be evaluated by the compiler, the array type
 * below gets a negative size and the build fails.
 *
 * Example:
 *	#define foo_to_char(foo) \
 *		((char *)(foo) \
 *		 + BUILD_ASSERT_OR_ZERO(offsetof(struct foo, string) == 0))
 */
#define BUILD_ASSERT_OR_ZERO(cond) \
	(sizeof(char [(cond) ? 1 : -1]) - 1)
/*
 * BARF_UNLESS_AN_ARRAY - break the build when "arr" is not a real array.
 *
 * Evaluates to 0 when the check passes.  Compilers older than gcc 3.1
 * (or not gcc-compatible at all) get a plain 0, i.e. no checking.
 */
#if !GIT_GNUC_PREREQ(3, 1)
# define BARF_UNLESS_AN_ARRAY(arr) 0
#else
/* &arr[0] degrades to a pointer: a different type from an array */
# define BARF_UNLESS_AN_ARRAY(arr) \
	BUILD_ASSERT_OR_ZERO( \
		!__builtin_types_compatible_p(__typeof__(arr), \
					      __typeof__(&(arr)[0])))
#endif
/*
 * ARRAY_SIZE - get the number of elements in a visible array
 * @x: the array whose size you want.
 *
 * This does not work on pointers, or arrays declared as [], or
 * function parameters.  With correct compiler support, such usage
 * will cause a build error (see BARF_UNLESS_AN_ARRAY, which is built
 * on BUILD_ASSERT_OR_ZERO).
 */
#define ARRAY_SIZE(x) (BARF_UNLESS_AN_ARRAY(x) + sizeof(x) / sizeof((x)[0]))
/* Width, in bits, of a type or of the type of an expression. */
#define bitsizeof(x) (sizeof(x) * CHAR_BIT)

/*
 * Largest value that fits in the type of "a": take the maximum of the
 * widest integer type and shift out the bits that "a" does not have.
 */
#define maximum_signed_value_of_type(a) \
	(INTMAX_MAX >> (bitsizeof(intmax_t) - bitsizeof(a)))
#define maximum_unsigned_value_of_type(a) \
	(UINTMAX_MAX >> (bitsizeof(uintmax_t) - bitsizeof(a)))

/*
 * Signed integer overflow is undefined in C, so the sum is never
 * computed; instead the remaining headroom above "a" is compared
 * against "b".
 *
 * Requires: a >= 0, typeof(a) equals typeof(b)
 */
#define signed_add_overflows(a, b) \
	(maximum_signed_value_of_type(a) - (a) < (b))

/* Same headroom test for unsigned operands. */
#define unsigned_add_overflows(a, b) \
	(maximum_unsigned_value_of_type(a) - (a) < (b))

/*
 * True if multiplying "a" by "b" would overflow.  The types of "a"
 * and "b" must match and must be unsigned.  A zero "a" never overflows
 * and is tested first so that the division is safe.
 * Note that this macro evaluates "a" twice!
 */
#define unsigned_mult_overflows(a, b) \
	((a) != 0 && maximum_unsigned_value_of_type(a) / (a) < (b))
/*
 * TYPEOF(x) expands to a cast to the type of "x" where __typeof__ is
 * available, and to nothing on other compilers.
 */
#ifndef __GNUC__
#define TYPEOF(x)
#else
#define TYPEOF(x) (__typeof__(x))
#endif

/* Keep only the "bits" most significant bits of "x"; all lower bits are cleared. */
#define MSB(x, bits) (TYPEOF(x)(~0ULL << (bitsizeof(x) - (bits))) & (x))

/* Non-zero if an integer has more than 1 bit set. */
#define HAS_MULTI_BITS(i) (((i) - 1) & (i))

/* Integer division of "n" by "d", rounded up instead of down. */
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))

/* Approximation of the length of the decimal representation of this type. */
#define decimal_length(x) (1 + (int)(0.5 + sizeof(x) * 2.56))
#if defined(__sun__)
/*
* On Solaris, when _XOPEN_EXTENDED is set, its header file
* forces the programs to be XPG4v2, defeating any _XOPEN_SOURCE
* setting to say we are XPG5 or XPG6. Also on Solaris,
* XPG6 programs must be compiled with a c99 compiler, while
* non XPG6 programs must be compiled with a pre-c99 compiler.
*/
# if __STDC_VERSION__ - 0 >= 199901L
# define _XOPEN_SOURCE 600
# else
# define _XOPEN_SOURCE 500
# endif
#elif !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && \
!defined(_M_UNIX) && !defined(__sgi) && !defined(__DragonFly__) && \
!defined(__TANDEM) && !defined(__QNX__) && !defined(__MirBSD__) && \
!defined(__CYGWIN__)
#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */
#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */
#endif
#define _ALL_SOURCE 1
#define _GNU_SOURCE 1
#define _BSD_SOURCE 1
#define _DEFAULT_SOURCE 1
#define _NETBSD_SOURCE 1
#define _SGI_SOURCE 1
/*
 * Native Windows builds: WIN32 is defined and this is not Cygwin.
 */
#if defined(WIN32) && !defined(__CYGWIN__) /* Both MinGW and MSVC */
/* Only a default: a _WIN32_WINNT value already chosen by the build is kept. */
# if !defined(_WIN32_WINNT)
# define _WIN32_WINNT 0x0600
# endif
#define WIN32_LEAN_AND_MEAN /* stops windows.h including winsock.h */
#include <winsock2.h>
#include <windows.h>
/*
 * Defined only on this path.
 * NOTE(review): presumably tested elsewhere to select native-Windows code -- confirm.
 */
#define GIT_WINDOWS_NATIVE
#endif
#include <unistd.h>
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#ifdef HAVE_STRINGS_H
#include <strings.h> /* for strcasecmp() */
#endif
#include <errno.h>
#include <limits.h>
#ifdef NEEDS_SYS_PARAM_H
#include <sys/param.h>
#endif
#include <sys/types.h>
#include <dirent.h>
#include <sys/time.h>
#include <time.h>
#include <signal.h>
#include <assert.h>
#include <regex.h>
#include <utime.h>
#include <syslog.h>
#if !defined(NO_POLL_H)
#include <poll.h>
#elif !defined(NO_SYS_POLL_H)
#include <sys/poll.h>
#else
/* Pull the compat stuff */
#include <poll.h>
#endif
#ifdef HAVE_BSD_SYSCTL
#include <sys/sysctl.h>
#endif
#if defined(__CYGWIN__)
#include "compat/cygwin.h"
#endif
#if defined(__MINGW32__)
/* pull in Windows compatibility stuff */
#include "compat/mingw.h"
#elif defined(_MSC_VER)
#include "compat/msvc.h"
#else
#include <sys/utsname.h>
#include <sys/wait.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <termios.h>
#ifndef NO_SYS_SELECT_H
#include <sys/select.h>
#endif
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <pwd.h>
#include <sys/un.h>
#ifndef NO_INTTYPES_H
#include <inttypes.h>
#else
#include <stdint.h>
#endif
#ifdef NO_INTPTR_T
/*
* Fallback typedefs for builds that set NO_INTPTR_T.
*
* On I16LP32, ILP32 and LP64 "long" is the safe bet, however
* on LLP64, IL32LLP64 and P64 it needs to be "long long",
* while on IP16 and IP16L32 it is "int" (resp. "short")
* Size needs to match (or exceed) 'sizeof(void *)'.
* We can't take "long long" here as not everybody has it.
*/
typedef long intptr_t;
typedef unsigned long uintptr_t;
#endif
#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */
#include <grp.h>
#define _ALL_SOURCE 1
#endif
git on Mac OS and precomposed unicode Mac OS X mangles file names containing unicode on file systems HFS+, VFAT or SAMBA. When a file using unicode code points outside ASCII is created on a HFS+ drive, the file name is converted into decomposed unicode and written to disk. No conversion is done if the file name is already decomposed unicode. Calling open("\xc3\x84", ...) with a precomposed "Ä" yields the same result as open("\x41\xcc\x88",...) with a decomposed "Ä". As a consequence, readdir() returns the file names in decomposed unicode, even if the user expects precomposed unicode. Unlike on HFS+, Mac OS X stores files on a VFAT drive (e.g. an USB drive) in precomposed unicode, but readdir() still returns file names in decomposed unicode. When a git repository is stored on a network share using SAMBA, file names are send over the wire and written to disk on the remote system in precomposed unicode, but Mac OS X readdir() returns decomposed unicode to be compatible with its behaviour on HFS+ and VFAT. The unicode decomposition causes many problems: - The names "git add" and other commands get from the end user may often be precomposed form (the decomposed form is not easily input from the keyboard), but when the commands read from the filesystem to see what it is going to update the index with already is on the filesystem, readdir() will give decomposed form, which is different. - Similarly "git log", "git mv" and all other commands that need to compare pathnames found on the command line (often but not always precomposed form; a command line input resulting from globbing may be in decomposed) with pathnames found in the tree objects (should be precomposed form to be compatible with other systems and for consistency in general). - The same for names stored in the index, which should be precomposed, that may need to be compared with the names read from readdir(). NFS mounted from Linux is fully transparent and does not suffer from the above. 
As Mac OS X treats precomposed and decomposed file names as equal, we can - wrap readdir() on Mac OS X to return the precomposed form, and - normalize decomposed form given from the command line also to the precomposed form, to ensure that all pathnames used in Git are always in the precomposed form. This behaviour can be requested by setting "core.precomposedunicode" configuration variable to true. The code in compat/precomposed_utf8.c implements basically 4 new functions: precomposed_utf8_opendir(), precomposed_utf8_readdir(), precomposed_utf8_closedir() and precompose_argv(). The first three are to wrap opendir(3), readdir(3), and closedir(3) functions. The argv[] conversion allows to use the TAB filename completion done by the shell on command line. It tolerates other tools which use readdir() to feed decomposed file names into git. When creating a new git repository with "git init" or "git clone", "core.precomposedunicode" will be set "false". The user needs to activate this feature manually. She typically sets core.precomposedunicode to "true" on HFS and VFAT, or file systems mounted via SAMBA. Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Torsten Bögershausen <tboegi@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-08 15:50:25 +02:00
/* used on Mac OS X */
#ifdef PRECOMPOSE_UNICODE
#include "compat/precompose_utf8.h"
#else
/*
 * Without PRECOMPOSE_UNICODE the precompose hooks expand to nothing, so
 * call sites compile unchanged and perform no work.
 */
#define precompose_str(in,i_nfd2nfc)
#define precompose_argv(c,v)
#define probe_utf8_pathname_composition()
git on Mac OS and precomposed unicode Mac OS X mangles file names containing unicode on file systems HFS+, VFAT or SAMBA. When a file using unicode code points outside ASCII is created on a HFS+ drive, the file name is converted into decomposed unicode and written to disk. No conversion is done if the file name is already decomposed unicode. Calling open("\xc3\x84", ...) with a precomposed "Ä" yields the same result as open("\x41\xcc\x88",...) with a decomposed "Ä". As a consequence, readdir() returns the file names in decomposed unicode, even if the user expects precomposed unicode. Unlike on HFS+, Mac OS X stores files on a VFAT drive (e.g. an USB drive) in precomposed unicode, but readdir() still returns file names in decomposed unicode. When a git repository is stored on a network share using SAMBA, file names are send over the wire and written to disk on the remote system in precomposed unicode, but Mac OS X readdir() returns decomposed unicode to be compatible with its behaviour on HFS+ and VFAT. The unicode decomposition causes many problems: - The names "git add" and other commands get from the end user may often be precomposed form (the decomposed form is not easily input from the keyboard), but when the commands read from the filesystem to see what it is going to update the index with already is on the filesystem, readdir() will give decomposed form, which is different. - Similarly "git log", "git mv" and all other commands that need to compare pathnames found on the command line (often but not always precomposed form; a command line input resulting from globbing may be in decomposed) with pathnames found in the tree objects (should be precomposed form to be compatible with other systems and for consistency in general). - The same for names stored in the index, which should be precomposed, that may need to be compared with the names read from readdir(). NFS mounted from Linux is fully transparent and does not suffer from the above. 
As Mac OS X treats precomposed and decomposed file names as equal, we can - wrap readdir() on Mac OS X to return the precomposed form, and - normalize decomposed form given from the command line also to the precomposed form, to ensure that all pathnames used in Git are always in the precomposed form. This behaviour can be requested by setting "core.precomposedunicode" configuration variable to true. The code in compat/precomposed_utf8.c implements basically 4 new functions: precomposed_utf8_opendir(), precomposed_utf8_readdir(), precomposed_utf8_closedir() and precompose_argv(). The first three are to wrap opendir(3), readdir(3), and closedir(3) functions. The argv[] conversion allows to use the TAB filename completion done by the shell on command line. It tolerates other tools which use readdir() to feed decomposed file names into git. When creating a new git repository with "git init" or "git clone", "core.precomposedunicode" will be set "false". The user needs to activate this feature manually. She typically sets core.precomposedunicode to "true" on HFS and VFAT, or file systems mounted via SAMBA. Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Torsten Bögershausen <tboegi@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-08 15:50:25 +02:00
#endif
#ifdef MKDIR_WO_TRAILING_SLASH
/*
 * Builds that set MKDIR_WO_TRAILING_SLASH get every mkdir() call routed
 * through the compat wrapper declared here.
 * NOTE(review): by its name the wrapper presumably drops a trailing slash
 * before calling the real mkdir() -- confirm in its implementation.
 */
extern int compat_mkdir_wo_trailing_slash(const char *path, mode_t mode);
#define mkdir(a,b) compat_mkdir_wo_trailing_slash((a),(b))
#endif
#ifdef NO_STRUCT_ITIMERVAL
/*
 * Local definition of struct itimerval for builds that set
 * NO_STRUCT_ITIMERVAL.
 * NOTE(review): field names presumably mirror the POSIX struct of the
 * same name -- see setitimer() documentation for their meaning.
 */
struct itimerval {
struct timeval it_interval;
struct timeval it_value;
};
#endif
#ifdef NO_SETITIMER
/* With NO_SETITIMER, setitimer() calls expand to nothing at all. */
#define setitimer(which,value,ovalue)
#endif
/*
 * basename()/dirname() come from <libgen.h> unless the build sets
 * NO_LIBGEN_H; in that case both names are redirected to the
 * gitbasename()/gitdirname() replacements declared here.
 */
#ifndef NO_LIBGEN_H
#include <libgen.h>
#else
#define basename gitbasename
extern char *gitbasename(char *);
#define dirname gitdirname
extern char *gitdirname(char *);
#endif
#ifndef NO_ICONV
#include <iconv.h>
#endif
#ifndef NO_OPENSSL
#ifdef __APPLE__
git-compat-util: suppress unavoidable Apple-specific deprecation warnings With the release of Mac OS X 10.7 in July 2011, Apple deprecated all openssl.h functionality due to OpenSSL ABI (application binary interface) instability, resulting in an explosion of compilation warnings about deprecated SSL, SHA1, and X509 functions (among others). 61067954ce (cache.h: eliminate SHA-1 deprecation warnings on Mac OS X; 2013-05-19) and be4c828b76 (imap-send: eliminate HMAC deprecation warnings on Mac OS X; 2013-05-19) attempted to ameliorate the situation by taking advantage of drop-in replacement functionality provided by Apple's (ABI-stable) CommonCrypto facility, however CommonCrypto supplies only a subset of deprecated OpenSSL functionality, thus a host of warnings remain. Despite this shortcoming, it was hoped that Apple would ultimately provide CommonCrypto replacements for all deprecated OpenSSL functionality, and that the effort started by 61067954ce and be4c828b76 would be continued and eventually eliminate all deprecation warnings. However, now 3.5 years later, and with Mac OS X at 10.10, the hoped-for CommonCrypto replacements have not yet materialized, nor is there any indication that they will be forthcoming. These Apple-specific warnings are pure noise: they don't tell us anything useful and we have no control over them, nor is Apple likely to provide replacements any time soon. Such noise may obscure other legitimate warnings, therefore silence them. Signed-off-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-17 00:19:36 +01:00
/*
 * Apple marks the OpenSSL declarations as deprecated.  Blank out
 * DEPRECATED_ATTRIBUTE before the OpenSSL headers are included below so
 * that those unavoidable warnings are suppressed.
 */
#define __AVAILABILITY_MACROS_USES_AVAILABILITY 0
#include <AvailabilityMacros.h>
#undef DEPRECATED_ATTRIBUTE
#define DEPRECATED_ATTRIBUTE
/* The helper setting is only needed while AvailabilityMacros.h is read. */
#undef __AVAILABILITY_MACROS_USES_AVAILABILITY
#endif
#include <openssl/ssl.h>
#include <openssl/err.h>
#endif
#ifdef HAVE_SYSINFO
# include <sys/sysinfo.h>
#endif
/*
 * Fallback limits.  Each one is defined only if the system headers
 * included above did not already provide it.
 *
 * NI_MAXHOST / NI_MAXSERV: normally from <netdb.h>, but not on some
 * systems (e.g. z/OS).
 */
#if !defined(NI_MAXHOST)
# define NI_MAXHOST 1025
#endif
#if !defined(NI_MAXSERV)
# define NI_MAXSERV 32
#endif

/*
 * PATH_MAX: normally from <limits.h>, but not on some systems
 * (e.g. GNU/Hurd).
 */
#if !defined(PATH_MAX)
# define PATH_MAX 4096
#endif
/*
 * printf/scanf conversion macros, defined only where the headers included
 * above did not already supply them.  SCNuMAX falls back to whatever
 * PRIuMAX is.
 */
#if !defined(PRIuMAX)
# define PRIuMAX "llu"
#endif
#if !defined(SCNuMAX)
# define SCNuMAX PRIuMAX
#endif
#if !defined(PRIu32)
# define PRIu32 "u"
#endif
#if !defined(PRIx32)
# define PRIx32 "x"
#endif
#if !defined(PRIo32)
# define PRIo32 "o"
#endif
/*
 * timestamp_t is the widest unsigned integer type.  The companion macros
 * give its largest value, the function that parses one from text, and
 * the printf conversion that prints one.
 */
#define TIME_MAX UINTMAX_MAX
#define parse_timestamp strtoumax
#define PRItime PRIuMAX
typedef uintmax_t timestamp_t;
/* Default separator between entries of a search path; overridable. */
#if !defined(PATH_SEP)
# define PATH_SEP ':'
#endif

/*
 * Pull in <paths.h> where the build says it exists, then supply a
 * default search path if _PATH_DEFPATH is still undefined afterwards.
 */
#if defined(HAVE_PATHS_H)
# include <paths.h>
#endif
#if !defined(_PATH_DEFPATH)
# define _PATH_DEFPATH "/usr/local/bin:/usr/bin:/bin"
#endif
#if !defined(platform_core_config)
/*
 * Default platform_core_config hook, used when no platform header has
 * defined its own: ignores all three arguments and returns 0.
 */
static inline int noop_core_config(const char *var, const char *value, void *cb)
{
	(void)var;
	(void)value;
	(void)cb;
	return 0;
}
# define platform_core_config noop_core_config
#endif
#if !defined(has_dos_drive_prefix)
/*
 * Default has_dos_drive_prefix(), used when no platform header has defined
 * its own: returns 0 for every path.
 */
static inline int git_has_dos_drive_prefix(const char *path)
{
	(void)path;
	return 0;
}
# define has_dos_drive_prefix git_has_dos_drive_prefix
#endif
#if !defined(skip_dos_drive_prefix)
/*
 * Default skip_dos_drive_prefix(), used when no platform header has
 * defined its own: leaves *path untouched and returns 0.
 */
static inline int git_skip_dos_drive_prefix(char **path)
{
	(void)path;
	return 0;
}
# define skip_dos_drive_prefix git_skip_dos_drive_prefix
#endif
#if !defined(is_dir_sep)
/*
 * Default is_dir_sep(), used when no platform header has defined its own:
 * returns 1 if "c" is '/', and 0 for any other character.
 */
static inline int git_is_dir_sep(int c)
{
	return '/' == c;
}
# define is_dir_sep git_is_dir_sep
#endif
#if !defined(offset_1st_component)
/*
 * Default offset_1st_component(), used when no platform header has defined
 * its own: returns the result of is_dir_sep() on the first character of
 * "path".
 */
static inline int git_offset_1st_component(const char *path)
{
	const char first = path[0];

	return is_dir_sep(first);
}
# define offset_1st_component git_offset_1st_component
#endif
mingw: refuse to access paths with trailing spaces or periods When creating a directory on Windows whose path ends in a space or a period (or chains thereof), the Win32 API "helpfully" trims those. For example, `mkdir("abc ");` will return success, but actually create a directory called `abc` instead. This stems back to the DOS days, when all file names had exactly 8 characters plus exactly 3 characters for the file extension, and the only way to have shorter names was by padding with spaces. Sadly, this "helpful" behavior is a bit inconsistent: after a successful `mkdir("abc ");`, a `mkdir("abc /def")` will actually _fail_ (because the directory `abc ` does not actually exist). Even if it would work, we now have a serious problem because a Git repository could contain directories `abc` and `abc `, and on Windows, they would be "merged" unintentionally. As these paths are illegal on Windows, anyway, let's disallow any accesses to such paths on that Operating System. For practical reasons, this behavior is still guarded by the config setting `core.protectNTFS`: it is possible (and at least two regression tests make use of it) to create commits without involving the worktree. In such a scenario, it is of course possible -- even on Windows -- to create such file names. Among other consequences, this patch disallows submodules' paths to end in spaces on Windows (which would formerly have confused Git enough to try to write into incorrect paths, anyway). While this patch does not fix a vulnerability on its own, it prevents an attack vector that was exploited in demonstrations of a number of recently-fixed security bugs. The regression test added to `t/t7417-submodule-path-url.sh` reflects that attack vector. Note that we have to adjust the test case "prevent git~1 squatting on Windows" in `t/t7415-submodule-names.sh` because of a very subtle issue. 
It tries to clone two submodules whose names differ only in a trailing period character, and as a consequence their git directories differ in the same way. Previously, when Git tried to clone the second submodule, it thought that the git directory already existed (because on Windows, when you create a directory with the name `b.` it actually creates `b`), but with this patch, the first submodule's clone will fail because of the illegal name of the git directory. Therefore, when cloning the second submodule, Git will take a different code path: a fresh clone (without an existing git directory). Both code paths fail to clone the second submodule, both because the corresponding worktree directory exists and is not empty, but the error messages are worded differently. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
2019-09-05 13:27:53 +02:00
/*
 * Default is_valid_path(), used when no platform header has defined its
 * own: every path is accepted (the macro is the constant 1 and does not
 * evaluate its argument).
 */
#if !defined(is_valid_path)
# define is_valid_path(path) 1
#endif
#if !defined(find_last_dir_sep)
/*
 * Default find_last_dir_sep(), used when no platform header has defined
 * its own: returns a pointer to the last '/' in "path", or NULL when the
 * path contains none.
 */
static inline char *git_find_last_dir_sep(const char *path)
{
	char *last_slash = strrchr(path, '/');

	return last_slash;
}
# define find_last_dir_sep git_find_last_dir_sep
#endif
/*
 * Default query_user_email(), used when no platform header has defined its
 * own: always yields NULL.
 */
#if !defined(query_user_email)
# define query_user_email() NULL
#endif
/*
 * NORETURN marks a function that does not return to its caller;
 * NORETURN_PTR is the counterpart used on function-pointer declarations.
 * Each compiler branch below supplies its own spelling, and both macros
 * expand to nothing where no spelling is known (or, for gcc-compatible
 * compilers, when NO_NORETURN is set).
 */
#if defined(__HP_cc) && (__HP_cc >= 61000)
#define NORETURN __attribute__((noreturn))
#define NORETURN_PTR
#elif defined(__GNUC__) && !defined(NO_NORETURN)
#define NORETURN __attribute__((__noreturn__))
#define NORETURN_PTR __attribute__((__noreturn__))
#elif defined(_MSC_VER)
#define NORETURN __declspec(noreturn)
#define NORETURN_PTR
#else
#define NORETURN
#define NORETURN_PTR
/*
 * In this fallback branch, non-GNU compilers also get __attribute__((...))
 * defined away so that annotated declarations still compile.
 */
#ifndef __GNUC__
#ifndef __attribute__
#define __attribute__(x)
#endif
#endif
#endif
/*
 * LAST_ARG_MUST_BE_NULL annotates a variadic function with the sentinel
 * attribute.  The sentinel attribute is valid from gcc version 4.0;
 * anything else gets an empty macro.
 */
#if !defined(__GNUC__) || (__GNUC__ < 4)
# define LAST_ARG_MUST_BE_NULL
#else
# define LAST_ARG_MUST_BE_NULL __attribute__((sentinel))
#endif

/* Tags a declaration with the __unused__ attribute. */
#define MAYBE_UNUSED __attribute__((__unused__))
#include "compat/bswap.h"
#include "wildmatch.h"
/* Forward declaration of struct strbuf (defined elsewhere). */
struct strbuf;
/* General helper functions */
/*
 * Reporting helpers.  Every variadic function below takes a printf-style
 * format as its first argument; the format attribute lets the compiler
 * check the remaining arguments against it.  Those marked NORETURN do not
 * return to the caller.  error() always returns -1.
 */
extern void vreportf(const char *prefix, const char *err, va_list params);
extern NORETURN void usage(const char *err);
extern NORETURN void usagef(const char *err, ...) __attribute__((format (printf, 1, 2)));
extern NORETURN void die(const char *err, ...) __attribute__((format (printf, 1, 2)));
extern NORETURN void die_errno(const char *err, ...) __attribute__((format (printf, 1, 2)));
extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
extern int error_errno(const char *err, ...) __attribute__((format (printf, 1, 2)));
extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
extern void warning_errno(const char *err, ...) __attribute__((format (printf, 1, 2)));
#ifndef NO_OPENSSL
#ifdef APPLE_COMMON_CRYPTO
#include "compat/apple-common-crypto.h"
#else
#include <openssl/evp.h>
#include <openssl/hmac.h>
#endif /* APPLE_COMMON_CRYPTO */
#include <openssl/x509v3.h>
#endif /* NO_OPENSSL */
make error()'s constant return value more visible When git is compiled with "gcc -Wuninitialized -O3", some inlined calls provide an additional opportunity for the compiler to do static analysis on variable initialization. For example, with two functions like this: int get_foo(int *foo) { if (something_that_might_fail() < 0) return error("unable to get foo"); *foo = 0; return 0; } void some_fun(void) { int foo; if (get_foo(&foo) < 0) return -1; printf("foo is %d\n", foo); } If get_foo() is not inlined, then when compiling some_fun, gcc sees only that a pointer to the local variable is passed, and must assume that it is an out parameter that is initialized after get_foo returns. However, when get_foo() is inlined, the compiler may look at all of the code together and see that some code paths in get_foo() do not initialize the variable. As a result, it prints a warning. But what the compiler can't see is that error() always returns -1, and therefore we know that either we return early from some_fun, or foo ends up initialized, and the code is safe. The warning is a false positive. If we can make the compiler aware that error() will always return -1, it can do a better job of analysis. The simplest method would be to inline the error() function. However, this doesn't work, because gcc will not inline a variadc function. We can work around this by defining a macro. This relies on two gcc extensions: 1. Variadic macros (these are present in C99, but we do not rely on that). 2. Gcc treats the "##" paste operator specially between a comma and __VA_ARGS__, which lets our variadic macro work even if no format parameters are passed to error(). Since we are using these extra features, we hide the macro behind an #ifdef. This is OK, though, because our goal was just to help gcc. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-12-15 18:37:36 +01:00
/*
* Let callers be aware of the constant return value; this can help
* gcc with -Wuninitialized analysis. We restrict this trick to gcc, though,
* because some compilers may not support variadic macros. Since we're only
* trying to help gcc, anyway, it's OK; other compilers will fall back to
* using the function as usual.
make error()'s constant return value more visible When git is compiled with "gcc -Wuninitialized -O3", some inlined calls provide an additional opportunity for the compiler to do static analysis on variable initialization. For example, with two functions like this: int get_foo(int *foo) { if (something_that_might_fail() < 0) return error("unable to get foo"); *foo = 0; return 0; } void some_fun(void) { int foo; if (get_foo(&foo) < 0) return -1; printf("foo is %d\n", foo); } If get_foo() is not inlined, then when compiling some_fun, gcc sees only that a pointer to the local variable is passed, and must assume that it is an out parameter that is initialized after get_foo returns. However, when get_foo() is inlined, the compiler may look at all of the code together and see that some code paths in get_foo() do not initialize the variable. As a result, it prints a warning. But what the compiler can't see is that error() always returns -1, and therefore we know that either we return early from some_fun, or foo ends up initialized, and the code is safe. The warning is a false positive. If we can make the compiler aware that error() will always return -1, it can do a better job of analysis. The simplest method would be to inline the error() function. However, this doesn't work, because gcc will not inline a variadc function. We can work around this by defining a macro. This relies on two gcc extensions: 1. Variadic macros (these are present in C99, but we do not rely on that). 2. Gcc treats the "##" paste operator specially between a comma and __VA_ARGS__, which lets our variadic macro work even if no format parameters are passed to error(). Since we are using these extra features, we hide the macro behind an #ifdef. This is OK, though, because our goal was just to help gcc. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-12-15 18:37:36 +01:00
*/
#if defined(__GNUC__)
/*
 * Returns the constant -1.  Being inline, it lets the compiler see the
 * value that the error() wrappers evaluate to.
 */
static inline int const_error(void)
{
	const int error_result = -1;

	return error_result;
}
/*
 * Shadow the functions with macros that still call them (a macro name is
 * not re-expanded inside its own expansion) but evaluate to const_error(),
 * so the constant result is visible at every call site.
 */
#define error(...) (error(__VA_ARGS__), const_error())
#define error_errno(...) (error_errno(__VA_ARGS__), const_error())
make error()'s constant return value more visible When git is compiled with "gcc -Wuninitialized -O3", some inlined calls provide an additional opportunity for the compiler to do static analysis on variable initialization. For example, with two functions like this: int get_foo(int *foo) { if (something_that_might_fail() < 0) return error("unable to get foo"); *foo = 0; return 0; } void some_fun(void) { int foo; if (get_foo(&foo) < 0) return -1; printf("foo is %d\n", foo); } If get_foo() is not inlined, then when compiling some_fun, gcc sees only that a pointer to the local variable is passed, and must assume that it is an out parameter that is initialized after get_foo returns. However, when get_foo() is inlined, the compiler may look at all of the code together and see that some code paths in get_foo() do not initialize the variable. As a result, it prints a warning. But what the compiler can't see is that error() always returns -1, and therefore we know that either we return early from some_fun, or foo ends up initialized, and the code is safe. The warning is a false positive. If we can make the compiler aware that error() will always return -1, it can do a better job of analysis. The simplest method would be to inline the error() function. However, this doesn't work, because gcc will not inline a variadc function. We can work around this by defining a macro. This relies on two gcc extensions: 1. Variadic macros (these are present in C99, but we do not rely on that). 2. Gcc treats the "##" paste operator specially between a comma and __VA_ARGS__, which lets our variadic macro work even if no format parameters are passed to error(). Since we are using these extra features, we hide the macro behind an #ifdef. This is OK, though, because our goal was just to help gcc. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-12-15 18:37:36 +01:00
#endif
/*
 * Install or query reporting callbacks.  Each routine receives a message
 * format together with the va_list of its arguments.
 * NOTE(review): presumably die()/error()/warning() dispatch to the
 * routines installed here -- confirm in their implementation.
 */
extern void set_die_routine(NORETURN_PTR void (*routine)(const char *err, va_list params));
extern void set_error_routine(void (*routine)(const char *err, va_list params));
extern void (*get_error_routine(void))(const char *err, va_list params);
extern void set_warn_routine(void (*routine)(const char *warn, va_list params));
extern void (*get_warn_routine(void))(const char *warn, va_list params);
/* NOTE(review): the callback presumably reports whether die() is being re-entered -- confirm. */
extern void set_die_is_recursing_routine(int (*routine)(void));
/*
 * Prefix tests on NUL-terminated strings.
 * NOTE(review): by its name istarts_with() is presumably the
 * case-insensitive variant -- confirm in its implementation.
 */
extern int starts_with(const char *str, const char *prefix);
extern int istarts_with(const char *str, const char *prefix);
/*
 * Test whether "str" starts with "prefix".
 *
 * On a match, return 1 and store "str + strlen(prefix)" (the first
 * character after the prefix) in "out".  On a mismatch, return 0 and
 * leave "out" untouched.  An empty prefix always matches.
 *
 * Examples:
 *
 *   [extract branch name, fail if not a branch]
 *   if (!skip_prefix(ref, "refs/heads/", &branch))
 *	return -1;
 *
 *   [skip prefix if present, otherwise use whole string]
 *   skip_prefix(name, "refs/heads/", &name);
 */
static inline int skip_prefix(const char *str, const char *prefix,
			      const char **out)
{
	for (;; str++, prefix++) {
		/* Whole prefix consumed: "str" now points just past it. */
		if (*prefix == '\0') {
			*out = str;
			return 1;
		}
		/* Also covers "str" ending before the prefix does. */
		if (*str != *prefix)
			return 0;
	}
}
/*
 * Parse an option that may be spelled "--key" or "--key=<val>".
 *
 * If "str" is exactly "prefix", "arg" is set to "def" and 1 is returned.
 * If "str" is "prefix" followed by a "=" sign, "arg" is set to
 * "str + strlen(prefix) + 1" (the character right after the "=") and 1
 * is returned.
 *
 * Otherwise, return 0 and leave "arg" untouched.
 *
 * This replaces the pair !strcmp(arg, "--key") and
 * skip_prefix(arg, "--key=", &arg) when both spellings are accepted.
 */
int skip_to_optional_arg_default(const char *str, const char *prefix,
				 const char **arg, const char *def);

/* Same as above, with the empty string as the default value. */
static inline int skip_to_optional_arg(const char *str, const char *prefix,
				       const char **arg)
{
	const char *fallback = "";

	return skip_to_optional_arg_default(str, prefix, arg, fallback);
}
/*
 * Length-bounded variant of skip_prefix: never reads more than "len"
 * bytes of "buf".  On a match, return 1, point "out" just past the
 * prefix and store the number of bytes left in "outlen".  On a mismatch,
 * return 0 and leave both outputs untouched.
 */
static inline int skip_prefix_mem(const char *buf, size_t len,
				  const char *prefix,
				  const char **out, size_t *outlen)
{
	size_t plen = strlen(prefix);

	if (len < plen)
		return 0;	/* buffer too short to hold the prefix */
	if (memcmp(buf, prefix, plen) != 0)
		return 0;

	*out = buf + plen;
	*outlen = len - plen;
	return 1;
}
/*
 * Test whether the "*len" bytes at "buf" end with "suffix".  On a match,
 * return 1 and shrink "*len" by the suffix length.  Otherwise, return 0
 * and leave "*len" untouched.
 */
static inline int strip_suffix_mem(const char *buf, size_t *len,
				   const char *suffix)
{
	size_t suflen = strlen(suffix);

	if (suflen <= *len &&
	    !memcmp(buf + (*len - suflen), suffix, suflen)) {
		*len -= suflen;
		return 1;
	}
	return 0;
}
/*
 * NUL-terminated front end to strip_suffix_mem.  "*len" is first set to
 * strlen(str); if "str" ends with "suffix", it is then reduced to the
 * length without the suffix and 1 is returned, otherwise 0 is returned
 * and "*len" stays at the full length.
 *
 * Note that we do _not_ NUL-terminate str to the new length.
 */
static inline int strip_suffix(const char *str, const char *suffix, size_t *len)
{
	size_t full_length = strlen(str);

	*len = full_length;
	return strip_suffix_mem(str, len, suffix);
}
/*
 * Return 1 if "str" ends with "suffix", 0 otherwise.  The stripped length
 * computed by strip_suffix() is discarded.
 */
static inline int ends_with(const char *str, const char *suffix)
{
	size_t ignored_length;

	return strip_suffix(str, suffix, &ignored_length);
}
/*
 * SWAP(a, b) - exchange the contents of two objects of the same size.
 *
 * The bytes of "a" are staged in a local buffer of sizeof(a) bytes and the
 * exchange is done with three memcpy() calls. Both arguments have their
 * address taken, so they must be addressable lvalues. The build fails
 * (via BUILD_ASSERT_OR_ZERO) when sizeof(a) != sizeof(b).
 */
#define SWAP(a, b) do { \
void *_swap_a_ptr = &(a); \
void *_swap_b_ptr = &(b); \
unsigned char _swap_buffer[sizeof(a)]; \
memcpy(_swap_buffer, _swap_a_ptr, sizeof(a)); \
memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) + \
BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b))); \
memcpy(_swap_b_ptr, _swap_buffer, sizeof(a)); \
} while (0)
/*
 * mmap()/munmap() selection. When NO_MMAP or USE_WIN32_MMAP is set, calls
 * to mmap/munmap are redirected to git_mmap/git_munmap, and the PROT_* /
 * MAP_PRIVATE constants are supplied here if no header defined PROT_READ.
 * Otherwise the system <sys/mman.h> is used directly.
 */
#if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
#ifndef PROT_READ
#define PROT_READ 1
#define PROT_WRITE 2
#define MAP_PRIVATE 1
#endif
#define mmap git_mmap
#define munmap git_munmap
extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
extern int git_munmap(void *start, size_t length);
#else /* NO_MMAP || USE_WIN32_MMAP */
#include <sys/mman.h>
#endif /* NO_MMAP || USE_WIN32_MMAP */
/*
 * Default pack window size: 1 MiB when NO_MMAP is set; otherwise 1 GiB
 * when pointers are at least 8 bytes wide and 32 MiB when they are not.
 */
#ifdef NO_MMAP
/* This value must be multiple of (pagesize * 2) */
#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024)
#else /* NO_MMAP */
/* This value must be multiple of (pagesize * 2) */
#define DEFAULT_PACKED_GIT_WINDOW_SIZE \
(sizeof(void*) >= 8 \
? 1 * 1024 * 1024 * 1024 \
: 32 * 1024 * 1024)
#endif /* NO_MMAP */
/* Fallback for headers that do not define mmap()'s failure sentinel. */
#ifndef MAP_FAILED
#define MAP_FAILED ((void *)-1)
#endif
/*
 * on_disk_bytes(st) - size figure derived from a struct stat "st".
 * Uses st_blocks * 512 when the field is available; with
 * NO_ST_BLOCKS_IN_STRUCT_STAT it falls back to st_size.
 */
#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
#define on_disk_bytes(st) ((st).st_size)
#else
#define on_disk_bytes(st) ((st).st_blocks * 512)
#endif
/*
 * With NEEDS_MODE_TRANSLATION, the platform's S_IF* file-type constants are
 * discarded and replaced by the fixed octal values below, and stat(),
 * fstat() and lstat() are routed through git_stat(), git_fstat() and
 * git_lstat().
 * NOTE(review): presumably those wrappers translate native st_mode bits to
 * these values -- confirm in their definitions.
 */
#ifdef NEEDS_MODE_TRANSLATION
#undef S_IFMT
#undef S_IFREG
#undef S_IFDIR
#undef S_IFLNK
#undef S_IFBLK
#undef S_IFCHR
#undef S_IFIFO
#undef S_IFSOCK
#define S_IFMT 0170000
#define S_IFREG 0100000
#define S_IFDIR 0040000
#define S_IFLNK 0120000
#define S_IFBLK 0060000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_IFSOCK 0140000
/* Drop any existing function-like macro before installing ours. */
#ifdef stat
#undef stat
#endif
#define stat(path, buf) git_stat(path, buf)
extern int git_stat(const char *, struct stat *);
#ifdef fstat
#undef fstat
#endif
#define fstat(fd, buf) git_fstat(fd, buf)
extern int git_fstat(int, struct stat *);
#ifdef lstat
#undef lstat
#endif
#define lstat(path, buf) git_lstat(path, buf)
extern int git_lstat(const char *, struct stat *);
#endif
/*
 * Default pack limit, as a size_t: 1 MiB multiplied by 32 Mi (i.e. 32 TiB)
 * when pointers are at least 8 bytes wide, or by 256 (i.e. 256 MiB)
 * otherwise. The multiplier is cast to size_t before the multiplication.
 */
#define DEFAULT_PACKED_GIT_LIMIT \
((1024L * 1024L) * (size_t)(sizeof(void*) >= 8 ? (32 * 1024L * 1024L) : 256))
/* With NO_PREAD, pread() calls are redirected to git_pread(). */
#ifdef NO_PREAD
#define pread git_pread
extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset);
#endif
/*
 * Forward decl that will remind us if its twin in cache.h changes.
 * This function is used in compat/pread.c. But we can't include
 * cache.h there.
 */
extern ssize_t read_in_full(int fd, void *buf, size_t count);
/*
 * Replacements for libc functions that some platforms lack. Each NO_<FUNC>
 * build knob renames the standard function to a git-prefixed replacement
 * via a macro and declares that replacement.
 */
/* setenv() */
#ifdef NO_SETENV
#define setenv gitsetenv
extern int gitsetenv(const char *, const char *, int);
#endif
/* mkdtemp() */
#ifdef NO_MKDTEMP
#define mkdtemp gitmkdtemp
extern char *gitmkdtemp(char *);
#endif
/* unsetenv(); note the replacement is declared as returning void */
#ifdef NO_UNSETENV
#define unsetenv gitunsetenv
extern void gitunsetenv(const char *);
#endif
/* strcasestr() */
#ifdef NO_STRCASESTR
#define strcasestr gitstrcasestr
extern char *gitstrcasestr(const char *haystack, const char *needle);
#endif
/* strlcpy() */
#ifdef NO_STRLCPY
#define strlcpy gitstrlcpy
extern size_t gitstrlcpy(char *, const char *, size_t);
#endif
/* strtoumax() and strtoimax() are both replaced under the same knob */
#ifdef NO_STRTOUMAX
#define strtoumax gitstrtoumax
extern uintmax_t gitstrtoumax(const char *, char **, int);
#define strtoimax gitstrtoimax
extern intmax_t gitstrtoimax(const char *, char **, int);
#endif
/* hstrerror() */
#ifdef NO_HSTRERROR
#define hstrerror githstrerror
extern const char *githstrerror(int herror);
#endif
/* memmem() */
#ifdef NO_MEMMEM
#define memmem gitmemmem
void *gitmemmem(const void *haystack, size_t haystacklen,
const void *needle, size_t needlelen);
#endif
/*
 * OVERRIDE_STRDUP replaces strdup() even when the platform has one; any
 * existing strdup macro is removed first.
 */
#ifdef OVERRIDE_STRDUP
#ifdef strdup
#undef strdup
#endif
#define strdup gitstrdup
char *gitstrdup(const char *s);
#endif
/* Without getpagesize(), ask sysconf() for the page size instead. */
#ifdef NO_GETPAGESIZE
#define getpagesize() sysconf(_SC_PAGESIZE)
#endif
2016-08-22 14:47:55 +02:00
/*
 * Where the platform headers do not define O_CLOEXEC, define it as 0 so
 * that OR-ing it into a flags value has no effect.
 */
#ifndef O_CLOEXEC
#define O_CLOEXEC 0
#endif
/*
 * With FREAD_READS_DIRECTORIES, fopen() calls are redirected to
 * git_fopen(). A translation unit can keep the real fopen() by defining
 * SUPPRESS_FOPEN_REDEFINITION before this point; git_fopen() is declared
 * either way.
 */
#ifdef FREAD_READS_DIRECTORIES
# if !defined(SUPPRESS_FOPEN_REDEFINITION)
# ifdef fopen
# undef fopen
# endif
# define fopen(a,b) git_fopen(a,b)
# endif
extern FILE *git_fopen(const char*, const char*);
#endif
/*
 * With SNPRINTF_RETURNS_BOGUS, snprintf() and vsnprintf() are replaced by
 * git_snprintf() and git_vsnprintf(); any existing macros of those names
 * are removed first.
 */
#ifdef SNPRINTF_RETURNS_BOGUS
#ifdef snprintf
#undef snprintf
#endif
#define snprintf git_snprintf
extern int git_snprintf(char *str, size_t maxsize,
const char *format, ...);
#ifdef vsnprintf
#undef vsnprintf
#endif
#define vsnprintf git_vsnprintf
extern int git_vsnprintf(char *str, size_t maxsize,
const char *format, va_list ap);
#endif
/* When __GLIBC_PREREQ reports glibc 2.1 or newer, mark strchrnul() as available. */
#ifdef __GLIBC_PREREQ
#if __GLIBC_PREREQ(2, 1)
#define HAVE_STRCHRNUL
#endif
#endif
#ifndef HAVE_STRCHRNUL
#define strchrnul gitstrchrnul
/*
 * Fallback strchrnul(): return a pointer to the first occurrence of "c"
 * in "s", or to the terminating NUL of "s" if "c" does not occur.
 */
static inline char *gitstrchrnul(const char *s, int c)
{
	const char *cursor;

	for (cursor = s; *cursor; cursor++) {
		if (*cursor == c)
			break;
	}
	return (char *)cursor;
}
#endif
/* Declarations for platforms whose headers lack inet_pton()/inet_ntop(). */
#ifdef NO_INET_PTON
int inet_pton(int af, const char *src, void *dst);
#endif
#ifdef NO_INET_NTOP
const char *inet_ntop(int af, const void *src, char *dst, size_t size);
#endif
/* With NO_PTHREADS, atexit() calls are redirected to git_atexit(). */
#ifdef NO_PTHREADS
#define atexit git_atexit
extern int git_atexit(void (*handler)(void));
#endif
/*
 * Callback type taking a size_t, plus the function that installs one.
 * NOTE(review): presumably the callback is asked to release memory when an
 * allocation fails, and the setter returns the previously installed
 * routine -- confirm in the definition.
 */
typedef void (*try_to_free_t)(size_t);
extern try_to_free_t set_try_to_free_routine(try_to_free_t);
/*
 * Checked size_t addition: return a + b, or die() with a diagnostic if
 * the sum would overflow.
 */
static inline size_t st_add(size_t a, size_t b)
{
	const size_t sum = a + b; /* unsigned wrap is harmless; checked below */

	if (unsigned_add_overflows(a, b))
		die("size_t overflow: %"PRIuMAX" + %"PRIuMAX,
		    (uintmax_t)a, (uintmax_t)b);
	return sum;
}
/* Checked sums of three and four size_t values, built from nested st_add() calls. */
#define st_add3(a,b,c) st_add(st_add((a),(b)),(c))
#define st_add4(a,b,c,d) st_add(st_add3((a),(b),(c)),(d))
/*
 * Checked size_t multiplication: return a * b, or die() with a diagnostic
 * if the product would overflow.
 */
static inline size_t st_mult(size_t a, size_t b)
{
	const size_t product = a * b; /* unsigned wrap is harmless; checked below */

	if (unsigned_mult_overflows(a, b))
		die("size_t overflow: %"PRIuMAX" * %"PRIuMAX,
		    (uintmax_t)a, (uintmax_t)b);
	return product;
}
/*
 * Checked size_t subtraction: return a - b, or die() with a diagnostic if
 * b is larger than a.
 */
static inline size_t st_sub(size_t a, size_t b)
{
	const size_t difference = a - b; /* unsigned wrap is harmless; checked below */

	if (b > a)
		die("size_t underflow: %"PRIuMAX" - %"PRIuMAX,
		    (uintmax_t)a, (uintmax_t)b);
	return difference;
}
Portable alloca for Git In the next patch we'll have to use alloca() for performance reasons, but since alloca is non-standardized and is not portable, let's have a trick with compatibility wrappers: 1. at configure time, determine, do we have working alloca() through alloca.h, and define #define HAVE_ALLOCA_H if yes. 2. in code #ifdef HAVE_ALLOCA_H # include <alloca.h> # define xalloca(size) (alloca(size)) # define xalloca_free(p) do {} while(0) #else # define xalloca(size) (xmalloc(size)) # define xalloca_free(p) (free(p)) #endif and use it like func() { p = xalloca(size); ... xalloca_free(p); } This way, for systems, where alloca is available, we'll have optimal on-stack allocations with fast executions. On the other hand, on systems, where alloca is not available, this gracefully fallbacks to xmalloc/free. Both autoconf and config.mak.uname configurations were updated. For autoconf, we are not bothering considering cases, when no alloca.h is available, but alloca() works some other way - its simply alloca.h is available and works or not, everything else is deep legacy. For config.mak.uname, I've tried to make my almost-sure guess for where alloca() is available, but since I only have access to Linux it is the only change I can be sure about myself, with relevant to other changed systems people Cc'ed. NOTE SunOS and Windows had explicit -DHAVE_ALLOCA_H in their configurations. I've changed that to now-common HAVE_ALLOCA_H=YesPlease which should be correct. Cc: Brandon Casey <drafnel@gmail.com> Cc: Marius Storm-Olsen <mstormo@gmail.com> Cc: Johannes Sixt <j6t@kdbg.org> Cc: Johannes Schindelin <Johannes.Schindelin@gmx.de> Cc: Ramsay Jones <ramsay@ramsay1.demon.co.uk> Cc: Gerrit Pape <pape@smarden.org> Cc: Petr Salinger <Petr.Salinger@seznam.cz> Cc: Jonathan Nieder <jrnieder@gmail.com> Acked-by: Thomas Schwinge <thomas@codesourcery.com> (GNU Hurd changes) Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-03-27 15:22:50 +01:00
/*
 * xalloca(size) / xalloca_free(p): scratch allocation that must always be
 * released with xalloca_free(). With HAVE_ALLOCA_H the memory comes from
 * alloca() and the free is a no-op; otherwise it comes from xmalloc() and
 * is released with free().
 */
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
# define xalloca(size) (alloca(size))
# define xalloca_free(p) do {} while (0)
#else
# define xalloca(size) (xmalloc(size))
# define xalloca_free(p) (free(p))
#endif
Shrink the git binary a bit by avoiding unnecessary inline functions So I was looking at the disgusting size of the git binary, and even with the debugging removed, and using -Os instead of -O2, the size of the text section was pretty high. In this day and age I guess almost a megabyte of text isn't really all that surprising, but it still doesn't exactly make me think "lean and mean". With -Os, a surprising amount of text space is wasted on inline functions that end up just being replicated multiple times, and where performance really isn't a valid reason to inline them. In particular, the trivial wrapper functions like "xmalloc()" are used _everywhere_, and making them inline just duplicates the text (and the string we use to 'die()' on failure) unnecessarily. So this just moves them into a "wrapper.c" file, getting rid of a tiny bit of unnecessary bloat. The following numbers are both with "CFLAGS=-Os": Before: [torvalds@woody git]$ size git text data bss dec hex filename 700460 15160 292184 1007804 f60bc git After: [torvalds@woody git]$ size git text data bss dec hex filename 670540 15160 292184 977884 eebdc git so it saves almost 30k of text-space (it actually saves more than that with the default -O2, but I don't think that's necessarily a very relevant number from a "try to shrink git" standpoint). It might conceivably have a performance impact, but none of this should be _that_ performance critical. The real cost is not generally in the wrapper anyway, but in the code it wraps (ie the cost of "xread()" is all in the read itself, not in the trivial wrapping of it). Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
/*
 * Out-of-line allocation wrappers.
 * NOTE(review): the x* variants presumably die() on failure, while the
 * *_gently variant presumably reports failure to the caller instead --
 * confirm in the definitions.
 */
extern char *xstrdup(const char *str);
extern void *xmalloc(size_t size);
extern void *xmallocz(size_t size);
extern void *xmallocz_gently(size_t size);
Shrink the git binary a bit by avoiding unnecessary inline functions So I was looking at the disgusting size of the git binary, and even with the debugging removed, and using -Os instead of -O2, the size of the text section was pretty high. In this day and age I guess almost a megabyte of text isn't really all that surprising, but it still doesn't exactly make me think "lean and mean". With -Os, a surprising amount of text space is wasted on inline functions that end up just being replicated multiple times, and where performance really isn't a valid reason to inline them. In particular, the trivial wrapper functions like "xmalloc()" are used _everywhere_, and making them inline just duplicates the text (and the string we use to 'die()' on failure) unnecessarily. So this just moves them into a "wrapper.c" file, getting rid of a tiny bit of unnecessary bloat. The following numbers are both with "CFLAGS=-Os": Before: [torvalds@woody git]$ size git text data bss dec hex filename 700460 15160 292184 1007804 f60bc git After: [torvalds@woody git]$ size git text data bss dec hex filename 670540 15160 292184 977884 eebdc git so it saves almost 30k of text-space (it actually saves more than that with the default -O2, but I don't think that's necessarily a very relevant number from a "try to shrink git" standpoint). It might conceivably have a performance impact, but none of this should be _that_ performance critical. The real cost is not generally in the wrapper anyway, but in the code it wraps (ie the cost of "xread()" is all in the read itself, not in the trivial wrapping of it). Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
/*
 * More out-of-line wrappers: duplication, reallocation, mmap and open.
 * NOTE(review): failure behaviour (die vs. return) is not visible from
 * these prototypes -- confirm in the definitions.
 */
extern void *xmemdupz(const void *data, size_t len);
extern char *xstrndup(const char *str, size_t len);
extern void *xrealloc(void *ptr, size_t size);
extern void *xcalloc(size_t nmemb, size_t size);
extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
extern void *xmmap_gently(void *start, size_t length, int prot, int flags, int fd, off_t offset);
extern int xopen(const char *path, int flags, ...);
Shrink the git binary a bit by avoiding unnecessary inline functions So I was looking at the disgusting size of the git binary, and even with the debugging removed, and using -Os instead of -O2, the size of the text section was pretty high. In this day and age I guess almost a megabyte of text isn't really all that surprising, but it still doesn't exactly make me think "lean and mean". With -Os, a surprising amount of text space is wasted on inline functions that end up just being replicated multiple times, and where performance really isn't a valid reason to inline them. In particular, the trivial wrapper functions like "xmalloc()" are used _everywhere_, and making them inline just duplicates the text (and the string we use to 'die()' on failure) unnecessarily. So this just moves them into a "wrapper.c" file, getting rid of a tiny bit of unnecessary bloat. The following numbers are both with "CFLAGS=-Os": Before: [torvalds@woody git]$ size git text data bss dec hex filename 700460 15160 292184 1007804 f60bc git After: [torvalds@woody git]$ size git text data bss dec hex filename 670540 15160 292184 977884 eebdc git so it saves almost 30k of text-space (it actually saves more than that with the default -O2, but I don't think that's necessarily a very relevant number from a "try to shrink git" standpoint). It might conceivably have a performance impact, but none of this should be _that_ performance critical. The real cost is not generally in the wrapper anyway, but in the code it wraps (ie the cost of "xread()" is all in the read itself, not in the trivial wrapping of it). Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
/*
 * Wrappers with the same parameter lists as read(2), write(2) and
 * pread(2).
 * NOTE(review): presumably they retry on transient errors -- confirm in
 * the definitions.
 */
extern ssize_t xread(int fd, void *buf, size_t len);
extern ssize_t xwrite(int fd, const void *buf, size_t len);
extern ssize_t xpread(int fd, void *buf, size_t len, off_t offset);
Shrink the git binary a bit by avoiding unnecessary inline functions So I was looking at the disgusting size of the git binary, and even with the debugging removed, and using -Os instead of -O2, the size of the text section was pretty high. In this day and age I guess almost a megabyte of text isn't really all that surprising, but it still doesn't exactly make me think "lean and mean". With -Os, a surprising amount of text space is wasted on inline functions that end up just being replicated multiple times, and where performance really isn't a valid reason to inline them. In particular, the trivial wrapper functions like "xmalloc()" are used _everywhere_, and making them inline just duplicates the text (and the string we use to 'die()' on failure) unnecessarily. So this just moves them into a "wrapper.c" file, getting rid of a tiny bit of unnecessary bloat. The following numbers are both with "CFLAGS=-Os": Before: [torvalds@woody git]$ size git text data bss dec hex filename 700460 15160 292184 1007804 f60bc git After: [torvalds@woody git]$ size git text data bss dec hex filename 670540 15160 292184 977884 eebdc git so it saves almost 30k of text-space (it actually saves more than that with the default -O2, but I don't think that's necessarily a very relevant number from a "try to shrink git" standpoint). It might conceivably have a performance impact, but none of this should be _that_ performance critical. The real cost is not generally in the wrapper anyway, but in the code it wraps (ie the cost of "xread()" is all in the read itself, not in the trivial wrapping of it). Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
/*
 * Wrappers with the same parameter lists as dup(2) and fopen(3).
 * NOTE(review): failure handling is not visible here -- confirm in the
 * definitions.
 */
extern int xdup(int fd);
extern FILE *xfopen(const char *path, const char *mode);
Shrink the git binary a bit by avoiding unnecessary inline functions So I was looking at the disgusting size of the git binary, and even with the debugging removed, and using -Os instead of -O2, the size of the text section was pretty high. In this day and age I guess almost a megabyte of text isn't really all that surprising, but it still doesn't exactly make me think "lean and mean". With -Os, a surprising amount of text space is wasted on inline functions that end up just being replicated multiple times, and where performance really isn't a valid reason to inline them. In particular, the trivial wrapper functions like "xmalloc()" are used _everywhere_, and making them inline just duplicates the text (and the string we use to 'die()' on failure) unnecessarily. So this just moves them into a "wrapper.c" file, getting rid of a tiny bit of unnecessary bloat. The following numbers are both with "CFLAGS=-Os": Before: [torvalds@woody git]$ size git text data bss dec hex filename 700460 15160 292184 1007804 f60bc git After: [torvalds@woody git]$ size git text data bss dec hex filename 670540 15160 292184 977884 eebdc git so it saves almost 30k of text-space (it actually saves more than that with the default -O2, but I don't think that's necessarily a very relevant number from a "try to shrink git" standpoint). It might conceivably have a performance impact, but none of this should be _that_ performance critical. The real cost is not generally in the wrapper anyway, but in the code it wraps (ie the cost of "xread()" is all in the read itself, not in the trivial wrapping of it). Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-22 21:19:25 +02:00
/*
 * Stream, temporary-file and working-directory helpers.
 * NOTE(review): judging by the names only, fopen_for_writing() opens
 * "path" for writing and fopen_or_warn() warns when the open fails --
 * confirm in the definitions.
 */
extern FILE *xfdopen(int fd, const char *mode);
extern int xmkstemp(char *temp_filename);
extern int xmkstemp_mode(char *temp_filename, int mode);
extern char *xgetcwd(void);
extern FILE *fopen_for_writing(const char *path);
extern FILE *fopen_or_warn(const char *path, const char *mode);
/*
 * FREE_AND_NULL(ptr) is like free(ptr) followed by ptr = NULL. Note
 * that ptr is used twice, so don't pass e.g. ptr++.
 */
#define FREE_AND_NULL(p) do { free(p); (p) = NULL; } while (0)
/*
 * ALLOC_ARRAY(x, alloc) assigns to "x" an xmalloc()ed array of "alloc"
 * elements of type *(x); the byte count is computed with st_mult().
 */
#define ALLOC_ARRAY(x, alloc) (x) = xmalloc(st_mult(sizeof(*(x)), (alloc)))
pack-objects: zero-initialize tree_depth/layer arrays Commit 108f530385 (pack-objects: move tree_depth into 'struct packing_data', 2018-08-16) started maintaining a tree_depth array that matches the "objects" array. We extend the array when: 1. The objects array is extended, in which case we use realloc to extend the tree_depth array. 2. A caller asks to store a tree_depth for object N, and this is the first such request; we create the array from scratch and store the value for N. In the latter case, though, we use regular xmalloc(), and the depth values for any objects besides N is undefined. This happens to not trigger a bug with the current code, but the reasons are quite subtle: - we never ask about the depth for any object with index i < N. This is because we store the depth immediately for all trees and blobs. So any such "i" must be a non-tree, and therefore we will never need to care about its depth (in fact, we really only care about the depth of trees). - there are no objects at this point with index i > N, because we always fill in the depth for a tree immediately after its object entry is created (we may still allocate uninitialized depth entries, but they'll be initialized by packlist_alloc() when it initializes the entry in the "objects" array). So it works, but only by chance. To be defensive, let's zero the array, which matches the "unset" values which would be handed out by oe_tree_depth() already. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-20 10:48:57 +01:00
/*
 * CALLOC_ARRAY(x, alloc) assigns to "x" an xcalloc()ed array of "alloc"
 * elements of type *(x). "x" must be an assignable lvalue; it appears
 * twice in the expansion (once inside sizeof, where it is not evaluated).
 *
 * The expansion has no trailing semicolon, matching ALLOC_ARRAY and
 * REALLOC_ARRAY, so that "CALLOC_ARRAY(x, n);" is exactly one statement
 * and can be used as the body of an unbraced if/else.
 */
#define CALLOC_ARRAY(x, alloc) (x) = xcalloc((alloc), sizeof(*(x)))
/*
 * REALLOC_ARRAY(x, alloc) resizes the array "x" to "alloc" elements of
 * type *(x) with xrealloc(); the byte count is computed with st_mult().
 * "x" is evaluated twice.
 */
#define REALLOC_ARRAY(x, alloc) (x) = xrealloc((x), st_mult(sizeof(*(x)), (alloc)))
/*
 * COPY_ARRAY(dst, src, n) copies "n" elements from "src" to "dst" with
 * memcpy(). The build fails if the two element types differ in size.
 */
#define COPY_ARRAY(dst, src, n) copy_array((dst), (src), (n), sizeof(*(dst)) + \
	BUILD_ASSERT_OR_ZERO(sizeof(*(dst)) == sizeof(*(src))))
/* Helper behind COPY_ARRAY; memcpy() is not called at all when "n" is 0. */
static inline void copy_array(void *dst, const void *src, size_t n, size_t size)
{
	if (!n)
		return;
	memcpy(dst, src, st_mult(size, n));
}
/*
 * MOVE_ARRAY(dst, src, n) copies "n" elements from "src" to "dst" with
 * memmove(). The build fails if the two element types differ in size.
 */
#define MOVE_ARRAY(dst, src, n) move_array((dst), (src), (n), sizeof(*(dst)) + \
	BUILD_ASSERT_OR_ZERO(sizeof(*(dst)) == sizeof(*(src))))
/* Helper behind MOVE_ARRAY; memmove() is not called at all when "n" is 0. */
static inline void move_array(void *dst, const void *src, size_t n, size_t size)
{
	if (!n)
		return;
	memmove(dst, src, st_mult(size, n));
}
/*
* These functions help you allocate structs with flex arrays, and copy
* the data directly into the array. For example, if you had:
*
* struct foo {
* int bar;
* char name[FLEX_ARRAY];
* };
*
* you can do:
*
* struct foo *f;
* FLEX_ALLOC_MEM(f, name, src, len);
*
* to allocate a "foo" with the contents of "src" in the "name" field.
* The resulting struct is automatically zero'd, and the flex-array field
* is NUL-terminated (whether the incoming src buffer was or not).
*
* The FLEXPTR_* variants operate on structs that don't use flex-arrays,
* but do want to store a pointer to some extra data in the same allocated
* block. For example, if you have:
*
* struct foo {
* char *name;
* int bar;
* };
*
* you can do:
*
* struct foo *f;
* FLEXPTR_ALLOC_STR(f, name, src);
*
* and "name" will point to a block of memory after the struct, which will be
* freed along with the struct (but the pointer can be repointed anywhere).
*
* The *_STR variants accept a string parameter rather than a ptr/len
* combination.
*
* Note that these macros will evaluate the first parameter multiple
* times, and it must be assignable as an lvalue.
*/
/*
 * Allocate sizeof(*(x)) + len + 1 bytes with xcalloc() (size computed by
 * st_add3()), assign the result to "x", and copy "len" bytes of "buf" into
 * the member "flexname". "len" is evaluated once.
 */
#define FLEX_ALLOC_MEM(x, flexname, buf, len) do { \
size_t flex_array_len_ = (len); \
(x) = xcalloc(1, st_add3(sizeof(*(x)), flex_array_len_, 1)); \
memcpy((void *)(x)->flexname, (buf), flex_array_len_); \
} while (0)
/*
 * Same allocation as FLEX_ALLOC_MEM, but the "len" bytes of "buf" are
 * copied to the memory immediately after the struct, i.e. at (x) + 1, and
 * the pointer member "ptrname" is set to point there.
 */
#define FLEXPTR_ALLOC_MEM(x, ptrname, buf, len) do { \
size_t flex_array_len_ = (len); \
(x) = xcalloc(1, st_add3(sizeof(*(x)), flex_array_len_, 1)); \
memcpy((x) + 1, (buf), flex_array_len_); \
(x)->ptrname = (void *)((x)+1); \
} while(0)
/*
 * String variants: the length is taken with strlen(str). Note that "str"
 * appears twice in each expansion.
 */
#define FLEX_ALLOC_STR(x, flexname, str) \
FLEX_ALLOC_MEM((x), flexname, (str), strlen(str))
#define FLEXPTR_ALLOC_STR(x, ptrname, str) \
FLEXPTR_ALLOC_MEM((x), ptrname, (str), strlen(str))
/* Like xstrdup(), except that a NULL "str" yields NULL instead of being passed on. */
static inline char *xstrdup_or_null(const char *str)
{
	if (!str)
		return NULL;
	return xstrdup(str);
}
/*
 * Convert an off_t length to size_t, calling die() if the value cannot be
 * represented: that is, if "len" is negative or larger than the maximum
 * size_t value.
 */
static inline size_t xsize_t(off_t len)
{
	/*
	 * Check the range before converting. A cast-and-compare round trip
	 * would let negative values through when off_t and size_t have the
	 * same width, and would depend on an implementation-defined
	 * unsigned-to-signed conversion.
	 */
	if (len < 0 || (uintmax_t) len > (uintmax_t) ((size_t) -1))
		die("Cannot handle files this big");
	return (size_t) len;
}
/*
 * printf-style formatting into "dst" of capacity "max"; the attribute lets
 * the compiler check the format string (argument 3) against the variadic
 * arguments.
 * NOTE(review): presumably treats truncation as a fatal error -- confirm
 * in the definition.
 */
__attribute__((format (printf, 3, 4)))
extern int xsnprintf(char *dst, size_t max, const char *fmt, ...);
/* Fallback for platforms whose headers do not define HOST_NAME_MAX. */
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX 256
#endif
/* Takes the same parameters as gethostname(2); defined elsewhere. */
extern int xgethostname(char *buf, size_t len);
/* in ctype.c, for kwset users */
extern const unsigned char tolower_trans_tbl[256];
/* Sane ctype - no locale, and works with signed chars */
#undef isascii
#undef isspace
#undef isdigit
#undef isalpha
#undef isalnum
#undef isprint
#undef islower
#undef isupper
#undef tolower
#undef toupper
#undef iscntrl
#undef ispunct
#undef isxdigit
extern const unsigned char sane_ctype[256];
#define GIT_SPACE 0x01
#define GIT_DIGIT 0x02
#define GIT_ALPHA 0x04
#define GIT_GLOB_SPECIAL 0x08
#define GIT_REGEX_SPECIAL 0x10
#define GIT_PATHSPEC_MAGIC 0x20
#define GIT_CNTRL 0x40
#define GIT_PUNCT 0x80
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isascii(x) (((x) & ~0x7f) == 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
#define islower(x) sane_iscase(x, 1)
#define isupper(x) sane_iscase(x, 0)
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)
#define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC)
/*
 * Case conversion helper behind tolower()/toupper(). For a byte in the
 * GIT_ALPHA class, bit 0x20 is cleared and then "high" is OR-ed in
 * (0x20 for lower case, 0 for upper case). Any other value is returned
 * unchanged.
 */
static inline int sane_case(int x, int high)
{
	return sane_istest(x, GIT_ALPHA) ? ((x & ~0x20) | high) : x;
}
/*
 * Helper behind islower()/isupper(). Returns 0 for anything outside the
 * GIT_ALPHA class; otherwise reports whether bit 0x20 of "x" is set (when
 * "is_lower" is non-zero) or clear (when "is_lower" is zero).
 */
static inline int sane_iscase(int x, int is_lower)
{
	const int lower_bit_set = (x & 0x20) != 0;

	if (!sane_istest(x, GIT_ALPHA))
		return 0;
	return is_lower ? lower_bit_set : !lower_bit_set;
}
/*
 * Case-insensitive skip_prefix: if "str" starts with "prefix", ignoring
 * case, point "*out" just past the prefix and return 1; otherwise return 0
 * and leave "*out" alone. The comparison goes through tolower(), so it is
 * strictly ASCII (no multi-byte characters or locale-specific
 * conversions).
 */
static inline int skip_iprefix(const char *str, const char *prefix,
			       const char **out)
{
	for (;;) {
		/* Reaching the end of the prefix means everything matched. */
		if (*prefix == '\0') {
			*out = str;
			return 1;
		}
		if (tolower(*str) != tolower(*prefix))
			return 0;
		str++;
		prefix++;
	}
}
/*
 * Parse all of "s" as an unsigned int in the given "base". On success the
 * value is stored in "*result" and 0 is returned. Returns -1, leaving
 * "*result" untouched, if "s" contains a '-' anywhere, is empty, has
 * trailing characters, makes strtoul() set errno, or does not fit in an
 * unsigned int. errno is reset to 0 on entry.
 */
static inline int strtoul_ui(char const *s, int base, unsigned int *result)
{
	char *end;
	unsigned long value;

	errno = 0;
	/* negative values would be accepted by strtoul */
	if (strchr(s, '-') != NULL)
		return -1;

	value = strtoul(s, &end, base);
	if (errno)
		return -1;
	if (end == s || *end != '\0')
		return -1;
	if (value != (unsigned int) value)
		return -1;

	*result = value;
	return 0;
}
/*
 * Parse all of "s" as an int in the given "base". On success the value is
 * stored in "*result" and 0 is returned. Returns -1, leaving "*result"
 * untouched, if "s" is empty, has trailing characters, makes strtol() set
 * errno, or does not fit in an int. errno is reset to 0 on entry.
 */
static inline int strtol_i(char const *s, int base, int *result)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(s, &end, base);
	if (errno)
		return -1;
	if (end == s || *end != '\0')
		return -1;
	if (value != (int) value)
		return -1;

	*result = value;
	return 0;
}
/* With INTERNAL_QSORT, qsort() calls are redirected to git_qsort(). */
#ifdef INTERNAL_QSORT
void git_qsort(void *base, size_t nmemb, size_t size,
int(*compar)(const void *, const void *));
#define qsort git_qsort
#endif
/*
 * QSORT(base, n, compar) sorts the "n"-element array "base"; the element
 * size is taken from *(base).
 */
#define QSORT(base, n, compar) sane_qsort((base), (n), sizeof(*(base)), compar)
/*
 * Helper behind QSORT: qsort() is not called at all for arrays of fewer
 * than two elements.
 */
static inline void sane_qsort(void *base, size_t nmemb, size_t size,
			      int(*compar)(const void *, const void *))
{
	if (nmemb <= 1)
		return;
	qsort(base, nmemb, size, compar);
}
/* Without HAVE_ISO_QSORT_S, qsort_s() calls are redirected to git_qsort_s(). */
#ifndef HAVE_ISO_QSORT_S
int git_qsort_s(void *base, size_t nmemb, size_t size,
int (*compar)(const void *, const void *, void *), void *ctx);
#define qsort_s git_qsort_s
#endif
/*
 * QSORT_S(base, n, compar, ctx) sorts like QSORT but passes "ctx" through
 * to the comparison function. A non-zero return from qsort_s() is treated
 * as a programming error and reported via BUG().
 */
#define QSORT_S(base, n, compar, ctx) do { \
if (qsort_s((base), (n), sizeof(*(base)), compar, ctx)) \
BUG("qsort_s() failed"); \
} while (0)
/*
 * regexec_buf() below depends on REG_STARTEND, so refuse to build without
 * it.
 */
#ifndef REG_STARTEND
#error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd"
#endif
/*
 * Run regexec() on a buffer given as pointer plus length. The match
 * window is passed to regexec() through pmatch[0] (rm_so = 0,
 * rm_eo = size) together with REG_STARTEND, so the caller must supply at
 * least one pmatch slot. Returns whatever regexec() returns.
 */
static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size,
			      size_t nmatch, regmatch_t pmatch[], int eflags)
{
	const int flags = eflags | REG_STARTEND;

	assert(nmatch > 0 && pmatch);
	/* With REG_STARTEND, regexec() reads the search range from pmatch[0]. */
	pmatch->rm_so = 0;
	pmatch->rm_eo = size;
	return regexec(preg, buf, nmatch, pmatch, flags);
}
/*
 * Mode bit for callers to add when creating directories: S_ISGID unless
 * DIR_HAS_BSD_GROUP_SEMANTICS is set, in which case it is 0.
 * NOTE(review): presumably because directories already inherit the
 * parent's group on such systems -- confirm.
 */
#ifndef DIR_HAS_BSD_GROUP_SEMANTICS
# define FORCE_DIR_SET_GID S_ISGID
#else
# define FORCE_DIR_SET_GID 0
#endif
/*
 * ST_CTIME_NSEC(st) / ST_MTIME_NSEC(st): nanosecond part of a struct
 * stat's ctime/mtime as an unsigned int. NO_NSEC forces both to 0 and
 * undefines USE_NSEC; otherwise USE_ST_TIMESPEC selects the
 * st_ctimespec/st_mtimespec field names over st_ctim/st_mtim.
 */
#ifdef NO_NSEC
#undef USE_NSEC
#define ST_CTIME_NSEC(st) 0
#define ST_MTIME_NSEC(st) 0
#else
#ifdef USE_ST_TIMESPEC
#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec))
#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec))
#else
#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec))
#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec))
#endif
#endif
/* fstat_is_reliable() is 0 when built with UNRELIABLE_FSTAT, 1 otherwise. */
#ifdef UNRELIABLE_FSTAT
#define fstat_is_reliable() 0
#else
#define fstat_is_reliable() 1
#endif
/*
 * Provide va_copy() where the headers lack it: prefer the compiler's
 * __va_copy when that exists, else fall back to plain assignment.
 */
#ifndef va_copy
/*
 * Since an obvious implementation of va_list would be to make it a
 * pointer into the stack frame, a simple assignment will work on
 * many systems. But let's try to be more portable.
 */
#ifdef __va_copy
#define va_copy(dst, src) __va_copy(dst, src)
#else
#define va_copy(dst, src) ((dst) = (src))
#endif
#endif
/*
 * Variadic macros are assumed available with GCC-compatible compilers,
 * with _MSC_VER >= 1400, or when __C99_MACRO_WITH_VA_ARGS is defined.
 */
#if defined(__GNUC__) || (_MSC_VER >= 1400) || defined(__C99_MACRO_WITH_VA_ARGS)
#define HAVE_VARIADIC_MACROS 1
#endif
/*
 * usage.c: only to be used for testing BUG() implementation (see test-tool).
 * NOTE(review): presumably a non-zero value makes BUG() exit with that
 * code -- confirm in usage.c.
 */
extern int BUG_exit_code;
usage.c: add BUG() function There's a convention in Git's code base to write assertions as: if (...some_bad_thing...) die("BUG: the terrible thing happened"); with the idea that users should never see a "BUG:" message (but if they, it at least gives a clue what happened). We use die() here because it's convenient, but there are a few draw-backs: 1. Without parsing the messages, it's hard for callers to distinguish BUG assertions from regular errors. For instance, it would be nice if the test suite could check that we don't hit any assertions, but test_must_fail will pass BUG deaths as OK. 2. It would be useful to add more debugging features to BUG assertions, like file/line numbers or dumping core. 3. The die() handler can be replaced, and might not actually exit the whole program (e.g., it may just pthread_exit()). This is convenient for normal errors, but for an assertion failure (which is supposed to never happen), we're probably better off taking down the whole process as quickly and cleanly as possible. We could address these by checking in die() whether the error message starts with "BUG", and behaving appropriately. But there's little advantage at that point to sharing the die() code, and only downsides (e.g., we can't change the BUG() interface independently). Moreover, converting all of the existing BUG calls reveals that the test suite does indeed trigger a few of them. Instead, this patch introduces a new BUG() function, which prints an error before dying via SIGABRT. This gives us test suite checking and core dumps. The function is actually a macro (when supported) so that we can show the file/line number. We can convert die("BUG") invocations to BUG() in further patches, dealing with any test fallouts individually. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-13 05:28:50 +02:00
/*
 * BUG(fmt, ...) reports an internal error and does not return (NORETURN).
 * With variadic macros it is a macro that forwards __FILE__ and __LINE__
 * to BUG_fl(); otherwise it is a plain variadic function with no location
 * information. The format attributes let the compiler check the printf
 * arguments in both forms.
 */
#ifdef HAVE_VARIADIC_MACROS
__attribute__((format (printf, 3, 4))) NORETURN
void BUG_fl(const char *file, int line, const char *fmt, ...);
#define BUG(...) BUG_fl(__FILE__, __LINE__, __VA_ARGS__)
#else
__attribute__((format (printf, 1, 2))) NORETURN
void BUG(const char *fmt, ...);
#endif
/*
 * Unlink "path".
 * Preserves errno, prints a message, but gives no warning for ENOENT.
 * Returns 0 on success, which includes trying to unlink an object that does
 * not exist.
 */
int unlink_or_warn(const char *path);
/*
 * Tries to unlink "file". Returns 0 if unlink succeeded
 * or the file already didn't exist. Returns -1 and
 * appends a message to "err" suitable for
 * 'error("%s", err->buf)' on error.
 */
int unlink_or_msg(const char *file, struct strbuf *err);
/*
 * Remove the directory "path".
 * Preserves errno, prints a message, but gives no warning for ENOENT.
 * Returns 0 on success, which includes trying to remove a directory that does
 * not exist.
 */
int rmdir_or_warn(const char *path);
/*
 * Calls the correct function out of {unlink,rmdir}_or_warn based on
 * the supplied file mode "mode".
 */
int remove_or_warn(unsigned int mode, const char *path);
/*
* Call access(2), but warn for any error except "missing file"
* (ENOENT or ENOTDIR).
*/
config: allow inaccessible configuration under $HOME The changes v1.7.12.1~2^2~4 (config: warn on inaccessible files, 2012-08-21) and v1.8.1.1~22^2~2 (config: treat user and xdg config permission problems as errors, 2012-10-13) were intended to prevent important configuration (think "[transfer] fsckobjects") from being ignored when the configuration is unintentionally unreadable (for example with EIO on a flaky filesystem, or with ENOMEM due to a DoS attack). Usually ~/.gitconfig and ~/.config/git are readable by the current user, and if they aren't then it would be easy to fix those permissions, so the damage from adding this check should have been minimal. Unfortunately the access() check often trips when git is being run as a server. A daemon (such as inetd or git-daemon) starts as "root", creates a listening socket, and then drops privileges, meaning that when git commands are invoked they cannot access $HOME and die with fatal: unable to access '/root/.config/git/config': Permission denied Any patch to fix this would have one of three problems: 1. We annoy sysadmins who need to take an extra step to handle HOME when dropping privileges (the current behavior, or any other proposal that they have to opt into). 2. We annoy sysadmins who want to set HOME when dropping privileges, either by making what they want to do impossible, or making them set an extra variable or option to accomplish what used to work (e.g., a patch to git-daemon to set HOME when --user is passed). 3. We loosen the check, so some cases which might be noteworthy are not caught. This patch is of type (3). Treat user and xdg configuration that are inaccessible due to permissions (EACCES) as though no user configuration was provided at all. An alternative method would be to check if $HOME is readable, but that would not help in cases where the user who dropped privileges had a globally readable HOME with only .config or .gitconfig being private. 
This does not change the behavior when /etc/gitconfig or .git/config is unreadable (since those are more serious configuration errors), nor when ~/.gitconfig or ~/.config/git is unreadable due to problems other than permissions. Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Improved-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-12 23:03:18 +02:00
/*
 * Flag bit accepted in the "flag" argument of access_or_warn() and
 * access_or_die().
 * NOTE(review): going by its name, it asks those helpers to tolerate
 * EACCES rather than report it -- confirm against their definitions.
 */
#define ACCESS_EACCES_OK (1U << 0)
/*
 * Check "path" using the access mode "mode"; "flag" takes ACCESS_* bits.
 * NOTE(review): the definitions are not in this header; presumably the
 * first variant warns and the second dies on an unexpected failure --
 * confirm before relying on the return value.
 */
int access_or_warn(const char *path, int mode, unsigned flag);
int access_or_die(const char *path, int mode, unsigned flag);
/* Warn on an inaccessible file if errno indicates this is an error */
int warn_on_fopen_errors(const char *path);
/*
 * When the build defines GMTIME_UNRELIABLE_ERRORS, every later use of
 * gmtime() and gmtime_r() is redirected to the git_gmtime() and
 * git_gmtime_r() replacements declared here.
 * NOTE(review): presumably the replacements make failures detectable on
 * platforms whose gmtime() does not signal errors reliably -- confirm
 * against their definitions.
 */
#ifdef GMTIME_UNRELIABLE_ERRORS
struct tm *git_gmtime(const time_t *);
struct tm *git_gmtime_r(const time_t *, struct tm *);
#define gmtime git_gmtime
#define gmtime_r git_gmtime_r
#endif
gettext.h: add parentheses around N_ expansion if supported The gettext N_ macro is used to mark strings for translation without actually translating them. At runtime the string is expected to be passed to the gettext API for translation. If two N_ macro invocations appear next to each other with only whitespace (or nothing at all) between them, the two separate strings will be marked for translation, but the preprocessor will then silently combine the strings into one and at runtime the string passed to gettext will not match the strings that were translated so no translation will actually occur. Avoid this by adding parentheses around the expansion of the N_ macro so that instead of ending up with two adjacent strings that are then combined by the preprocessor, two adjacent strings surrounded by parentheses result instead which causes a compile error so the mistake can be quickly found and corrected. However, since these string literals are typically assigned to static variables and not all compilers support parenthesized string literal assignments, allow this to be controlled by the Makefile with the default only enabled when the compiler is known to support the syntax. For now only __GNUC__ enables this by default which covers both gcc and clang which should result in early detection of any adjacent N_ macros. Although the necessary tests make the affected files a bit less elegant, the benefit of avoiding propagation of a translation- marking error to all the translation teams thus creating extra work for them when the error is eventually detected and fixed would seem to outweigh the minor inelegance the additional configuration tests introduce. Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Kyle J. McKay <mackyle@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-01-11 21:09:22 +01:00
/*
 * Default USE_PARENS_AROUND_GETTEXT_N to 1 on compilers that define
 * __GNUC__, unless the build already chose a value.
 * NOTE(review): the consumer of this knob is not in this header;
 * presumably it makes N_() expand to a parenthesized string so that two
 * adjacent N_() invocations fail to compile instead of being silently
 * concatenated -- confirm where N_() is defined.
 */
#if !defined(USE_PARENS_AROUND_GETTEXT_N) && defined(__GNUC__)
#define USE_PARENS_AROUND_GETTEXT_N 1
#endif
#ifndef SHELL_PATH
# define SHELL_PATH "/bin/sh"
#endif
/*
 * Fallbacks for platforms that do not advertise
 * _POSIX_THREAD_SAFE_FUNCTIONS: stream locking becomes a no-op and
 * getc_unlocked() is plain getc().
 *
 * The no-ops are real (inline) functions rather than empty macros so
 * that the argument is still evaluated and type-checked as a "FILE *",
 * and so that a call remains a valid void expression. The system names
 * are #undef'ed first because a libc may already provide them as
 * macros, in which case redefining them would be diagnosed.
 */
#ifndef _POSIX_THREAD_SAFE_FUNCTIONS
static inline void git_flockfile(FILE *fh)
{
	(void)fh; /* nothing to lock */
}
static inline void git_funlockfile(FILE *fh)
{
	(void)fh; /* nothing to unlock */
}
#undef flockfile
#undef funlockfile
#undef getc_unlocked
#define flockfile(fh) git_flockfile(fh)
#define funlockfile(fh) git_funlockfile(fh)
#define getc_unlocked(fh) getc(fh)
#endif
/*
 * Many callers try to open a file that is allowed to be absent and only
 * use its contents when the open succeeds. For such optional files a
 * "does not exist" failure can be ignored silently, whereas any other
 * failure (an I/O error, or a file that exists but cannot be opened for
 * lack of permission) still deserves to be reported.
 *
 * Pass the errno left behind by a failed open() or fopen(); the result
 * is 1 when it merely says the file is missing (ENOENT or ENOTDIR) and
 * 0 for every other value.
 */
static inline int is_missing_file_error(int errno_)
{
	switch (errno_) {
	case ENOENT:
	case ENOTDIR:
		return 1;
	default:
		return 0;
	}
}
add an extra level of indirection to main() There are certain startup tasks that we expect every git process to do. In some cases this is just to improve the quality of the program (e.g., setting up gettext()). In others it is a requirement for using certain functions in libgit.a (e.g., system_path() expects that you have called git_extract_argv0_path()). Most commands are builtins and are covered by the git.c version of main(). However, there are still a few external commands that use their own main(). Each of these has to remember to include the correct startup sequence, and we are not always consistent. Rather than just fix the inconsistencies, let's make this harder to get wrong by providing a common main() that can run this standard startup. We basically have two options to do this: - the compat/mingw.h file already does something like this by adding a #define that replaces the definition of main with a wrapper that calls mingw_startup(). The upside is that the code in each program doesn't need to be changed at all; it's rewritten on the fly by the preprocessor. The downside is that it may make debugging of the startup sequence a bit more confusing, as the preprocessor is quietly inserting new code. - the builtin functions are all of the form cmd_foo(), and git.c's main() calls them. This is much more explicit, which may make things more obvious to somebody reading the code. It's also more flexible (because of course we have to figure out _which_ cmd_foo() to call). The downside is that each of the builtins must define cmd_foo(), instead of just main(). This patch chooses the latter option, preferring the more explicit approach, even though it is more invasive. We introduce a new file common-main.c, with the "real" main. It expects to call cmd_main() from whatever other objects it is linked against. We link common-main.o against anything that links against libgit.a, since we know that such programs will need to do this setup. 
Note that common-main.o can't actually go inside libgit.a, as the linker would not pick up its main() function automatically (it has no callers). The rest of the patch is just adjusting all of the various external programs (mostly in t/helper) to use cmd_main(). I've provided a global declaration for cmd_main(), which means that all of the programs also need to match its signature. In particular, many functions need to switch to "const char **" instead of "char **" for argv. This effect ripples out to a few other variables and functions, as well. This makes the patch even more invasive, but the end result is much better. We should be treating argv strings as const anyway, and now all programs conform to the same signature (which also matches the way builtins are defined). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-01 07:58:58 +02:00
/*
 * Program entry point with a main()-like signature, except that the
 * argument strings are const.
 * NOTE(review): the caller is not in this header; presumably a shared
 * main() performs common startup and then calls this -- confirm.
 */
extern int cmd_main(int, const char **);
add UNLEAK annotation for reducing leak false positives It's a common pattern in git commands to allocate some memory that should last for the lifetime of the program and then not bother to free it, relying on the OS to throw it away. This keeps the code simple, and it's fast (we don't waste time traversing structures or calling free at the end of the program). But it also triggers warnings from memory-leak checkers like valgrind or LSAN. They know that the memory was still allocated at program exit, but they don't know _when_ the leaked memory stopped being useful. If it was early in the program, then it's probably a real and important leak. But if it was used right up until program exit, it's not an interesting leak and we'd like to suppress it so that we can see the real leaks. This patch introduces an UNLEAK() macro that lets us do so. To understand its design, let's first look at some of the alternatives. Unfortunately the suppression systems offered by leak-checking tools don't quite do what we want. A leak-checker basically knows two things: 1. Which blocks were allocated via malloc, and the callstack during the allocation. 2. Which blocks were left un-freed at the end of the program (and which are unreachable, but more on that later). Their suppressions work by mentioning the function or callstack of a particular allocation, and marking it as OK to leak. So imagine you have code like this: int cmd_foo(...) { /* this allocates some memory */ char *p = some_function(); printf("%s", p); return 0; } You can say "ignore allocations from some_function(), they're not leaks". But that's not right. That function may be called elsewhere, too, and we would potentially want to know about those leaks. So you can say "ignore the callstack when main calls some_function". That works, but your annotations are brittle. In this case it's only two functions, but you can imagine that the actual allocation is much deeper. 
If any of the intermediate code changes, you have to update the suppression. What we _really_ want to say is that "the value assigned to p at the end of the function is not a real leak". But leak-checkers can't understand that; they don't know about "p" in the first place. However, we can do something a little bit tricky if we make some assumptions about how leak-checkers work. They generally don't just report all un-freed blocks. That would report even globals which are still accessible when the leak-check is run. Instead they take some set of memory (like BSS) as a root and mark it as "reachable". Then they scan the reachable blocks for anything that looks like a pointer to a malloc'd block, and consider that block reachable. And then they scan those blocks, and so on, transitively marking anything reachable from a global as "not leaked" (or at least leaked in a different category). So we can mark the value of "p" as reachable by putting it into a variable with program lifetime. One way to do that is to just mark "p" as static. But that actually affects the run-time behavior if the function is called twice (you aren't likely to call main() twice, but some of our cmd_*() functions are called from other commands). Instead, we can trick the leak-checker by putting the value into _any_ reachable bytes. This patch keeps a global linked-list of bytes copied from "unleaked" variables. That list is reachable even at program exit, which confers recursive reachability on whatever values we unleak. In other words, you can do: int cmd_foo(...) { char *p = some_function(); printf("%s", p); UNLEAK(p); return 0; } to annotate "p" and suppress the leak report. But wait, couldn't we just say "free(p)"? In this toy example, yes. But UNLEAK()'s byte-copying strategy has several advantages over actually freeing the memory: 1. It's recursive across structures. In many cases our "p" is not just a pointer, but a complex struct whose fields may have been allocated by a sub-function. 
And in some cases (e.g., dir_struct) we don't even have a function which knows how to free all of the struct members. By marking the struct itself as reachable, that confers reachability on any pointers it contains (including those found in embedded structs, or reachable by walking heap blocks recursively. 2. It works on cases where we're not sure if the value is allocated or not. For example: char *p = argc > 1 ? argv[1] : some_function(); It's safe to use UNLEAK(p) here, because it's not freeing any memory. In the case that we're pointing to argv here, the reachability checker will just ignore our bytes. 3. Likewise, it works even if the variable has _already_ been freed. We're just copying the pointer bytes. If the block has been freed, the leak-checker will skip over those bytes as uninteresting. 4. Because it's not actually freeing memory, you can UNLEAK() before we are finished accessing the variable. This is helpful in cases like this: char *p = some_function(); return another_function(p); Writing this with free() requires: int ret; char *p = some_function(); ret = another_function(p); free(p); return ret; But with unleak we can just write: char *p = some_function(); UNLEAK(p); return another_function(p); This patch adds the UNLEAK() macro and enables it automatically when Git is compiled with SANITIZE=leak. In normal builds it's a noop, so we pay no runtime cost. It also adds some UNLEAK() annotations to show off how the feature works. On top of other recent leak fixes, these are enough to get t0000 and t0001 to pass when compiled with LSAN. Note the case in commit.c which actually converts a strbuf_release() into an UNLEAK. This code was already non-leaky, but the free didn't do anything useful, since we're exiting. Converting it to an annotation means that non-leak-checking builds pay no runtime cost. The cost is minimal enough that it's probably not worth going on a crusade to convert these kinds of frees to UNLEAKS. 
I did it here for consistency with the "sb" leak (though it would have been equally correct to go the other way, and turn them both into strbuf_release() calls). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 08:38:41 +02:00
/*
 * You can mark a stack variable with UNLEAK(var) to avoid it being
 * reported as a leak by tools like LSAN or valgrind. The argument
 * should generally be the variable itself (not its address and not what
 * it points to). It's safe to use this on pointers which may already
 * have been freed, or on pointers which may still be in use.
 *
 * Use this _only_ for a variable that leaks by going out of scope at
 * program exit (so only from cmd_* functions or their direct helpers).
 * Normal functions, especially those which may be called multiple
 * times, should actually free their memory. This is only meant as
 * an annotation, and does nothing in non-leak-checking builds.
 */
#ifdef SUPPRESS_ANNOTATED_LEAKS
/*
 * Receives the address and size of the annotated variable itself.
 * NOTE(review): defined elsewhere; presumably it keeps a copy of those
 * bytes somewhere a leak checker treats as reachable -- confirm.
 */
extern void unleak_memory(const void *ptr, size_t len);
/* "var" must be an lvalue: its address and sizeof are both taken. */
#define UNLEAK(var) unleak_memory(&(var), sizeof(var))
add UNLEAK annotation for reducing leak false positives It's a common pattern in git commands to allocate some memory that should last for the lifetime of the program and then not bother to free it, relying on the OS to throw it away. This keeps the code simple, and it's fast (we don't waste time traversing structures or calling free at the end of the program). But it also triggers warnings from memory-leak checkers like valgrind or LSAN. They know that the memory was still allocated at program exit, but they don't know _when_ the leaked memory stopped being useful. If it was early in the program, then it's probably a real and important leak. But if it was used right up until program exit, it's not an interesting leak and we'd like to suppress it so that we can see the real leaks. This patch introduces an UNLEAK() macro that lets us do so. To understand its design, let's first look at some of the alternatives. Unfortunately the suppression systems offered by leak-checking tools don't quite do what we want. A leak-checker basically knows two things: 1. Which blocks were allocated via malloc, and the callstack during the allocation. 2. Which blocks were left un-freed at the end of the program (and which are unreachable, but more on that later). Their suppressions work by mentioning the function or callstack of a particular allocation, and marking it as OK to leak. So imagine you have code like this: int cmd_foo(...) { /* this allocates some memory */ char *p = some_function(); printf("%s", p); return 0; } You can say "ignore allocations from some_function(), they're not leaks". But that's not right. That function may be called elsewhere, too, and we would potentially want to know about those leaks. So you can say "ignore the callstack when main calls some_function". That works, but your annotations are brittle. In this case it's only two functions, but you can imagine that the actual allocation is much deeper. 
If any of the intermediate code changes, you have to update the suppression. What we _really_ want to say is that "the value assigned to p at the end of the function is not a real leak". But leak-checkers can't understand that; they don't know about "p" in the first place. However, we can do something a little bit tricky if we make some assumptions about how leak-checkers work. They generally don't just report all un-freed blocks. That would report even globals which are still accessible when the leak-check is run. Instead they take some set of memory (like BSS) as a root and mark it as "reachable". Then they scan the reachable blocks for anything that looks like a pointer to a malloc'd block, and consider that block reachable. And then they scan those blocks, and so on, transitively marking anything reachable from a global as "not leaked" (or at least leaked in a different category). So we can mark the value of "p" as reachable by putting it into a variable with program lifetime. One way to do that is to just mark "p" as static. But that actually affects the run-time behavior if the function is called twice (you aren't likely to call main() twice, but some of our cmd_*() functions are called from other commands). Instead, we can trick the leak-checker by putting the value into _any_ reachable bytes. This patch keeps a global linked-list of bytes copied from "unleaked" variables. That list is reachable even at program exit, which confers recursive reachability on whatever values we unleak. In other words, you can do: int cmd_foo(...) { char *p = some_function(); printf("%s", p); UNLEAK(p); return 0; } to annotate "p" and suppress the leak report. But wait, couldn't we just say "free(p)"? In this toy example, yes. But UNLEAK()'s byte-copying strategy has several advantages over actually freeing the memory: 1. It's recursive across structures. In many cases our "p" is not just a pointer, but a complex struct whose fields may have been allocated by a sub-function. 
And in some cases (e.g., dir_struct) we don't even have a function which knows how to free all of the struct members. By marking the struct itself as reachable, that confers reachability on any pointers it contains (including those found in embedded structs, or reachable by walking heap blocks recursively. 2. It works on cases where we're not sure if the value is allocated or not. For example: char *p = argc > 1 ? argv[1] : some_function(); It's safe to use UNLEAK(p) here, because it's not freeing any memory. In the case that we're pointing to argv here, the reachability checker will just ignore our bytes. 3. Likewise, it works even if the variable has _already_ been freed. We're just copying the pointer bytes. If the block has been freed, the leak-checker will skip over those bytes as uninteresting. 4. Because it's not actually freeing memory, you can UNLEAK() before we are finished accessing the variable. This is helpful in cases like this: char *p = some_function(); return another_function(p); Writing this with free() requires: int ret; char *p = some_function(); ret = another_function(p); free(p); return ret; But with unleak we can just write: char *p = some_function(); UNLEAK(p); return another_function(p); This patch adds the UNLEAK() macro and enables it automatically when Git is compiled with SANITIZE=leak. In normal builds it's a noop, so we pay no runtime cost. It also adds some UNLEAK() annotations to show off how the feature works. On top of other recent leak fixes, these are enough to get t0000 and t0001 to pass when compiled with LSAN. Note the case in commit.c which actually converts a strbuf_release() into an UNLEAK. This code was already non-leaky, but the free didn't do anything useful, since we're exiting. Converting it to an annotation means that non-leak-checking builds pay no runtime cost. The cost is minimal enough that it's probably not worth going on a crusade to convert these kinds of frees to UNLEAKS. 
I did it here for consistency with the "sb" leak (though it would have been equally correct to go the other way, and turn them both into strbuf_release() calls). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 08:38:41 +02:00
#else
/*
 * SUPPRESS_ANNOTATED_LEAKS is not defined: UNLEAK() is an empty
 * statement and its argument is never evaluated.
 */
#define UNLEAK(var) do {} while (0)
add UNLEAK annotation for reducing leak false positives It's a common pattern in git commands to allocate some memory that should last for the lifetime of the program and then not bother to free it, relying on the OS to throw it away. This keeps the code simple, and it's fast (we don't waste time traversing structures or calling free at the end of the program). But it also triggers warnings from memory-leak checkers like valgrind or LSAN. They know that the memory was still allocated at program exit, but they don't know _when_ the leaked memory stopped being useful. If it was early in the program, then it's probably a real and important leak. But if it was used right up until program exit, it's not an interesting leak and we'd like to suppress it so that we can see the real leaks. This patch introduces an UNLEAK() macro that lets us do so. To understand its design, let's first look at some of the alternatives. Unfortunately the suppression systems offered by leak-checking tools don't quite do what we want. A leak-checker basically knows two things: 1. Which blocks were allocated via malloc, and the callstack during the allocation. 2. Which blocks were left un-freed at the end of the program (and which are unreachable, but more on that later). Their suppressions work by mentioning the function or callstack of a particular allocation, and marking it as OK to leak. So imagine you have code like this: int cmd_foo(...) { /* this allocates some memory */ char *p = some_function(); printf("%s", p); return 0; } You can say "ignore allocations from some_function(), they're not leaks". But that's not right. That function may be called elsewhere, too, and we would potentially want to know about those leaks. So you can say "ignore the callstack when main calls some_function". That works, but your annotations are brittle. In this case it's only two functions, but you can imagine that the actual allocation is much deeper. 
If any of the intermediate code changes, you have to update the suppression. What we _really_ want to say is that "the value assigned to p at the end of the function is not a real leak". But leak-checkers can't understand that; they don't know about "p" in the first place. However, we can do something a little bit tricky if we make some assumptions about how leak-checkers work. They generally don't just report all un-freed blocks. That would report even globals which are still accessible when the leak-check is run. Instead they take some set of memory (like BSS) as a root and mark it as "reachable". Then they scan the reachable blocks for anything that looks like a pointer to a malloc'd block, and consider that block reachable. And then they scan those blocks, and so on, transitively marking anything reachable from a global as "not leaked" (or at least leaked in a different category). So we can mark the value of "p" as reachable by putting it into a variable with program lifetime. One way to do that is to just mark "p" as static. But that actually affects the run-time behavior if the function is called twice (you aren't likely to call main() twice, but some of our cmd_*() functions are called from other commands). Instead, we can trick the leak-checker by putting the value into _any_ reachable bytes. This patch keeps a global linked-list of bytes copied from "unleaked" variables. That list is reachable even at program exit, which confers recursive reachability on whatever values we unleak. In other words, you can do: int cmd_foo(...) { char *p = some_function(); printf("%s", p); UNLEAK(p); return 0; } to annotate "p" and suppress the leak report. But wait, couldn't we just say "free(p)"? In this toy example, yes. But UNLEAK()'s byte-copying strategy has several advantages over actually freeing the memory: 1. It's recursive across structures. In many cases our "p" is not just a pointer, but a complex struct whose fields may have been allocated by a sub-function. 
And in some cases (e.g., dir_struct) we don't even have a function which knows how to free all of the struct members. By marking the struct itself as reachable, that confers reachability on any pointers it contains (including those found in embedded structs, or reachable by walking heap blocks recursively. 2. It works on cases where we're not sure if the value is allocated or not. For example: char *p = argc > 1 ? argv[1] : some_function(); It's safe to use UNLEAK(p) here, because it's not freeing any memory. In the case that we're pointing to argv here, the reachability checker will just ignore our bytes. 3. Likewise, it works even if the variable has _already_ been freed. We're just copying the pointer bytes. If the block has been freed, the leak-checker will skip over those bytes as uninteresting. 4. Because it's not actually freeing memory, you can UNLEAK() before we are finished accessing the variable. This is helpful in cases like this: char *p = some_function(); return another_function(p); Writing this with free() requires: int ret; char *p = some_function(); ret = another_function(p); free(p); return ret; But with unleak we can just write: char *p = some_function(); UNLEAK(p); return another_function(p); This patch adds the UNLEAK() macro and enables it automatically when Git is compiled with SANITIZE=leak. In normal builds it's a noop, so we pay no runtime cost. It also adds some UNLEAK() annotations to show off how the feature works. On top of other recent leak fixes, these are enough to get t0000 and t0001 to pass when compiled with LSAN. Note the case in commit.c which actually converts a strbuf_release() into an UNLEAK. This code was already non-leaky, but the free didn't do anything useful, since we're exiting. Converting it to an annotation means that non-leak-checking builds pay no runtime cost. The cost is minimal enough that it's probably not worth going on a crusade to convert these kinds of frees to UNLEAKS. 
I did it here for consistency with the "sb" leak (though it would have been equally correct to go the other way, and turn them both into strbuf_release() calls). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 08:38:41 +02:00
#endif
automatically ban strcpy() There are a few standard C functions (like strcpy) which are easy to misuse. E.g.: char path[PATH_MAX]; strcpy(path, arg); may overflow the "path" buffer. Sometimes there's an earlier constraint on the size of "arg", but even in such a case it's hard to verify that the code is correct. If the size really is unbounded, you're better off using a dynamic helper like strbuf: struct strbuf path = STRBUF_INIT; strbuf_addstr(path, arg); or if it really is bounded, then use xsnprintf to show your expectation (and get a run-time assertion): char path[PATH_MAX]; xsnprintf(path, sizeof(path), "%s", arg); which makes further auditing easier. We'd usually catch undesirable code like this in a review, but there's no automated enforcement. Adding that enforcement can help us be more consistent and save effort (and a round-trip) during review. This patch teaches the compiler to report an error when it sees strcpy (and will become a model for banning a few other functions). This has a few advantages over a separate linting tool: 1. We know it's run as part of a build cycle, so it's hard to ignore. Whereas an external linter is an extra step the developer needs to remember to do. 2. Likewise, it's basically free since the compiler is parsing the code anyway. 3. We know it's robust against false positives (unlike a grep-based linter). The two big disadvantages are: 1. We'll only check code that is actually compiled, so it may miss code that isn't triggered on your particular system. But since presumably people don't add new code without compiling it (and if they do, the banned function list is the least of their worries), we really only care about failing to clean up old code when adding new functions to the list. And that's easy enough to address with a manual audit when adding a new function (which is what I did for the functions here). 2. 
If this ends up generating false positives, it's going to be harder to disable (as opposed to a separate linter, which may have mechanisms for overriding a particular case). But the intent is to only ban functions which are obviously bad, and for which we accept using an alternative even when this particular use isn't buggy (e.g., the xsnprintf alternative above). The implementation here is simple: we'll define a macro for the banned function which replaces it with a reference to a descriptively named but undeclared identifier. Replacing it with any invalid code would work (since we just want to break compilation). But ideally we'd meet these goals: - it should be portable; ideally this would trigger everywhere, and does not need to be part of a DEVELOPER=1 setup (because unlike warnings which may depend on the compiler or system, this is a clear indicator of something wrong in the code). - it should generate a readable error that gives the developer a clue what happened - it should avoid generating too much other cruft that makes it hard to see the actual error - it should mention the original callsite in the error The output with this patch looks like this (using gcc 7, on a checkout with 022d2ac1f3 reverted, which removed the final strcpy from blame.c): CC builtin/blame.o In file included from ./git-compat-util.h:1246, from ./cache.h:4, from builtin/blame.c:8: builtin/blame.c: In function ‘cmd_blame’: ./banned.h:11:22: error: ‘sorry_strcpy_is_a_banned_function’ undeclared (first use in this function) #define BANNED(func) sorry_##func##_is_a_banned_function ^~~~~~ ./banned.h:14:21: note: in expansion of macro ‘BANNED’ #define strcpy(x,y) BANNED(strcpy) ^~~~~~ builtin/blame.c:1074:4: note: in expansion of macro ‘strcpy’ strcpy(repeated_meta_color, GIT_COLOR_CYAN); ^~~~~~ ./banned.h:11:22: note: each undeclared identifier is reported only once for each function it appears in #define BANNED(func) sorry_##func##_is_a_banned_function ^~~~~~ ./banned.h:14:21: note: in 
expansion of macro ‘BANNED’ #define strcpy(x,y) BANNED(strcpy) ^~~~~~ builtin/blame.c:1074:4: note: in expansion of macro ‘strcpy’ strcpy(repeated_meta_color, GIT_COLOR_CYAN); ^~~~~~ This prominently shows the phrase "strcpy is a banned function", along with the original callsite in blame.c and the location of the ban code in banned.h. Which should be enough to get even a developer seeing this for the first time pointed in the right direction. This doesn't match our ideals perfectly, but it's a pretty good balance. A few alternatives I tried: 1. Instead of using an undeclared variable, using an undeclared function. This shortens the message, because the "each undeclared identifier" message is not needed (and as you can see above, it triggers a separate mention of each of the expansion points). But it doesn't actually stop compilation unless you use -Werror=implicit-function-declaration in your CFLAGS. This is the case for DEVELOPER=1, but not for a default build (on the other hand, we'd eventually produce a link error pointing to the correct source line with the descriptive name). 2. The linux kernel uses a similar mechanism in its BUILD_BUG_ON_MSG(), where they actually declare the function but do so with gcc's error attribute. But that's not portable to other compilers (and it also runs afoul of our error() macro). We could make a gcc-specific technique and fallback on other compilers, but it's probably not worth the complexity. It also isn't significantly shorter than the error message shown above. 3. We could drop the BANNED() macro, which would shorten the number of lines in the error. But curiously, removing it (and just expanding strcpy directly to the bogus identifier) causes gcc _not_ to report the original line of code. So this strategy seems to be an acceptable mix of information, portability, simplicity, and robustness, without _too_ much extra clutter. I also tested it with clang, and it looks as good (actually, slightly less cluttered than with gcc). 
Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-26 09:21:05 +02:00
/*
* This include must come after system headers, since it introduces macros that
* replace system names.
*/
#include "banned.h"
#endif