2022-08-12 22:10:13 +02:00
|
|
|
#include "cache.h"
|
|
|
|
#include "diagnose.h"
|
|
|
|
#include "compat/disk.h"
|
|
|
|
#include "archive.h"
|
|
|
|
#include "dir.h"
|
|
|
|
#include "help.h"
|
2023-03-21 07:25:54 +01:00
|
|
|
#include "gettext.h"
|
2023-02-24 01:09:27 +01:00
|
|
|
#include "hex.h"
|
2022-08-12 22:10:13 +02:00
|
|
|
#include "strvec.h"
|
|
|
|
#include "object-store.h"
|
|
|
|
#include "packfile.h"
|
2023-03-21 07:26:07 +01:00
|
|
|
#include "write-or-die.h"
|
2022-08-12 22:10:13 +02:00
|
|
|
|
diagnose.c: add option to configure archive contents
Update 'create_diagnostics_archive()' to take an argument 'mode'. When
archiving diagnostics for a repository, 'mode' is used to selectively
include/exclude information based on its value. The initial options for
'mode' are:
* DIAGNOSE_NONE: do not collect any diagnostics or create an archive
(no-op).
* DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path,
filesystem available space) as well as sizing and count statistics for the
repository's objects and packfiles.
* DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics,
and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and
'.git/objects/info' directories.
These modes are introduced to provide users the option to collect
diagnostics without the sensitive information included in copies of '.git'
dir contents. At the moment, only 'scalar diagnose' uses
'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to
match existing functionality), but more callers will be introduced in
subsequent patches.
Finally, refactor from a hardcoded set of 'add_directory_to_archiver()'
calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for
easier future modification of the set of directories to archive and improves
error reporting when 'add_directory_to_archiver()' fails.
Helped-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 22:10:14 +02:00
|
|
|
/*
 * One directory that create_diagnostics_archive() copies into the archive.
 */
struct archive_dir {
	/* Directory path, handed to add_directory_to_archiver() as 'path'. */
	const char *path;
	/* Non-zero to also descend into subdirectories of 'path'. */
	int recursive;
};
|
|
|
|
|
2022-08-12 22:10:16 +02:00
|
|
|
struct diagnose_option {
|
|
|
|
enum diagnose_mode mode;
|
|
|
|
const char *option_name;
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct diagnose_option diagnose_options[] = {
|
|
|
|
{ DIAGNOSE_STATS, "stats" },
|
|
|
|
{ DIAGNOSE_ALL, "all" },
|
|
|
|
};
|
|
|
|
|
|
|
|
int option_parse_diagnose(const struct option *opt, const char *arg, int unset)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
enum diagnose_mode *diagnose = opt->value;
|
|
|
|
|
|
|
|
if (!arg) {
|
|
|
|
*diagnose = unset ? DIAGNOSE_NONE : DIAGNOSE_STATS;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(diagnose_options); i++) {
|
|
|
|
if (!strcmp(arg, diagnose_options[i].option_name)) {
|
|
|
|
*diagnose = diagnose_options[i].mode;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return error(_("invalid --%s value '%s'"), opt->long_name, arg);
|
|
|
|
}
|
|
|
|
|
2023-02-24 07:39:24 +01:00
|
|
|
static void dir_file_stats_objects(const char *full_path,
|
|
|
|
size_t full_path_len UNUSED,
|
2022-08-12 22:10:13 +02:00
|
|
|
const char *file_name, void *data)
|
|
|
|
{
|
|
|
|
struct strbuf *buf = data;
|
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
if (!stat(full_path, &st))
|
|
|
|
strbuf_addf(buf, "%-70s %16" PRIuMAX "\n", file_name,
|
|
|
|
(uintmax_t)st.st_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int dir_file_stats(struct object_directory *object_dir, void *data)
|
|
|
|
{
|
|
|
|
struct strbuf *buf = data;
|
|
|
|
|
|
|
|
strbuf_addf(buf, "Contents of %s:\n", object_dir->path);
|
|
|
|
|
|
|
|
for_each_file_in_pack_dir(object_dir->path, dir_file_stats_objects,
|
|
|
|
data);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 20:16:55 +02:00
|
|
|
/*
|
|
|
|
* Get the d_type of a dirent. If the d_type is unknown, derive it from
|
|
|
|
* stat.st_mode.
|
|
|
|
*
|
|
|
|
* Note that 'path' is assumed to have a trailing slash. It is also modified
|
|
|
|
* in-place during the execution of the function, but is then reverted to its
|
|
|
|
* original value before returning.
|
|
|
|
*/
|
|
|
|
static unsigned char get_dtype(struct dirent *e, struct strbuf *path)
|
2022-08-12 22:10:13 +02:00
|
|
|
{
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 20:16:55 +02:00
|
|
|
struct stat st;
|
|
|
|
unsigned char dtype = DTYPE(e);
|
|
|
|
size_t base_path_len;
|
|
|
|
|
|
|
|
if (dtype != DT_UNKNOWN)
|
|
|
|
return dtype;
|
|
|
|
|
|
|
|
/* d_type unknown in dirent, try to fall back on lstat results */
|
|
|
|
base_path_len = path->len;
|
|
|
|
strbuf_addstr(path, e->d_name);
|
|
|
|
if (lstat(path->buf, &st))
|
|
|
|
goto cleanup;
|
|
|
|
|
|
|
|
/* determine d_type from st_mode */
|
|
|
|
if (S_ISREG(st.st_mode))
|
|
|
|
dtype = DT_REG;
|
|
|
|
else if (S_ISDIR(st.st_mode))
|
|
|
|
dtype = DT_DIR;
|
|
|
|
else if (S_ISLNK(st.st_mode))
|
|
|
|
dtype = DT_LNK;
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
strbuf_setlen(path, base_path_len);
|
|
|
|
return dtype;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int count_files(struct strbuf *path)
|
|
|
|
{
|
|
|
|
DIR *dir = opendir(path->buf);
|
2022-08-12 22:10:13 +02:00
|
|
|
struct dirent *e;
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
if (!dir)
|
|
|
|
return 0;
|
|
|
|
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 20:16:55 +02:00
|
|
|
while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL)
|
|
|
|
if (get_dtype(e, path) == DT_REG)
|
2022-08-12 22:10:13 +02:00
|
|
|
count++;
|
|
|
|
|
|
|
|
closedir(dir);
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void loose_objs_stats(struct strbuf *buf, const char *path)
|
|
|
|
{
|
|
|
|
DIR *dir = opendir(path);
|
|
|
|
struct dirent *e;
|
|
|
|
int count;
|
|
|
|
int total = 0;
|
|
|
|
unsigned char c;
|
|
|
|
struct strbuf count_path = STRBUF_INIT;
|
|
|
|
size_t base_path_len;
|
|
|
|
|
|
|
|
if (!dir)
|
|
|
|
return;
|
|
|
|
|
|
|
|
strbuf_addstr(buf, "Object directory stats for ");
|
|
|
|
strbuf_add_absolute_path(buf, path);
|
|
|
|
strbuf_addstr(buf, ":\n");
|
|
|
|
|
|
|
|
strbuf_add_absolute_path(&count_path, path);
|
|
|
|
strbuf_addch(&count_path, '/');
|
|
|
|
base_path_len = count_path.len;
|
|
|
|
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 20:16:55 +02:00
|
|
|
while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL)
|
|
|
|
if (get_dtype(e, &count_path) == DT_DIR &&
|
|
|
|
strlen(e->d_name) == 2 &&
|
2022-08-12 22:10:13 +02:00
|
|
|
!hex_to_bytes(&c, e->d_name, 1)) {
|
|
|
|
strbuf_setlen(&count_path, base_path_len);
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 20:16:55 +02:00
|
|
|
strbuf_addf(&count_path, "%s/", e->d_name);
|
|
|
|
total += (count = count_files(&count_path));
|
2022-08-12 22:10:13 +02:00
|
|
|
strbuf_addf(buf, "%s : %7d files\n", e->d_name, count);
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_addf(buf, "Total: %d loose objects", total);
|
|
|
|
|
|
|
|
strbuf_release(&count_path);
|
|
|
|
closedir(dir);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int add_directory_to_archiver(struct strvec *archiver_args,
|
|
|
|
const char *path, int recurse)
|
|
|
|
{
|
|
|
|
int at_root = !*path;
|
|
|
|
DIR *dir;
|
|
|
|
struct dirent *e;
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
size_t len;
|
|
|
|
int res = 0;
|
|
|
|
|
|
|
|
dir = opendir(at_root ? "." : path);
|
|
|
|
if (!dir) {
|
|
|
|
if (errno == ENOENT) {
|
|
|
|
warning(_("could not archive missing directory '%s'"), path);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return error_errno(_("could not open directory '%s'"), path);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!at_root)
|
|
|
|
strbuf_addf(&buf, "%s/", path);
|
|
|
|
len = buf.len;
|
|
|
|
strvec_pushf(archiver_args, "--prefix=%s", buf.buf);
|
|
|
|
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 20:16:55 +02:00
|
|
|
while (!res && (e = readdir_skip_dot_and_dotdot(dir))) {
|
|
|
|
struct strbuf abspath = STRBUF_INIT;
|
|
|
|
unsigned char dtype;
|
|
|
|
|
|
|
|
strbuf_add_absolute_path(&abspath, at_root ? "." : path);
|
|
|
|
strbuf_addch(&abspath, '/');
|
|
|
|
dtype = get_dtype(e, &abspath);
|
2022-08-12 22:10:13 +02:00
|
|
|
|
|
|
|
strbuf_setlen(&buf, len);
|
|
|
|
strbuf_addstr(&buf, e->d_name);
|
|
|
|
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 20:16:55 +02:00
|
|
|
if (dtype == DT_REG)
|
2022-08-12 22:10:13 +02:00
|
|
|
strvec_pushf(archiver_args, "--add-file=%s", buf.buf);
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 20:16:55 +02:00
|
|
|
else if (dtype != DT_DIR)
|
2022-08-12 22:10:13 +02:00
|
|
|
warning(_("skipping '%s', which is neither file nor "
|
|
|
|
"directory"), buf.buf);
|
|
|
|
else if (recurse &&
|
|
|
|
add_directory_to_archiver(archiver_args,
|
|
|
|
buf.buf, recurse) < 0)
|
|
|
|
res = -1;
|
diagnose.c: refactor to safely use 'd_type'
Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c'
to instead utilize the compatibility macro 'DTYPE()'. On systems where
'd_type' is not present in 'struct dirent', this macro will always return
'DT_UNKNOWN'. In that case, instead fall back on using the 'stat.st_mode' to
determine whether the dirent points to a dir, file, or link.
Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g.,
loose objects) are counted properly.
Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in
'dir.c' (which itself was refactored from a prior 'get_dtype()' in
ad6f2157f9 (dir: restructure in a way to avoid passing around a struct
dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary
files, such as those inside the '.git' dir. Because of this, it does not
search the index for a matching entry to derive the 'd_type'.
Reported-by: Randall S. Becker <rsbecker@nexbridge.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-09-17 20:16:55 +02:00
|
|
|
|
|
|
|
strbuf_release(&abspath);
|
2022-08-12 22:10:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
closedir(dir);
|
|
|
|
strbuf_release(&buf);
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
diagnose.c: add option to configure archive contents
Update 'create_diagnostics_archive()' to take an argument 'mode'. When
archiving diagnostics for a repository, 'mode' is used to selectively
include/exclude information based on its value. The initial options for
'mode' are:
* DIAGNOSE_NONE: do not collect any diagnostics or create an archive
(no-op).
* DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path,
filesystem available space) as well as sizing and count statistics for the
repository's objects and packfiles.
* DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics,
and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and
'.git/objects/info' directories.
These modes are introduced to provide users the option to collect
diagnostics without the sensitive information included in copies of '.git'
dir contents. At the moment, only 'scalar diagnose' uses
'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to
match existing functionality), but more callers will be introduced in
subsequent patches.
Finally, refactor from a hardcoded set of 'add_directory_to_archiver()'
calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for
easier future modification of the set of directories to archive and improves
error reporting when 'add_directory_to_archiver()' fails.
Helped-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 22:10:14 +02:00
|
|
|
int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode)
|
2022-08-12 22:10:13 +02:00
|
|
|
{
|
|
|
|
struct strvec archiver_args = STRVEC_INIT;
|
|
|
|
char **argv_copy = NULL;
|
|
|
|
int stdout_fd = -1, archiver_fd = -1;
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
diagnose.c: add option to configure archive contents
Update 'create_diagnostics_archive()' to take an argument 'mode'. When
archiving diagnostics for a repository, 'mode' is used to selectively
include/exclude information based on its value. The initial options for
'mode' are:
* DIAGNOSE_NONE: do not collect any diagnostics or create an archive
(no-op).
* DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path,
filesystem available space) as well as sizing and count statistics for the
repository's objects and packfiles.
* DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics,
and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and
'.git/objects/info' directories.
These modes are introduced to provide users the option to collect
diagnostics without the sensitive information included in copies of '.git'
dir contents. At the moment, only 'scalar diagnose' uses
'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to
match existing functionality), but more callers will be introduced in
subsequent patches.
Finally, refactor from a hardcoded set of 'add_directory_to_archiver()'
calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for
easier future modification of the set of directories to archive and improves
error reporting when 'add_directory_to_archiver()' fails.
Helped-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 22:10:14 +02:00
|
|
|
int res, i;
|
|
|
|
struct archive_dir archive_dirs[] = {
|
|
|
|
{ ".git", 0 },
|
|
|
|
{ ".git/hooks", 0 },
|
|
|
|
{ ".git/info", 0 },
|
|
|
|
{ ".git/logs", 1 },
|
|
|
|
{ ".git/objects/info", 0 }
|
|
|
|
};
|
|
|
|
|
|
|
|
if (mode == DIAGNOSE_NONE) {
|
|
|
|
res = 0;
|
|
|
|
goto diagnose_cleanup;
|
|
|
|
}
|
2022-08-12 22:10:13 +02:00
|
|
|
|
|
|
|
stdout_fd = dup(STDOUT_FILENO);
|
|
|
|
if (stdout_fd < 0) {
|
|
|
|
res = error_errno(_("could not duplicate stdout"));
|
|
|
|
goto diagnose_cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
archiver_fd = xopen(zip_path->buf, O_CREAT | O_WRONLY | O_TRUNC, 0666);
|
|
|
|
if (dup2(archiver_fd, STDOUT_FILENO) < 0) {
|
|
|
|
res = error_errno(_("could not redirect output"));
|
|
|
|
goto diagnose_cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
init_zip_archiver();
|
|
|
|
strvec_pushl(&archiver_args, "git-diagnose", "--format=zip", NULL);
|
|
|
|
|
|
|
|
strbuf_reset(&buf);
|
|
|
|
strbuf_addstr(&buf, "Collecting diagnostic info\n\n");
|
|
|
|
get_version_info(&buf, 1);
|
|
|
|
|
|
|
|
strbuf_addf(&buf, "Repository root: %s\n", the_repository->worktree);
|
|
|
|
get_disk_info(&buf);
|
|
|
|
write_or_die(stdout_fd, buf.buf, buf.len);
|
|
|
|
strvec_pushf(&archiver_args,
|
|
|
|
"--add-virtual-file=diagnostics.log:%.*s",
|
|
|
|
(int)buf.len, buf.buf);
|
|
|
|
|
|
|
|
strbuf_reset(&buf);
|
|
|
|
strbuf_addstr(&buf, "--add-virtual-file=packs-local.txt:");
|
|
|
|
dir_file_stats(the_repository->objects->odb, &buf);
|
|
|
|
foreach_alt_odb(dir_file_stats, &buf);
|
|
|
|
strvec_push(&archiver_args, buf.buf);
|
|
|
|
|
|
|
|
strbuf_reset(&buf);
|
|
|
|
strbuf_addstr(&buf, "--add-virtual-file=objects-local.txt:");
|
|
|
|
loose_objs_stats(&buf, ".git/objects");
|
|
|
|
strvec_push(&archiver_args, buf.buf);
|
|
|
|
|
diagnose.c: add option to configure archive contents
Update 'create_diagnostics_archive()' to take an argument 'mode'. When
archiving diagnostics for a repository, 'mode' is used to selectively
include/exclude information based on its value. The initial options for
'mode' are:
* DIAGNOSE_NONE: do not collect any diagnostics or create an archive
(no-op).
* DIAGNOSE_STATS: collect basic repository metadata (Git version, repo path,
filesystem available space) as well as sizing and count statistics for the
repository's objects and packfiles.
* DIAGNOSE_ALL: collect basic repository metadata, sizing/count statistics,
and copies of the '.git', '.git/hooks', '.git/info', '.git/logs', and
'.git/objects/info' directories.
These modes are introduced to provide users the option to collect
diagnostics without the sensitive information included in copies of '.git'
dir contents. At the moment, only 'scalar diagnose' uses
'create_diagnostics_archive()' (with a hardcoded 'DIAGNOSE_ALL' mode to
match existing functionality), but more callers will be introduced in
subsequent patches.
Finally, refactor from a hardcoded set of 'add_directory_to_archiver()'
calls to iterative invocations gated by 'DIAGNOSE_ALL'. This allows for
easier future modification of the set of directories to archive and improves
error reporting when 'add_directory_to_archiver()' fails.
Helped-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-12 22:10:14 +02:00
|
|
|
/* Only include this if explicitly requested */
|
|
|
|
if (mode == DIAGNOSE_ALL) {
|
|
|
|
for (i = 0; i < ARRAY_SIZE(archive_dirs); i++) {
|
|
|
|
if (add_directory_to_archiver(&archiver_args,
|
|
|
|
archive_dirs[i].path,
|
|
|
|
archive_dirs[i].recursive)) {
|
|
|
|
res = error_errno(_("could not add directory '%s' to archiver"),
|
|
|
|
archive_dirs[i].path);
|
|
|
|
goto diagnose_cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-08-12 22:10:13 +02:00
|
|
|
|
|
|
|
strvec_pushl(&archiver_args, "--prefix=",
|
|
|
|
oid_to_hex(the_hash_algo->empty_tree), "--", NULL);
|
|
|
|
|
|
|
|
/* `write_archive()` modifies the `argv` passed to it. Let it. */
|
|
|
|
argv_copy = xmemdupz(archiver_args.v,
|
|
|
|
sizeof(char *) * archiver_args.nr);
|
|
|
|
res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL,
|
|
|
|
the_repository, NULL, 0);
|
|
|
|
if (res) {
|
|
|
|
error(_("failed to write archive"));
|
|
|
|
goto diagnose_cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(stderr, "\n"
|
|
|
|
"Diagnostics complete.\n"
|
|
|
|
"All of the gathered info is captured in '%s'\n",
|
|
|
|
zip_path->buf);
|
|
|
|
|
|
|
|
diagnose_cleanup:
|
|
|
|
if (archiver_fd >= 0) {
|
|
|
|
dup2(stdout_fd, STDOUT_FILENO);
|
|
|
|
close(stdout_fd);
|
|
|
|
close(archiver_fd);
|
|
|
|
}
|
|
|
|
free(argv_copy);
|
|
|
|
strvec_clear(&archiver_args);
|
|
|
|
strbuf_release(&buf);
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|