git-commit-vandalism/builtin/commit-graph.c

237 lines
6.6 KiB
C
Raw Normal View History

#include "builtin.h"
#include "config.h"
#include "dir.h"
#include "lockfile.h"
#include "parse-options.h"
#include "repository.h"
#include "commit-graph.h"
#include "object-store.h"
static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--[no-]progress] <split options>"),
NULL
};
static const char * const builtin_commit_graph_verify_usage[] = {
N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
NULL
};
static const char * const builtin_commit_graph_write_usage[] = {
N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--[no-]progress] <split options>"),
NULL
};
static struct opts_commit_graph {
const char *obj_dir;
int reachable;
int stdin_packs;
int stdin_commits;
int append;
int split;
int shallow;
int progress;
} opts;
commit-graph.h: store an odb in 'struct write_commit_graph_context' There are lots of places in 'commit-graph.h' where a function either has (or almost has) a full 'struct object_directory *', accesses '->path', and then throws away the rest of the struct. This can cause headaches when comparing the locations of object directories across alternates (e.g., in the case of deciding if two commit-graph layers can be merged). These paths are normalized with 'normalize_path_copy()' which mitigates some comparison issues, but not all [1]. Replace usage of 'char *object_dir' with 'odb->path' by storing a 'struct object_directory *' in the 'write_commit_graph_context' structure. This is an intermediate step towards getting rid of all path normalization in 'commit-graph.c'. Resolving a user-provided '--object-dir' argument now requires that we compare it to the known alternates for equality. Prior to this patch, an unknown '--object-dir' argument would silently exit with status zero. This can clearly lead to unintended behavior, such as verifying commit-graphs that aren't in a repository's own object store (or one of its alternates), or causing a typo to mask a legitimate commit-graph verification failure. Make this error non-silent by 'die()'-ing when the given '--object-dir' does not match any known alternate object store. [1]: In my testing, for example, I can get one side of the commit-graph code to fill object_dir with "./objects" and the other with just "objects". Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-04 06:51:50 +01:00
static struct object_directory *find_odb(struct repository *r,
const char *obj_dir)
{
struct object_directory *odb;
char *obj_dir_real = real_pathdup(obj_dir, 1);
prepare_alt_odb(r);
for (odb = r->objects->odb; odb; odb = odb->next) {
if (!strcmp(obj_dir_real, real_path(odb->path)))
break;
}
free(obj_dir_real);
if (!odb)
die(_("could not find object directory matching %s"), obj_dir);
return odb;
}
static int graph_verify(int argc, const char **argv)
{
struct commit_graph *graph = NULL;
commit-graph.h: store an odb in 'struct write_commit_graph_context' There are lots of places in 'commit-graph.h' where a function either has (or almost has) a full 'struct object_directory *', accesses '->path', and then throws away the rest of the struct. This can cause headaches when comparing the locations of object directories across alternates (e.g., in the case of deciding if two commit-graph layers can be merged). These paths are normalized with 'normalize_path_copy()' which mitigates some comparison issues, but not all [1]. Replace usage of 'char *object_dir' with 'odb->path' by storing a 'struct object_directory *' in the 'write_commit_graph_context' structure. This is an intermediate step towards getting rid of all path normalization in 'commit-graph.c'. Resolving a user-provided '--object-dir' argument now requires that we compare it to the known alternates for equality. Prior to this patch, an unknown '--object-dir' argument would silently exit with status zero. This can clearly lead to unintended behavior, such as verifying commit-graphs that aren't in a repository's own object store (or one of its alternates), or causing a typo to mask a legitimate commit-graph verification failure. Make this error non-silent by 'die()'-ing when the given '--object-dir' does not match any known alternate object store. [1]: In my testing, for example, I can get one side of the commit-graph code to fill object_dir with "./objects" and the other with just "objects". Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-04 06:51:50 +01:00
struct object_directory *odb = NULL;
char *graph_name;
commit-graph: don't early exit(1) on e.g. "git status" Make the commit-graph loading code work as a library that returns an error code instead of calling exit(1) when the commit-graph is corrupt. This means that e.g. "status" will now report commit-graph corruption as an "error: [...]" at the top of its output, but then proceed to work normally. This required splitting up the load_commit_graph_one() function so that the code that deals with open()-ing and stat()-ing the graph can now be called independently as open_commit_graph(). This is needed because "commit-graph verify" where the graph doesn't exist isn't an error. See the third paragraph in 283e68c72f ("commit-graph: add 'verify' subcommand", 2018-06-27). There's a bug in that logic where we conflate the intended ENOENT with other errno values (e.g. EACCES), but this change doesn't address that. That'll be addressed in a follow-up change. I'm then splitting most of the logic out of load_commit_graph_one() into load_commit_graph_one_fd_st(), which allows for providing an existing file descriptor and stat information to the loading code. This isn't strictly needed, but it would be redundant and confusing to open() and stat() the file twice for some of the codepaths, this allows for calling open_commit_graph() followed by load_commit_graph_one_fd_st(). The "graph_file" still needs to be passed to that function for the the "graph file %s is too small" error message. This leaves load_commit_graph_one() unused by everything except the internal prepare_commit_graph_one() function, so let's mark it as "static". If someone needs it in the future we can remove the "static" attribute. I could also rewrite its sole remaining user ("prepare_commit_graph_one()") to use load_commit_graph_one_fd_st() instead, but let's leave it at this. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-25 13:08:30 +01:00
int open_ok;
int fd;
struct stat st;
int flags = 0;
static struct option builtin_commit_graph_verify_options[] = {
OPT_STRING(0, "object-dir", &opts.obj_dir,
N_("dir"),
N_("The object directory to store the graph")),
OPT_BOOL(0, "shallow", &opts.shallow,
N_("if the commit-graph is split, only verify the tip file")),
OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")),
OPT_END(),
};
trace2_cmd_mode("verify");
opts.progress = isatty(2);
argc = parse_options(argc, argv, NULL,
builtin_commit_graph_verify_options,
builtin_commit_graph_verify_usage, 0);
if (!opts.obj_dir)
opts.obj_dir = get_object_directory();
if (opts.shallow)
flags |= COMMIT_GRAPH_VERIFY_SHALLOW;
if (opts.progress)
flags |= COMMIT_GRAPH_WRITE_PROGRESS;
commit-graph.h: store an odb in 'struct write_commit_graph_context' There are lots of places in 'commit-graph.h' where a function either has (or almost has) a full 'struct object_directory *', accesses '->path', and then throws away the rest of the struct. This can cause headaches when comparing the locations of object directories across alternates (e.g., in the case of deciding if two commit-graph layers can be merged). These paths are normalized with 'normalize_path_copy()' which mitigates some comparison issues, but not all [1]. Replace usage of 'char *object_dir' with 'odb->path' by storing a 'struct object_directory *' in the 'write_commit_graph_context' structure. This is an intermediate step towards getting rid of all path normalization in 'commit-graph.c'. Resolving a user-provided '--object-dir' argument now requires that we compare it to the known alternates for equality. Prior to this patch, an unknown '--object-dir' argument would silently exit with status zero. This can clearly lead to unintended behavior, such as verifying commit-graphs that aren't in a repository's own object store (or one of its alternates), or causing a typo to mask a legitimate commit-graph verification failure. Make this error non-silent by 'die()'-ing when the given '--object-dir' does not match any known alternate object store. [1]: In my testing, for example, I can get one side of the commit-graph code to fill object_dir with "./objects" and the other with just "objects". Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-04 06:51:50 +01:00
odb = find_odb(the_repository, opts.obj_dir);
graph_name = get_commit_graph_filename(odb->path);
commit-graph: don't early exit(1) on e.g. "git status" Make the commit-graph loading code work as a library that returns an error code instead of calling exit(1) when the commit-graph is corrupt. This means that e.g. "status" will now report commit-graph corruption as an "error: [...]" at the top of its output, but then proceed to work normally. This required splitting up the load_commit_graph_one() function so that the code that deals with open()-ing and stat()-ing the graph can now be called independently as open_commit_graph(). This is needed because "commit-graph verify" where the graph doesn't exist isn't an error. See the third paragraph in 283e68c72f ("commit-graph: add 'verify' subcommand", 2018-06-27). There's a bug in that logic where we conflate the intended ENOENT with other errno values (e.g. EACCES), but this change doesn't address that. That'll be addressed in a follow-up change. I'm then splitting most of the logic out of load_commit_graph_one() into load_commit_graph_one_fd_st(), which allows for providing an existing file descriptor and stat information to the loading code. This isn't strictly needed, but it would be redundant and confusing to open() and stat() the file twice for some of the codepaths, this allows for calling open_commit_graph() followed by load_commit_graph_one_fd_st(). The "graph_file" still needs to be passed to that function for the the "graph file %s is too small" error message. This leaves load_commit_graph_one() unused by everything except the internal prepare_commit_graph_one() function, so let's mark it as "static". If someone needs it in the future we can remove the "static" attribute. I could also rewrite its sole remaining user ("prepare_commit_graph_one()") to use load_commit_graph_one_fd_st() instead, but let's leave it at this. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-25 13:08:30 +01:00
open_ok = open_commit_graph(graph_name, &fd, &st);
if (!open_ok && errno != ENOENT)
die_errno(_("Could not open commit-graph '%s'"), graph_name);
FREE_AND_NULL(graph_name);
if (open_ok)
graph = load_commit_graph_one_fd_st(fd, &st);
commit-graph.h: store an odb in 'struct write_commit_graph_context' There are lots of places in 'commit-graph.h' where a function either has (or almost has) a full 'struct object_directory *', accesses '->path', and then throws away the rest of the struct. This can cause headaches when comparing the locations of object directories across alternates (e.g., in the case of deciding if two commit-graph layers can be merged). These paths are normalized with 'normalize_path_copy()' which mitigates some comparison issues, but not all [1]. Replace usage of 'char *object_dir' with 'odb->path' by storing a 'struct object_directory *' in the 'write_commit_graph_context' structure. This is an intermediate step towards getting rid of all path normalization in 'commit-graph.c'. Resolving a user-provided '--object-dir' argument now requires that we compare it to the known alternates for equality. Prior to this patch, an unknown '--object-dir' argument would silently exit with status zero. This can clearly lead to unintended behavior, such as verifying commit-graphs that aren't in a repository's own object store (or one of its alternates), or causing a typo to mask a legitimate commit-graph verification failure. Make this error non-silent by 'die()'-ing when the given '--object-dir' does not match any known alternate object store. [1]: In my testing, for example, I can get one side of the commit-graph code to fill object_dir with "./objects" and the other with just "objects". Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-04 06:51:50 +01:00
else
graph = read_commit_graph_one(the_repository, odb->path);
/* Return failure if open_ok predicted success */
if (!graph)
return !!open_ok;
UNLEAK(graph);
return verify_commit_graph(the_repository, graph, flags);
}
extern int read_replace_refs;
static struct split_commit_graph_opts split_opts;
static int graph_write(int argc, const char **argv)
{
struct string_list *pack_indexes = NULL;
struct string_list *commit_hex = NULL;
commit-graph.h: store an odb in 'struct write_commit_graph_context' There are lots of places in 'commit-graph.h' where a function either has (or almost has) a full 'struct object_directory *', accesses '->path', and then throws away the rest of the struct. This can cause headaches when comparing the locations of object directories across alternates (e.g., in the case of deciding if two commit-graph layers can be merged). These paths are normalized with 'normalize_path_copy()' which mitigates some comparison issues, but not all [1]. Replace usage of 'char *object_dir' with 'odb->path' by storing a 'struct object_directory *' in the 'write_commit_graph_context' structure. This is an intermediate step towards getting rid of all path normalization in 'commit-graph.c'. Resolving a user-provided '--object-dir' argument now requires that we compare it to the known alternates for equality. Prior to this patch, an unknown '--object-dir' argument would silently exit with status zero. This can clearly lead to unintended behavior, such as verifying commit-graphs that aren't in a repository's own object store (or one of its alternates), or causing a typo to mask a legitimate commit-graph verification failure. Make this error non-silent by 'die()'-ing when the given '--object-dir' does not match any known alternate object store. [1]: In my testing, for example, I can get one side of the commit-graph code to fill object_dir with "./objects" and the other with just "objects". Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-04 06:51:50 +01:00
struct object_directory *odb = NULL;
struct string_list lines;
int result = 0;
enum commit_graph_write_flags flags = 0;
static struct option builtin_commit_graph_write_options[] = {
OPT_STRING(0, "object-dir", &opts.obj_dir,
N_("dir"),
N_("The object directory to store the graph")),
OPT_BOOL(0, "reachable", &opts.reachable,
N_("start walk at all refs")),
OPT_BOOL(0, "stdin-packs", &opts.stdin_packs,
N_("scan pack-indexes listed by stdin for commits")),
OPT_BOOL(0, "stdin-commits", &opts.stdin_commits,
N_("start walk at commits listed by stdin")),
OPT_BOOL(0, "append", &opts.append,
N_("include all commits already in the commit-graph file")),
OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")),
OPT_BOOL(0, "split", &opts.split,
N_("allow writing an incremental commit-graph file")),
OPT_INTEGER(0, "max-commits", &split_opts.max_commits,
N_("maximum number of commits in a non-base split commit-graph")),
OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple,
N_("maximum ratio between two levels of a split commit-graph")),
OPT_EXPIRY_DATE(0, "expire-time", &split_opts.expire_time,
N_("maximum number of commits in a non-base split commit-graph")),
OPT_END(),
};
opts.progress = isatty(2);
split_opts.size_multiple = 2;
split_opts.max_commits = 0;
split_opts.expire_time = 0;
trace2_cmd_mode("write");
argc = parse_options(argc, argv, NULL,
builtin_commit_graph_write_options,
builtin_commit_graph_write_usage, 0);
if (opts.reachable + opts.stdin_packs + opts.stdin_commits > 1)
die(_("use at most one of --reachable, --stdin-commits, or --stdin-packs"));
if (!opts.obj_dir)
opts.obj_dir = get_object_directory();
if (opts.append)
flags |= COMMIT_GRAPH_WRITE_APPEND;
if (opts.split)
flags |= COMMIT_GRAPH_WRITE_SPLIT;
if (opts.progress)
flags |= COMMIT_GRAPH_WRITE_PROGRESS;
read_replace_refs = 0;
commit-graph.h: store an odb in 'struct write_commit_graph_context' There are lots of places in 'commit-graph.h' where a function either has (or almost has) a full 'struct object_directory *', accesses '->path', and then throws away the rest of the struct. This can cause headaches when comparing the locations of object directories across alternates (e.g., in the case of deciding if two commit-graph layers can be merged). These paths are normalized with 'normalize_path_copy()' which mitigates some comparison issues, but not all [1]. Replace usage of 'char *object_dir' with 'odb->path' by storing a 'struct object_directory *' in the 'write_commit_graph_context' structure. This is an intermediate step towards getting rid of all path normalization in 'commit-graph.c'. Resolving a user-provided '--object-dir' argument now requires that we compare it to the known alternates for equality. Prior to this patch, an unknown '--object-dir' argument would silently exit with status zero. This can clearly lead to unintended behavior, such as verifying commit-graphs that aren't in a repository's own object store (or one of its alternates), or causing a typo to mask a legitimate commit-graph verification failure. Make this error non-silent by 'die()'-ing when the given '--object-dir' does not match any known alternate object store. [1]: In my testing, for example, I can get one side of the commit-graph code to fill object_dir with "./objects" and the other with just "objects". Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-04 06:51:50 +01:00
odb = find_odb(the_repository, opts.obj_dir);
if (opts.reachable) {
commit-graph.h: store an odb in 'struct write_commit_graph_context' There are lots of places in 'commit-graph.h' where a function either has (or almost has) a full 'struct object_directory *', accesses '->path', and then throws away the rest of the struct. This can cause headaches when comparing the locations of object directories across alternates (e.g., in the case of deciding if two commit-graph layers can be merged). These paths are normalized with 'normalize_path_copy()' which mitigates some comparison issues, but not all [1]. Replace usage of 'char *object_dir' with 'odb->path' by storing a 'struct object_directory *' in the 'write_commit_graph_context' structure. This is an intermediate step towards getting rid of all path normalization in 'commit-graph.c'. Resolving a user-provided '--object-dir' argument now requires that we compare it to the known alternates for equality. Prior to this patch, an unknown '--object-dir' argument would silently exit with status zero. This can clearly lead to unintended behavior, such as verifying commit-graphs that aren't in a repository's own object store (or one of its alternates), or causing a typo to mask a legitimate commit-graph verification failure. Make this error non-silent by 'die()'-ing when the given '--object-dir' does not match any known alternate object store. [1]: In my testing, for example, I can get one side of the commit-graph code to fill object_dir with "./objects" and the other with just "objects". Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-04 06:51:50 +01:00
if (write_commit_graph_reachable(odb, flags, &split_opts))
return 1;
return 0;
}
string_list_init(&lines, 0);
if (opts.stdin_packs || opts.stdin_commits) {
struct strbuf buf = STRBUF_INIT;
while (strbuf_getline(&buf, stdin) != EOF)
string_list_append(&lines, strbuf_detach(&buf, NULL));
if (opts.stdin_packs)
pack_indexes = &lines;
commit-graph: error out on invalid commit oids in 'write --stdin-commits' While 'git commit-graph write --stdin-commits' expects commit object ids as input, it accepts and silently skips over any invalid commit object ids, and still exits with success: # nonsense $ echo not-a-commit-oid | git commit-graph write --stdin-commits $ echo $? 0 # sometimes I forgot that refs are not good... $ echo HEAD | git commit-graph write --stdin-commits $ echo $? 0 # valid tree OID, but not a commit OID $ git rev-parse HEAD^{tree} | git commit-graph write --stdin-commits $ echo $? 0 $ ls -l .git/objects/info/commit-graph ls: cannot access '.git/objects/info/commit-graph': No such file or directory Check that all input records are indeed valid commit object ids and return with error otherwise, the same way '--stdin-packs' handles invalid input; see e103f7276f (commit-graph: return with errors during write, 2019-06-12). Note that it should only return with error when encountering an invalid commit object id coming from standard input. However, '--reachable' uses the same code path to process object ids pointed to by all refs, and that includes tag object ids as well, which should still be skipped over. Therefore add a new flag to 'enum commit_graph_write_flags' and a corresponding field to 'struct write_commit_graph_context', so we can differentiate between those two cases. Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com> Acked-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-08-05 10:02:40 +02:00
if (opts.stdin_commits) {
commit_hex = &lines;
commit-graph: error out on invalid commit oids in 'write --stdin-commits' While 'git commit-graph write --stdin-commits' expects commit object ids as input, it accepts and silently skips over any invalid commit object ids, and still exits with success: # nonsense $ echo not-a-commit-oid | git commit-graph write --stdin-commits $ echo $? 0 # sometimes I forgot that refs are not good... $ echo HEAD | git commit-graph write --stdin-commits $ echo $? 0 # valid tree OID, but not a commit OID $ git rev-parse HEAD^{tree} | git commit-graph write --stdin-commits $ echo $? 0 $ ls -l .git/objects/info/commit-graph ls: cannot access '.git/objects/info/commit-graph': No such file or directory Check that all input records are indeed valid commit object ids and return with error otherwise, the same way '--stdin-packs' handles invalid input; see e103f7276f (commit-graph: return with errors during write, 2019-06-12). Note that it should only return with error when encountering an invalid commit object id coming from standard input. However, '--reachable' uses the same code path to process object ids pointed to by all refs, and that includes tag object ids as well, which should still be skipped over. Therefore add a new flag to 'enum commit_graph_write_flags' and a corresponding field to 'struct write_commit_graph_context', so we can differentiate between those two cases. Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com> Acked-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-08-05 10:02:40 +02:00
flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS;
}
UNLEAK(buf);
}
commit-graph.h: store an odb in 'struct write_commit_graph_context' There are lots of places in 'commit-graph.h' where a function either has (or almost has) a full 'struct object_directory *', accesses '->path', and then throws away the rest of the struct. This can cause headaches when comparing the locations of object directories across alternates (e.g., in the case of deciding if two commit-graph layers can be merged). These paths are normalized with 'normalize_path_copy()' which mitigates some comparison issues, but not all [1]. Replace usage of 'char *object_dir' with 'odb->path' by storing a 'struct object_directory *' in the 'write_commit_graph_context' structure. This is an intermediate step towards getting rid of all path normalization in 'commit-graph.c'. Resolving a user-provided '--object-dir' argument now requires that we compare it to the known alternates for equality. Prior to this patch, an unknown '--object-dir' argument would silently exit with status zero. This can clearly lead to unintended behavior, such as verifying commit-graphs that aren't in a repository's own object store (or one of its alternates), or causing a typo to mask a legitimate commit-graph verification failure. Make this error non-silent by 'die()'-ing when the given '--object-dir' does not match any known alternate object store. [1]: In my testing, for example, I can get one side of the commit-graph code to fill object_dir with "./objects" and the other with just "objects". Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-04 06:51:50 +01:00
if (write_commit_graph(odb,
pack_indexes,
commit_hex,
flags,
&split_opts))
result = 1;
UNLEAK(lines);
return result;
}
int cmd_commit_graph(int argc, const char **argv, const char *prefix)
{
static struct option builtin_commit_graph_options[] = {
OPT_STRING(0, "object-dir", &opts.obj_dir,
N_("dir"),
N_("The object directory to store the graph")),
OPT_END(),
};
if (argc == 2 && !strcmp(argv[1], "-h"))
usage_with_options(builtin_commit_graph_usage,
builtin_commit_graph_options);
git_config(git_default_config, NULL);
argc = parse_options(argc, argv, prefix,
builtin_commit_graph_options,
builtin_commit_graph_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
save_commit_buffer = 0;
if (argc > 0) {
if (!strcmp(argv[0], "verify"))
return graph_verify(argc, argv);
if (!strcmp(argv[0], "write"))
return graph_write(argc, argv);
}
usage_with_options(builtin_commit_graph_usage,
builtin_commit_graph_options);
}