2019-01-24 09:29:12 +01:00
|
|
|
/*
|
|
|
|
* not really _using_ the compat macros, just make sure the_index
|
|
|
|
* declaration matches the definition in this file.
|
|
|
|
*/
|
2022-11-19 14:07:36 +01:00
|
|
|
#define USE_THE_INDEX_VARIABLE
|
2017-06-22 20:43:32 +02:00
|
|
|
#include "cache.h"
|
|
|
|
#include "repository.h"
|
2018-03-23 18:20:55 +01:00
|
|
|
#include "object-store.h"
|
2017-06-22 20:43:42 +02:00
|
|
|
#include "config.h"
|
2018-05-08 21:37:24 +02:00
|
|
|
#include "object.h"
|
2019-01-12 03:13:24 +01:00
|
|
|
#include "lockfile.h"
|
2021-11-18 01:53:22 +01:00
|
|
|
#include "remote.h"
|
2017-06-22 20:43:44 +02:00
|
|
|
#include "submodule-config.h"
|
2021-03-30 15:10:47 +02:00
|
|
|
#include "sparse-index.h"
|
2021-06-17 19:13:23 +02:00
|
|
|
#include "promisor-remote.h"
|
2017-06-22 20:43:32 +02:00
|
|
|
|
|
|
|
/* The main repository */
|
2018-03-03 12:35:54 +01:00
|
|
|
static struct repository the_repo;
|
|
|
|
struct repository *the_repository;
|
2019-01-24 09:29:12 +01:00
|
|
|
struct index_state the_index;
|
2018-03-03 12:35:54 +01:00
|
|
|
|
|
|
|
void initialize_the_repository(void)
|
|
|
|
{
|
|
|
|
the_repository = &the_repo;
|
|
|
|
|
|
|
|
the_repo.index = &the_index;
|
2018-03-23 18:20:55 +01:00
|
|
|
the_repo.objects = raw_object_store_new();
|
2021-11-18 01:53:22 +01:00
|
|
|
the_repo.remote_state = remote_state_new();
|
2018-05-08 21:37:24 +02:00
|
|
|
the_repo.parsed_objects = parsed_object_pool_new();
|
|
|
|
|
treewide: always have a valid "index_state.repo" member
When the "repo" member was added to "the_index" in [1] the
repo_read_index() was made to populate it, but the unpopulated
"the_index" variable didn't get the same treatment.
Let's do that in initialize_the_repository() when we set it up, and
likewise for all of the current callers initialized an empty "struct
index_state".
This simplifies code that needs to deal with "the_index" or a custom
"struct index_state", we no longer need to second-guess this part of
the "index_state" deep in the stack. A recent example of such
second-guessing is the "istate->repo ? istate->repo : the_repository"
code in [2]. We can now simply use "istate->repo".
We're doing this by making use of the INDEX_STATE_INIT() macro (and
corresponding function) added in [3], which now have mandatory "repo"
arguments.
Because we now call index_state_init() in repository.c's
initialize_the_repository() we don't need to handle the case where we
have a "repo->index" whose "repo" member doesn't match the "repo"
we're setting up, i.e. the "Complete the double-reference" code in
repo_read_index() being altered here. That logic was originally added
in [1], and was working around the lack of what we now have in
initialize_the_repository().
For "fsmonitor-settings.c" we can remove the initialization of a NULL
"r" argument to "the_repository". This was added back in [4], and was
needed at the time for callers that would pass us the "r" from an
"istate->repo". Before this change such a change to
"fsmonitor-settings.c" would segfault all over the test suite (e.g. in
t0002-gitfile.sh).
This change has wider eventual implications for
"fsmonitor-settings.c". The reason the other lazy loading behavior in
it is required (starting with "if (!r->settings.fsmonitor) ..." is
because of the previously passed "r" being "NULL".
I have other local changes on top of this which move its configuration
reading to "prepare_repo_settings()" in "repo-settings.c", as we could
now start to rely on it being called for our "r". But let's leave all
of that for now, and narrowly remove this particular part of the
lazy-loading.
1. 1fd9ae517c4 (repository: add repo reference to index_state,
2021-01-23)
2. ee1f0c242ef (read-cache: add index.skipHash config option,
2023-01-06)
3. 2f6b1eb794e (cache API: add a "INDEX_STATE_INIT" macro/function,
add release_index(), 2023-01-12)
4. 1e0ea5c4316 (fsmonitor: config settings are repository-specific,
2022-03-25)
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Acked-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-01-17 14:57:00 +01:00
|
|
|
index_state_init(&the_index, the_repository);
|
|
|
|
|
2018-03-03 12:35:54 +01:00
|
|
|
repo_set_hash_algo(&the_repo, GIT_HASH_SHA1);
|
|
|
|
}
|
2017-06-22 20:43:32 +02:00
|
|
|
|
2018-03-03 12:35:55 +01:00
|
|
|
static void expand_base_dir(char **out, const char *in,
|
|
|
|
const char *base_dir, const char *def_in)
|
|
|
|
{
|
|
|
|
free(*out);
|
|
|
|
if (in)
|
|
|
|
*out = xstrdup(in);
|
|
|
|
else
|
|
|
|
*out = xstrfmt("%s/%s", base_dir, def_in);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void repo_set_commondir(struct repository *repo,
|
|
|
|
const char *commondir)
|
2017-06-22 20:43:32 +02:00
|
|
|
{
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
|
2017-09-05 15:04:57 +02:00
|
|
|
free(repo->commondir);
|
2018-03-03 12:35:55 +01:00
|
|
|
|
|
|
|
if (commondir) {
|
|
|
|
repo->different_commondir = 1;
|
|
|
|
repo->commondir = xstrdup(commondir);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
repo->different_commondir = get_common_dir_noenv(&sb, repo->gitdir);
|
2017-06-22 20:43:32 +02:00
|
|
|
repo->commondir = strbuf_detach(&sb, NULL);
|
|
|
|
}
|
|
|
|
|
2018-03-03 12:35:55 +01:00
|
|
|
void repo_set_gitdir(struct repository *repo,
|
|
|
|
const char *root,
|
|
|
|
const struct set_gitdir_args *o)
|
2017-06-22 20:43:32 +02:00
|
|
|
{
|
2018-03-03 12:35:55 +01:00
|
|
|
const char *gitfile = read_gitfile(root);
|
|
|
|
/*
|
|
|
|
* repo->gitdir is saved because the caller could pass "root"
|
|
|
|
* that also points to repo->gitdir. We want to keep it alive
|
|
|
|
* until after xstrdup(root). Then we can free it.
|
|
|
|
*/
|
2017-09-05 15:05:01 +02:00
|
|
|
char *old_gitdir = repo->gitdir;
|
2017-06-22 20:43:32 +02:00
|
|
|
|
2018-03-03 12:35:55 +01:00
|
|
|
repo->gitdir = xstrdup(gitfile ? gitfile : root);
|
2017-09-05 15:05:01 +02:00
|
|
|
free(old_gitdir);
|
2018-03-03 12:35:55 +01:00
|
|
|
|
|
|
|
repo_set_commondir(repo, o->commondir);
|
sha1-file: use an object_directory for the main object dir
Our handling of alternate object directories is needlessly different
from the main object directory. As a result, many places in the code
basically look like this:
do_something(r->objects->objdir);
for (odb = r->objects->alt_odb_list; odb; odb = odb->next)
do_something(odb->path);
That gets annoying when do_something() is non-trivial, and we've
resorted to gross hacks like creating fake alternates (see
find_short_object_filename()).
Instead, let's give each raw_object_store a unified list of
object_directory structs. The first will be the main store, and
everything after is an alternate. Very few callers even care about the
distinction, and can just loop over the whole list (and those who care
can just treat the first element differently).
A few observations:
- we don't need r->objects->objectdir anymore, and can just
mechanically convert that to r->objects->odb->path
- object_directory's path field needs to become a real pointer rather
than a FLEX_ARRAY, in order to fill it with expand_base_dir()
- we'll call prepare_alt_odb() earlier in many functions (i.e.,
outside of the loop). This may result in us calling it even when our
function would be satisfied looking only at the main odb.
But this doesn't matter in practice. It's not a very expensive
operation in the first place, and in the majority of cases it will
be a noop. We call it already (and cache its results) in
prepare_packed_git(), and we'll generally check packs before loose
objects. So essentially every program is going to call it
immediately once per program.
Arguably we should just prepare_alt_odb() immediately upon setting
up the repository's object directory, which would save us sprinkling
calls throughout the code base (and forgetting to do so has been a
source of subtle bugs in the past). But I've stopped short of that
here, since there are already a lot of other moving parts in this
patch.
- Most call sites just get shorter. The check_and_freshen() functions
are an exception, because they have entry points to handle local and
nonlocal directories separately.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-12 15:50:39 +01:00
|
|
|
|
|
|
|
if (!repo->objects->odb) {
|
2021-03-13 17:17:22 +01:00
|
|
|
CALLOC_ARRAY(repo->objects->odb, 1);
|
sha1-file: use an object_directory for the main object dir
Our handling of alternate object directories is needlessly different
from the main object directory. As a result, many places in the code
basically look like this:
do_something(r->objects->objdir);
for (odb = r->objects->alt_odb_list; odb; odb = odb->next)
do_something(odb->path);
That gets annoying when do_something() is non-trivial, and we've
resorted to gross hacks like creating fake alternates (see
find_short_object_filename()).
Instead, let's give each raw_object_store a unified list of
object_directory structs. The first will be the main store, and
everything after is an alternate. Very few callers even care about the
distinction, and can just loop over the whole list (and those who care
can just treat the first element differently).
A few observations:
- we don't need r->objects->objectdir anymore, and can just
mechanically convert that to r->objects->odb->path
- object_directory's path field needs to become a real pointer rather
than a FLEX_ARRAY, in order to fill it with expand_base_dir()
- we'll call prepare_alt_odb() earlier in many functions (i.e.,
outside of the loop). This may result in us calling it even when our
function would be satisfied looking only at the main odb.
But this doesn't matter in practice. It's not a very expensive
operation in the first place, and in the majority of cases it will
be a noop. We call it already (and cache its results) in
prepare_packed_git(), and we'll generally check packs before loose
objects. So essentially every program is going to call it
immediately once per program.
Arguably we should just prepare_alt_odb() immediately upon setting
up the repository's object directory, which would save us sprinkling
calls throughout the code base (and forgetting to do so has been a
source of subtle bugs in the past). But I've stopped short of that
here, since there are already a lot of other moving parts in this
patch.
- Most call sites just get shorter. The check_and_freshen() functions
are an exception, because they have entry points to handle local and
nonlocal directories separately.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-12 15:50:39 +01:00
|
|
|
repo->objects->odb_tail = &repo->objects->odb->next;
|
|
|
|
}
|
|
|
|
expand_base_dir(&repo->objects->odb->path, o->object_dir,
|
2018-03-03 12:35:55 +01:00
|
|
|
repo->commondir, "objects");
|
sha1-file: use an object_directory for the main object dir
Our handling of alternate object directories is needlessly different
from the main object directory. As a result, many places in the code
basically look like this:
do_something(r->objects->objdir);
for (odb = r->objects->alt_odb_list; odb; odb = odb->next)
do_something(odb->path);
That gets annoying when do_something() is non-trivial, and we've
resorted to gross hacks like creating fake alternates (see
find_short_object_filename()).
Instead, let's give each raw_object_store a unified list of
object_directory structs. The first will be the main store, and
everything after is an alternate. Very few callers even care about the
distinction, and can just loop over the whole list (and those who care
can just treat the first element differently).
A few observations:
- we don't need r->objects->objectdir anymore, and can just
mechanically convert that to r->objects->odb->path
- object_directory's path field needs to become a real pointer rather
than a FLEX_ARRAY, in order to fill it with expand_base_dir()
- we'll call prepare_alt_odb() earlier in many functions (i.e.,
outside of the loop). This may result in us calling it even when our
function would be satisfied looking only at the main odb.
But this doesn't matter in practice. It's not a very expensive
operation in the first place, and in the majority of cases it will
be a noop. We call it already (and cache its results) in
prepare_packed_git(), and we'll generally check packs before loose
objects. So essentially every program is going to call it
immediately once per program.
Arguably we should just prepare_alt_odb() immediately upon setting
up the repository's object directory, which would save us sprinkling
calls throughout the code base (and forgetting to do so has been a
source of subtle bugs in the past). But I've stopped short of that
here, since there are already a lot of other moving parts in this
patch.
- Most call sites just get shorter. The check_and_freshen() functions
are an exception, because they have entry points to handle local and
nonlocal directories separately.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-11-12 15:50:39 +01:00
|
|
|
|
2021-12-06 23:05:05 +01:00
|
|
|
repo->objects->odb->disable_ref_updates = o->disable_ref_updates;
|
|
|
|
|
2018-03-23 18:20:55 +01:00
|
|
|
free(repo->objects->alternate_db);
|
|
|
|
repo->objects->alternate_db = xstrdup_or_null(o->alternate_db);
|
2018-03-03 12:35:55 +01:00
|
|
|
expand_base_dir(&repo->graft_file, o->graft_file,
|
|
|
|
repo->commondir, "info/grafts");
|
|
|
|
expand_base_dir(&repo->index_file, o->index_file,
|
|
|
|
repo->gitdir, "index");
|
2017-06-22 20:43:32 +02:00
|
|
|
}
|
|
|
|
|
2017-11-12 22:28:53 +01:00
|
|
|
void repo_set_hash_algo(struct repository *repo, int hash_algo)
|
|
|
|
{
|
|
|
|
repo->hash_algo = &hash_algos[hash_algo];
|
|
|
|
}
|
|
|
|
|
2017-06-22 20:43:32 +02:00
|
|
|
/*
|
|
|
|
* Attempt to resolve and set the provided 'gitdir' for repository 'repo'.
|
|
|
|
* Return 0 upon success and a non-zero value upon failure.
|
|
|
|
*/
|
|
|
|
static int repo_init_gitdir(struct repository *repo, const char *gitdir)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
int error = 0;
|
|
|
|
char *abspath = NULL;
|
|
|
|
const char *resolved_gitdir;
|
2018-03-03 12:35:55 +01:00
|
|
|
struct set_gitdir_args args = { NULL };
|
2017-06-22 20:43:32 +02:00
|
|
|
|
|
|
|
abspath = real_pathdup(gitdir, 0);
|
|
|
|
if (!abspath) {
|
|
|
|
ret = -1;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* 'gitdir' must reference the gitdir directly */
|
|
|
|
resolved_gitdir = resolve_gitdir_gently(abspath, &error);
|
|
|
|
if (!resolved_gitdir) {
|
|
|
|
ret = -1;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2018-03-03 12:35:55 +01:00
|
|
|
repo_set_gitdir(repo, resolved_gitdir, &args);
|
2017-06-22 20:43:32 +02:00
|
|
|
|
|
|
|
out:
|
|
|
|
free(abspath);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void repo_set_worktree(struct repository *repo, const char *path)
|
|
|
|
{
|
|
|
|
repo->worktree = real_pathdup(path, 1);
|
2019-02-22 23:25:01 +01:00
|
|
|
|
|
|
|
trace2_def_repo(repo);
|
2017-06-22 20:43:32 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static int read_and_verify_repository_format(struct repository_format *format,
|
|
|
|
const char *commondir)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
|
|
|
|
strbuf_addf(&sb, "%s/config", commondir);
|
|
|
|
read_repository_format(format, sb.buf);
|
|
|
|
strbuf_reset(&sb);
|
|
|
|
|
|
|
|
if (verify_repository_format(format, &sb) < 0) {
|
|
|
|
warning("%s", sb.buf);
|
|
|
|
ret = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_release(&sb);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize 'repo' based on the provided 'gitdir'.
|
|
|
|
* Return 0 upon success and a non-zero value upon failure.
|
|
|
|
*/
|
2018-03-29 00:35:31 +02:00
|
|
|
int repo_init(struct repository *repo,
|
|
|
|
const char *gitdir,
|
|
|
|
const char *worktree)
|
2017-06-22 20:43:32 +02:00
|
|
|
{
|
setup: fix memory leaks with `struct repository_format`
After we set up a `struct repository_format`, it owns various pieces of
allocated memory. We then either use those members, because we decide we
want to use the "candidate" repository format, or we discard the
candidate / scratch space. In the first case, we transfer ownership of
the memory to a few global variables. In the latter case, we just
silently drop the struct and end up leaking memory.
Introduce an initialization macro `REPOSITORY_FORMAT_INIT` and a
function `clear_repository_format()`, to be used on each side of
`read_repository_format()`. To have a clear and simple memory ownership,
let all users of `struct repository_format` duplicate the strings that
they take from it, rather than stealing the pointers.
Call `clear_...()` at the start of `read_...()` instead of just zeroing
the struct, since we sometimes enter the function multiple times. Thus,
it is important to initialize the struct before calling `read_...()`, so
document that. It's also important because we might not even call
`read_...()` before we call `clear_...()`, see, e.g., builtin/init-db.c.
Teach `read_...()` to clear the struct on error, so that it is reset to
a safe state, and document this. (In `setup_git_directory_gently()`, we
look at `repo_fmt.hash_algo` even if `repo_fmt.version` is -1, which we
weren't actually supposed to do per the API. After this commit, that's
ok.)
We inherit the existing code's combining "error" and "no version found".
Both are signalled through `version == -1` and now both cause us to
clear any partial configuration we have picked up. For "extensions.*",
that's fine, since they require a positive version number. For
"core.bare" and "core.worktree", we're already verifying that we have a
non-negative version number before using them.
Signed-off-by: Martin Ågren <martin.agren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-28 21:36:28 +01:00
|
|
|
struct repository_format format = REPOSITORY_FORMAT_INIT;
|
2017-06-22 20:43:32 +02:00
|
|
|
memset(repo, 0, sizeof(*repo));
|
|
|
|
|
2018-03-23 18:20:55 +01:00
|
|
|
repo->objects = raw_object_store_new();
|
2018-05-08 21:37:24 +02:00
|
|
|
repo->parsed_objects = parsed_object_pool_new();
|
2021-11-18 01:53:22 +01:00
|
|
|
repo->remote_state = remote_state_new();
|
2018-03-23 18:20:55 +01:00
|
|
|
|
2017-06-22 20:43:32 +02:00
|
|
|
if (repo_init_gitdir(repo, gitdir))
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
if (read_and_verify_repository_format(&format, repo->commondir))
|
|
|
|
goto error;
|
|
|
|
|
2017-11-12 22:28:53 +01:00
|
|
|
repo_set_hash_algo(repo, format.hash_algo);
|
|
|
|
|
2021-06-17 19:13:22 +02:00
|
|
|
/* take ownership of format.partial_clone */
|
|
|
|
repo->repository_format_partial_clone = format.partial_clone;
|
|
|
|
format.partial_clone = NULL;
|
|
|
|
|
2017-06-22 20:43:32 +02:00
|
|
|
if (worktree)
|
|
|
|
repo_set_worktree(repo, worktree);
|
|
|
|
|
setup: fix memory leaks with `struct repository_format`
After we set up a `struct repository_format`, it owns various pieces of
allocated memory. We then either use those members, because we decide we
want to use the "candidate" repository format, or we discard the
candidate / scratch space. In the first case, we transfer ownership of
the memory to a few global variables. In the latter case, we just
silently drop the struct and end up leaking memory.
Introduce an initialization macro `REPOSITORY_FORMAT_INIT` and a
function `clear_repository_format()`, to be used on each side of
`read_repository_format()`. To have a clear and simple memory ownership,
let all users of `struct repository_format` duplicate the strings that
they take from it, rather than stealing the pointers.
Call `clear_...()` at the start of `read_...()` instead of just zeroing
the struct, since we sometimes enter the function multiple times. Thus,
it is important to initialize the struct before calling `read_...()`, so
document that. It's also important because we might not even call
`read_...()` before we call `clear_...()`, see, e.g., builtin/init-db.c.
Teach `read_...()` to clear the struct on error, so that it is reset to
a safe state, and document this. (In `setup_git_directory_gently()`, we
look at `repo_fmt.hash_algo` even if `repo_fmt.version` is -1, which we
weren't actually supposed to do per the API. After this commit, that's
ok.)
We inherit the existing code's combining "error" and "no version found".
Both are signalled through `version == -1` and now both cause us to
clear any partial configuration we have picked up. For "extensions.*",
that's fine, since they require a positive version number. For
"core.bare" and "core.worktree", we're already verifying that we have a
non-negative version number before using them.
Signed-off-by: Martin Ågren <martin.agren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-28 21:36:28 +01:00
|
|
|
clear_repository_format(&format);
|
2017-06-22 20:43:32 +02:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
error:
|
|
|
|
repo_clear(repo);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2018-11-29 01:27:53 +01:00
|
|
|
int repo_submodule_init(struct repository *subrepo,
|
2017-06-22 20:43:47 +02:00
|
|
|
struct repository *superproject,
|
2021-09-09 20:47:28 +02:00
|
|
|
const char *path,
|
|
|
|
const struct object_id *treeish_name)
|
2017-06-22 20:43:47 +02:00
|
|
|
{
|
|
|
|
struct strbuf gitdir = STRBUF_INIT;
|
|
|
|
struct strbuf worktree = STRBUF_INIT;
|
|
|
|
int ret = 0;
|
|
|
|
|
2021-09-09 20:47:28 +02:00
|
|
|
strbuf_repo_worktree_path(&gitdir, superproject, "%s/.git", path);
|
|
|
|
strbuf_repo_worktree_path(&worktree, superproject, "%s", path);
|
2017-06-22 20:43:47 +02:00
|
|
|
|
2018-11-29 01:27:53 +01:00
|
|
|
if (repo_init(subrepo, gitdir.buf, worktree.buf)) {
|
2017-06-22 20:43:47 +02:00
|
|
|
/*
|
2019-11-05 18:07:23 +01:00
|
|
|
* If initialization fails then it may be due to the submodule
|
2017-06-22 20:43:47 +02:00
|
|
|
* not being populated in the superproject's worktree. Instead
|
2019-11-05 18:07:23 +01:00
|
|
|
* we can try to initialize the submodule by finding it's gitdir
|
2017-06-22 20:43:47 +02:00
|
|
|
* in the superproject's 'modules' directory. In this case the
|
|
|
|
* submodule would not have a worktree.
|
|
|
|
*/
|
2021-09-09 20:47:28 +02:00
|
|
|
const struct submodule *sub =
|
|
|
|
submodule_from_path(superproject, treeish_name, path);
|
|
|
|
if (!sub) {
|
|
|
|
ret = -1;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2017-06-22 20:43:47 +02:00
|
|
|
strbuf_reset(&gitdir);
|
2021-09-15 20:59:19 +02:00
|
|
|
submodule_name_to_gitdir(&gitdir, superproject, sub->name);
|
2017-06-22 20:43:47 +02:00
|
|
|
|
2018-11-29 01:27:53 +01:00
|
|
|
if (repo_init(subrepo, gitdir.buf, NULL)) {
|
2017-06-22 20:43:47 +02:00
|
|
|
ret = -1;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-29 01:27:53 +01:00
|
|
|
subrepo->submodule_prefix = xstrfmt("%s%s/",
|
|
|
|
superproject->submodule_prefix ?
|
|
|
|
superproject->submodule_prefix :
|
2021-09-09 20:47:28 +02:00
|
|
|
"", path);
|
2017-06-22 20:43:47 +02:00
|
|
|
|
|
|
|
out:
|
|
|
|
strbuf_release(&gitdir);
|
|
|
|
strbuf_release(&worktree);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2022-03-04 19:32:17 +01:00
|
|
|
static void repo_clear_path_cache(struct repo_path_cache *cache)
|
|
|
|
{
|
|
|
|
FREE_AND_NULL(cache->squash_msg);
|
|
|
|
FREE_AND_NULL(cache->squash_msg);
|
|
|
|
FREE_AND_NULL(cache->merge_msg);
|
|
|
|
FREE_AND_NULL(cache->merge_rr);
|
|
|
|
FREE_AND_NULL(cache->merge_mode);
|
|
|
|
FREE_AND_NULL(cache->merge_head);
|
|
|
|
FREE_AND_NULL(cache->merge_autostash);
|
|
|
|
FREE_AND_NULL(cache->auto_merge);
|
|
|
|
FREE_AND_NULL(cache->fetch_head);
|
|
|
|
FREE_AND_NULL(cache->shallow);
|
|
|
|
}
|
|
|
|
|
2017-06-22 20:43:32 +02:00
|
|
|
void repo_clear(struct repository *repo)
|
|
|
|
{
|
2017-10-01 16:44:46 +02:00
|
|
|
FREE_AND_NULL(repo->gitdir);
|
|
|
|
FREE_AND_NULL(repo->commondir);
|
|
|
|
FREE_AND_NULL(repo->graft_file);
|
|
|
|
FREE_AND_NULL(repo->index_file);
|
|
|
|
FREE_AND_NULL(repo->worktree);
|
|
|
|
FREE_AND_NULL(repo->submodule_prefix);
|
2017-06-22 20:43:42 +02:00
|
|
|
|
2018-03-23 18:20:55 +01:00
|
|
|
raw_object_store_clear(repo->objects);
|
|
|
|
FREE_AND_NULL(repo->objects);
|
|
|
|
|
2018-05-08 21:37:24 +02:00
|
|
|
parsed_object_pool_clear(repo->parsed_objects);
|
|
|
|
FREE_AND_NULL(repo->parsed_objects);
|
|
|
|
|
2017-06-22 20:43:42 +02:00
|
|
|
if (repo->config) {
|
|
|
|
git_configset_clear(repo->config);
|
2017-10-01 16:44:46 +02:00
|
|
|
FREE_AND_NULL(repo->config);
|
2017-06-22 20:43:42 +02:00
|
|
|
}
|
2017-06-22 20:43:43 +02:00
|
|
|
|
2017-06-22 20:43:44 +02:00
|
|
|
if (repo->submodule_cache) {
|
|
|
|
submodule_cache_free(repo->submodule_cache);
|
|
|
|
repo->submodule_cache = NULL;
|
|
|
|
}
|
|
|
|
|
2017-06-22 20:43:43 +02:00
|
|
|
if (repo->index) {
|
|
|
|
discard_index(repo->index);
|
2018-05-10 08:13:10 +02:00
|
|
|
if (repo->index != &the_index)
|
|
|
|
FREE_AND_NULL(repo->index);
|
2017-06-22 20:43:43 +02:00
|
|
|
}
|
2021-06-17 19:13:23 +02:00
|
|
|
|
|
|
|
if (repo->promisor_remote_config) {
|
|
|
|
promisor_remote_clear(repo->promisor_remote_config);
|
|
|
|
FREE_AND_NULL(repo->promisor_remote_config);
|
|
|
|
}
|
2021-11-18 01:53:22 +01:00
|
|
|
|
|
|
|
if (repo->remote_state) {
|
|
|
|
remote_state_clear(repo->remote_state);
|
|
|
|
FREE_AND_NULL(repo->remote_state);
|
|
|
|
}
|
2022-03-04 19:32:17 +01:00
|
|
|
|
|
|
|
repo_clear_path_cache(&repo->cached_paths);
|
2017-06-22 20:43:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int repo_read_index(struct repository *repo)
|
|
|
|
{
|
2021-03-30 15:10:47 +02:00
|
|
|
int res;
|
|
|
|
|
treewide: always have a valid "index_state.repo" member
When the "repo" member was added to "the_index" in [1] the
repo_read_index() was made to populate it, but the unpopulated
"the_index" variable didn't get the same treatment.
Let's do that in initialize_the_repository() when we set it up, and
likewise for all of the current callers initialized an empty "struct
index_state".
This simplifies code that needs to deal with "the_index" or a custom
"struct index_state", we no longer need to second-guess this part of
the "index_state" deep in the stack. A recent example of such
second-guessing is the "istate->repo ? istate->repo : the_repository"
code in [2]. We can now simply use "istate->repo".
We're doing this by making use of the INDEX_STATE_INIT() macro (and
corresponding function) added in [3], which now have mandatory "repo"
arguments.
Because we now call index_state_init() in repository.c's
initialize_the_repository() we don't need to handle the case where we
have a "repo->index" whose "repo" member doesn't match the "repo"
we're setting up, i.e. the "Complete the double-reference" code in
repo_read_index() being altered here. That logic was originally added
in [1], and was working around the lack of what we now have in
initialize_the_repository().
For "fsmonitor-settings.c" we can remove the initialization of a NULL
"r" argument to "the_repository". This was added back in [4], and was
needed at the time for callers that would pass us the "r" from an
"istate->repo". Before this change such a change to
"fsmonitor-settings.c" would segfault all over the test suite (e.g. in
t0002-gitfile.sh).
This change has wider eventual implications for
"fsmonitor-settings.c". The reason the other lazy loading behavior in
it is required (starting with "if (!r->settings.fsmonitor) ..." is
because of the previously passed "r" being "NULL".
I have other local changes on top of this which move its configuration
reading to "prepare_repo_settings()" in "repo-settings.c", as we could
now start to rely on it being called for our "r". But let's leave all
of that for now, and narrowly remove this particular part of the
lazy-loading.
1. 1fd9ae517c4 (repository: add repo reference to index_state,
2021-01-23)
2. ee1f0c242ef (read-cache: add index.skipHash config option,
2023-01-06)
3. 2f6b1eb794e (cache API: add a "INDEX_STATE_INIT" macro/function,
add release_index(), 2023-01-12)
4. 1e0ea5c4316 (fsmonitor: config settings are repository-specific,
2022-03-25)
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Acked-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-01-17 14:57:00 +01:00
|
|
|
/* Complete the double-reference */
|
2023-01-12 13:55:27 +01:00
|
|
|
if (!repo->index) {
|
|
|
|
ALLOC_ARRAY(repo->index, 1);
|
treewide: always have a valid "index_state.repo" member
When the "repo" member was added to "the_index" in [1] the
repo_read_index() was made to populate it, but the unpopulated
"the_index" variable didn't get the same treatment.
Let's do that in initialize_the_repository() when we set it up, and
likewise for all of the current callers initialized an empty "struct
index_state".
This simplifies code that needs to deal with "the_index" or a custom
"struct index_state", we no longer need to second-guess this part of
the "index_state" deep in the stack. A recent example of such
second-guessing is the "istate->repo ? istate->repo : the_repository"
code in [2]. We can now simply use "istate->repo".
We're doing this by making use of the INDEX_STATE_INIT() macro (and
corresponding function) added in [3], which now have mandatory "repo"
arguments.
Because we now call index_state_init() in repository.c's
initialize_the_repository() we don't need to handle the case where we
have a "repo->index" whose "repo" member doesn't match the "repo"
we're setting up, i.e. the "Complete the double-reference" code in
repo_read_index() being altered here. That logic was originally added
in [1], and was working around the lack of what we now have in
initialize_the_repository().
For "fsmonitor-settings.c" we can remove the initialization of a NULL
"r" argument to "the_repository". This was added back in [4], and was
needed at the time for callers that would pass us the "r" from an
"istate->repo". Before this change such a change to
"fsmonitor-settings.c" would segfault all over the test suite (e.g. in
t0002-gitfile.sh).
This change has wider eventual implications for
"fsmonitor-settings.c". The reason the other lazy loading behavior in
it is required (starting with "if (!r->settings.fsmonitor) ..." is
because of the previously passed "r" being "NULL".
I have other local changes on top of this which move its configuration
reading to "prepare_repo_settings()" in "repo-settings.c", as we could
now start to rely on it being called for our "r". But let's leave all
of that for now, and narrowly remove this particular part of the
lazy-loading.
1. 1fd9ae517c4 (repository: add repo reference to index_state,
2021-01-23)
2. ee1f0c242ef (read-cache: add index.skipHash config option,
2023-01-06)
3. 2f6b1eb794e (cache API: add a "INDEX_STATE_INIT" macro/function,
add release_index(), 2023-01-12)
4. 1e0ea5c4316 (fsmonitor: config settings are repository-specific,
2022-03-25)
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Acked-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-01-17 14:57:00 +01:00
|
|
|
index_state_init(repo->index, repo);
|
|
|
|
} else if (repo->index->repo != repo) {
|
2021-01-23 20:58:15 +01:00
|
|
|
BUG("repo's index should point back at itself");
|
treewide: always have a valid "index_state.repo" member
When the "repo" member was added to "the_index" in [1] the
repo_read_index() was made to populate it, but the unpopulated
"the_index" variable didn't get the same treatment.
Let's do that in initialize_the_repository() when we set it up, and
likewise for all of the current callers initialized an empty "struct
index_state".
This simplifies code that needs to deal with "the_index" or a custom
"struct index_state", we no longer need to second-guess this part of
the "index_state" deep in the stack. A recent example of such
second-guessing is the "istate->repo ? istate->repo : the_repository"
code in [2]. We can now simply use "istate->repo".
We're doing this by making use of the INDEX_STATE_INIT() macro (and
corresponding function) added in [3], which now have mandatory "repo"
arguments.
Because we now call index_state_init() in repository.c's
initialize_the_repository() we don't need to handle the case where we
have a "repo->index" whose "repo" member doesn't match the "repo"
we're setting up, i.e. the "Complete the double-reference" code in
repo_read_index() being altered here. That logic was originally added
in [1], and was working around the lack of what we now have in
initialize_the_repository().
For "fsmonitor-settings.c" we can remove the initialization of a NULL
"r" argument to "the_repository". This was added back in [4], and was
needed at the time for callers that would pass us the "r" from an
"istate->repo". Before this change such a change to
"fsmonitor-settings.c" would segfault all over the test suite (e.g. in
t0002-gitfile.sh).
This change has wider eventual implications for
"fsmonitor-settings.c". The reason the other lazy loading behavior in
it is required (starting with "if (!r->settings.fsmonitor) ..." is
because of the previously passed "r" being "NULL".
I have other local changes on top of this which move its configuration
reading to "prepare_repo_settings()" in "repo-settings.c", as we could
now start to rely on it being called for our "r". But let's leave all
of that for now, and narrowly remove this particular part of the
lazy-loading.
1. 1fd9ae517c4 (repository: add repo reference to index_state,
2021-01-23)
2. ee1f0c242ef (read-cache: add index.skipHash config option,
2023-01-06)
3. 2f6b1eb794e (cache API: add a "INDEX_STATE_INIT" macro/function,
add release_index(), 2023-01-12)
4. 1e0ea5c4316 (fsmonitor: config settings are repository-specific,
2022-03-25)
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Acked-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-01-17 14:57:00 +01:00
|
|
|
}
|
2021-01-23 20:58:15 +01:00
|
|
|
|
2021-03-30 15:10:47 +02:00
|
|
|
res = read_index_from(repo->index, repo->index_file, repo->gitdir);
|
|
|
|
|
|
|
|
prepare_repo_settings(repo);
|
|
|
|
if (repo->settings.command_requires_full_index)
|
|
|
|
ensure_full_index(repo->index);
|
|
|
|
|
repo_read_index: clear SKIP_WORKTREE bit from files present in worktree
The fix is short (~30 lines), but the description is not. Sorry.
There is a set of problems caused by files in what I'll refer to as the
"present-despite-SKIP_WORKTREE" state. This commit aims to not just fix
these problems, but remove the entire class as a possibility -- for
those using sparse checkouts. But first, we need to understand the
problems this class presents. A quick outline:
* Problems
* User facing issues
* Problem space complexity
* Maintenance and code correctness challenges
* SKIP_WORKTREE expectations in Git
* Suggested solution
* Pros/Cons of suggested solution
* Notes on testcase modifications
=== User facing issues ===
There are various ways for users to get files to be present in the
working copy despite having the SKIP_WORKTREE bit set for that file in
the index. This may come from:
* various git commands not really supporting the SKIP_WORKTREE bit[1,2]
* users grabbing files from elsewhere and writing them to the worktree
(perhaps even cached in their editor)
* users attempting to "abort" a sparse-checkout operation with a
not-so-early Ctrl+C (updating $GIT_DIR/info/sparse-checkout and the
working tree is not atomic)[3].
Once users have present-despite-SKIP_WORKTREE files, any modifications
users make to these files will be ignored, possibly to users' confusion.
Further:
* these files will degrade performance for the sparse-index case due
to requiring the index to be expanded (see commit 55dfcf9591
("sparse-checkout: clear tracked sparse dirs", 2021-09-08) for why
we try to delete entire directories outside the sparse cone).
* these files will not be updated by by standard commands
(switch/checkout/pull/merge/rebase will leave them alone unless
conflicts happen -- and even then, the conflicted file may be
written somewhere else to avoid overwriting the SKIP_WORKTREE file
that is present and in the way)
* there is nothing in Git that users can use to discover such
files (status, diff, grep, etc. all ignore it)
* there is no reasonable mechanism to "recover" from such a condition
(neither `git sparse-checkout reapply` nor `git reset --hard` will
correct it).
So, not only are users modifications ignored, but the files get
progressively more stale over time. At some point in the future, they
may change their sparseness specification or disable sparse-checkouts.
At that time, all present-despite-SKIP_WORKTREE files will show up as
having lots of modifications because they represent a version from a
different branch or commit. These might include user-made local changes
from days before, but the only way to tell is to have users look through
them all closely.
If these users come to others for help, there will be no logs that
explain the issue; it's just a mysterious list of changes. Users might
adamantly claim (correctly, as it turns out) that they didn't modify
these files, while others presume they did.
[1] https://lore.kernel.org/git/xmqqbmb1a7ga.fsf@gitster-ct.c.googlers.com/
[2] https://lore.kernel.org/git/CABPp-BH9tju7WVm=QZDOvaMDdZbpNXrVWQdN-jmfN8wC6YVhmw@mail.gmail.com/
[3] https://lore.kernel.org/git/CABPp-BFnFpzwGC11TLoLs8YK5yiisA5D5-fFjXnJsbESVDwZsA@mail.gmail.com/
=== Problem space complexity ===
SKIP_WORKTREE has been part of Git for over a decade. Duy did lots of
work on it initially, and several others have since come along and put
lots of work into it. Stolee spent most of 2021 on the sparse-index,
with lots of bugfixes along the way including to non-sparse-index cases
as we are still trying to get sparse checkouts to behave reasonably.
Basically every codepath throughout the treat needs to be aware of an
additional type of file: tracked-but-not-present. The extra type
results in lots of extra testcases and lots of extra code everywhere.
But, the sad thing is that we actually have more than one extra type.
We have tracked, tracked-but-not-present (SKIP_WORKTREE), and
tracked-but-promised-to-not-be-present-but-is-present-anyway
(present-despite-SKIP_WORKTREE). Two types is a monumental amount of
effort to support, and adding a third feels a bit like insanity[4].
[4] Some examples of which can be seen at
https://lore.kernel.org/git/CABPp-BGJ_Nvi5TmgriD9Bh6eNXE2EDq2f8e8QKXAeYG3BxZafA@mail.gmail.com/
=== Maintenance and code correctness challenges ===
Matheus' patches to grep stalled for nearly a year, in part because of
complications of how to handle sparse-checkouts appropriately in all
cases[5][6] (with trying to sanely figure out how to sanely handle
present-despite-SKIP_WORKTREE files being one of the complications).
His rm/add follow-ups also took months because of those kinds of
issues[7]. The corner cases with things like submodules and
SKIP_WORKTREE with the addition of present-despite-SKIP_WORKTREE start
becoming really complex[8].
We've had to add ugly logic to merge-ort to attempt to handle
present-despite-SKIP_WORKTREE files[9], and basically just been forced
to give up in merge-recursive knowing full well that we'll sometimes
silently discard user modifications. Despite stash essentially being a
merge, it needed extra code (beyond what was in merge-ort and
merge-recursive) to manually tweak SKIP_WORKTREE bits in order to avoid
a few different bugs that'd result in an early abort with a partial
stash application[10].
[5] See https://lore.kernel.org/git/5f3f7ac77039d41d1692ceae4b0c5df3bb45b74a.1612901326.git.matheus.bernardino@usp.br/#t
and the dates on the thread; also Matheus and I had several
conversations off-list trying to resolve the issues over that time
[6] ...it finally kind of got unstuck after
https://lore.kernel.org/git/CABPp-BGJ_Nvi5TmgriD9Bh6eNXE2EDq2f8e8QKXAeYG3BxZafA@mail.gmail.com/
[7] See for example
https://lore.kernel.org/git/CABPp-BHwNoVnooqDFPAsZxBT9aR5Dwk5D9sDRCvYSb8akxAJgA@mail.gmail.com/#t
and quotes like "The core functionality of sparse-checkout has always
been only partially implemented", a statement I still believe is true
today.
[8] https://lore.kernel.org/git/pull.809.git.git.1592356884310.gitgitgadget@gmail.com/
[9] See commit 66b209b86a ("merge-ort: implement CE_SKIP_WORKTREE
handling with conflicted entries", 2021-03-20)
[10] See commit ba359fd507 ("stash: fix stash application in
sparse-checkouts", 2020-12-01)
=== SKIP_WORKTREE expectations in Git ===
A couple quotes:
* From [11] (before the "sparse-checkout" command existed):
If it needs too many special cases, hacks, and conditionals, then it
is not worth the complexity---if it is easier to write a correct code
by allowing Git to populate working tree files, it is perfectly fine
to do so.
In a sense, the sparse checkout "feature" itself is a hack by itself,
and that is why I think this part should be "best effort" as well.
* From the git-sparse-checkout manual (still present today):
THIS COMMAND IS EXPERIMENTAL. ITS BEHAVIOR, AND THE BEHAVIOR OF OTHER
COMMANDS IN THE PRESENCE OF SPARSE-CHECKOUTS, WILL LIKELY CHANGE IN
THE FUTURE.
[11] https://lore.kernel.org/git/xmqqbmb1a7ga.fsf@gitster-ct.c.googlers.com/
=== Suggested solution ===
SKIP_WORKTREE was written to allow sparse-checkouts, in particular, as
the name of the option implies, to allow the file to NOT be in the
worktree but consider it to be unchanged rather than deleted.
The suggests a simple solution: present-despite-SKIP_WORKTREE files
should not exist, for those using sparse-checkouts.
Enforce this at index loading time by checking if core.sparseCheckout is
true; if so, check files in the index with the SKIP_WORKTREE bit set to
verify that they are absent from the working tree. If they are present,
unset the bit (in memory, though any commands that write to the index
will record the update).
Users can, of course, can get the SKIP_WORKTREE bit back such as by
running `git sparse-checkout reapply` (if they have ensured the file is
unmodified and doesn't match the specified sparsity patterns).
=== Pros/Cons of suggested solution ===
Pros:
* Solves the user visible problems reported above, which I've been
complaining about for nearly a year but couldn't find a solution to.
* Helps prevent slow performance degradation with a sparse-index.
* Much easier behavior in sparse-checkouts for users to reason about
* Very simple, ~30 lines of code.
* Significantly simplifies some ugly testcases, and obviates the need
to test an entire class of potential issues.
* Reduces code complexity, reasoning, and maintenance. Avoids
disagreements about weird corner cases[12].
* It has been reported that some users might be (ab)using
SKIP_WORKTREE as a let-me-modify-but-keep-the-file-in-the-worktree
mechanism[13, and a few other similar references]. These users know
of multiple caveats and shortcomings in doing so; perhaps not
surprising given the "SKIP_WORKTREE expecations" section above.
However, these users use `git update-index --skip-worktree`, and not
`git sparse-checkout` or core.sparseCheckout=true. As such, these
users would be unaffected by this change and can continue abusing
the system as before.
[12] https://lore.kernel.org/git/CABPp-BH9tju7WVm=QZDOvaMDdZbpNXrVWQdN-jmfN8wC6YVhmw@mail.gmail.com/
[13] https://stackoverflow.com/questions/13630849/git-difference-between-assume-unchanged-and-skip-worktree
Cons:
* When core.sparseCheckout is enabled, this adds a performance cost to
reading the index. I'll defer discussion of this cost to a subsequent
patch, since I have some optimizations to add.
=== Notes on testcase modifications ===
The good:
* t1011: Compare to two cases above it ('read-tree will not throw away
dirty changes, non-sparse'); since the file is present, it should
match the non-sparse case now
* t1092: sparse-index & sparse-checkout now match full-worktree
behavior in more cases! Yaay for consistency!
* t6428, t7012: look at how much simpler the tests become! Merge and
stash can just fail early telling the user there's a file in the
way, instead of not noticing until it's about to write a file and
then have to implement sudden crash avoidance. Hurray for sanity!
* t7817: sparse behavior better matches full tree behavior. Hurray
for sanity!
The confusing:
* t3705: These changes were ONLY needed on Windows, but they don't
hurt other platforms. Let's discuss each individually:
* core.sparseCheckout should be false by default. Nothing in this
testcase toggles that until many, many tests later. However,
early tests (#5 in particular) were testing `update-index
--skip-worktree` behavior in a non-sparse-checkout, but the
Windows tests in CI were behaving as if core.sparseCheckout=true
had been specified somewhere. I do not have access to a Windows
machine. But I just manually did what should have been a no-op
and turned the config off. And it fixed the test.
* I have no idea why the leftover .gitattributes file from this
test was causing failures for test #18 on Windows, but only with
these changes of mine. Test #18 was checking for empty stderr,
and specifically wanted to know that some error completely
unrelated to file endings did not appear. The leftover
.gitattributes file thus caused some spurious stderr unrelated to
the thing being checked. Since other tests did not intend to
test normalization, just proactively remove the .gitattributes
file. I'm certain this is cleaner and better, I'm just unsure
why/how this didn't trigger problems before.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-14 16:59:41 +01:00
|
|
|
/*
|
|
|
|
* If sparse checkouts are in use, check whether paths with the
|
|
|
|
* SKIP_WORKTREE attribute are missing from the worktree; if not,
|
|
|
|
* clear that attribute for that path.
|
|
|
|
*/
|
|
|
|
clear_skip_worktree_from_present_files(repo->index);
|
|
|
|
|
2021-03-30 15:10:47 +02:00
|
|
|
return res;
|
2017-06-22 20:43:32 +02:00
|
|
|
}
|
2019-01-12 03:13:24 +01:00
|
|
|
|
|
|
|
int repo_hold_locked_index(struct repository *repo,
|
|
|
|
struct lock_file *lf,
|
|
|
|
int flags)
|
|
|
|
{
|
|
|
|
if (!repo->index_file)
|
|
|
|
BUG("the repo hasn't been setup");
|
|
|
|
return hold_lock_file_for_update(lf, repo->index_file, flags);
|
|
|
|
}
|