From 03156169270509dc2ecba8324497c9088c4272ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Wed, 10 Jul 2019 20:58:55 -0300 Subject: [PATCH 01/10] clone: test for our behavior on odd objects/* content MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tests for what happens when we perform a local clone on a repo containing odd files at .git/object directory, such as symlinks to other dirs, or unknown files. I'm bending over backwards here to avoid a SHA-1 dependency. See [1] for an earlier and simpler version that hardcoded SHA-1s. This behavior has been the same for a *long* time, but hasn't been tested for. There's a good post-hoc argument to be made for copying over unknown things, e.g. I'd like a git version that doesn't know about the commit-graph to copy it under "clone --local" so a newer git version can make use of it. In follow-up commits we'll look at changing some of this behavior, but for now, let's just assert it as-is so we'll notice what we'll change later. 1. https://public-inbox.org/git/20190226002625.13022-5-avarab@gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason [matheus.bernardino: improved and split tests in more than one patch] Helped-by: Matheus Tavares Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- t/t5604-clone-reference.sh | 111 +++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/t/t5604-clone-reference.sh b/t/t5604-clone-reference.sh index 4320082b1b..11250cab40 100755 --- a/t/t5604-clone-reference.sh +++ b/t/t5604-clone-reference.sh @@ -221,4 +221,115 @@ test_expect_success 'clone, dissociate from alternates' ' ( cd C && git fsck ) ' +test_expect_success 'setup repo with garbage in objects/*' ' + git init S && + ( + cd S && + test_commit A && + + cd .git/objects && + >.some-hidden-file && + >some-file && + mkdir .some-hidden-dir && + >.some-hidden-dir/some-file && + >.some-hidden-dir/.some-dot-file && + mkdir some-dir && + >some-dir/some-file && + >some-dir/.some-dot-file + ) +' + +test_expect_success 'clone a repo with garbage in objects/*' ' + for option in --local --no-hardlinks --shared --dissociate + do + git clone $option S S$option || return 1 && + git -C S$option fsck || return 1 + done && + find S-* -name "*some*" | sort >actual && + cat >expected <<-EOF && + S--dissociate/.git/objects/.some-hidden-file + S--dissociate/.git/objects/some-dir + S--dissociate/.git/objects/some-dir/.some-dot-file + S--dissociate/.git/objects/some-dir/some-file + S--dissociate/.git/objects/some-file + S--local/.git/objects/.some-hidden-file + S--local/.git/objects/some-dir + S--local/.git/objects/some-dir/.some-dot-file + S--local/.git/objects/some-dir/some-file + S--local/.git/objects/some-file + S--no-hardlinks/.git/objects/.some-hidden-file + S--no-hardlinks/.git/objects/some-dir + S--no-hardlinks/.git/objects/some-dir/.some-dot-file + S--no-hardlinks/.git/objects/some-dir/some-file + S--no-hardlinks/.git/objects/some-file + EOF + test_cmp expected actual +' + +test_expect_success SYMLINKS 'setup repo with manually symlinked dirs and unknown files at objects/' ' + git init T && + ( + cd T && + git config gc.auto 0 && + test_commit A && + git gc && + test_commit B && + + cd .git/objects && + mv pack packs && + ln -s packs pack && + find ?? -type d >loose-dirs && + last_loose=$(tail -n 1 loose-dirs) && + rm -f loose-dirs && + mv $last_loose a-loose-dir && + ln -s a-loose-dir $last_loose && + find . -type f | sort >../../../T.objects-files.raw && + echo unknown_content >unknown_file + ) && + git -C T fsck && + git -C T rev-list --all --objects >T.objects +' + + +test_expect_success SYMLINKS 'clone repo with symlinked dirs and unknown files at objects/' ' + for option in --local --no-hardlinks --shared --dissociate + do + git clone $option T T$option || return 1 && + git -C T$option fsck || return 1 && + git -C T$option rev-list --all --objects >T$option.objects && + test_cmp T.objects T$option.objects && + ( + cd T$option/.git/objects && + find . -type f | sort >../../../T$option.objects-files.raw + ) + done && + + for raw in $(ls T*.raw) + do + sed -e "s!/../!/Y/!; s![0-9a-f]\{38,\}!Z!" -e "/commit-graph/d" \ + -e "/multi-pack-index/d" <$raw >$raw.de-sha || return 1 + done && + + cat >expected-files <<-EOF && + ./Y/Z + ./Y/Z + ./a-loose-dir/Z + ./Y/Z + ./info/packs + ./pack/pack-Z.idx + ./pack/pack-Z.pack + ./packs/pack-Z.idx + ./packs/pack-Z.pack + ./unknown_file + EOF + + for option in --local --dissociate --no-hardlinks + do + test_cmp expected-files T$option.objects-files.raw.de-sha || return 1 + done && + + echo ./info/alternates >expected-files && + test_cmp expected-files T--shared.objects-files.raw +' + test_done From 36596fd2dfa473cf1069d23776e62cc156e7b5c6 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Wed, 10 Jul 2019 20:58:56 -0300 Subject: [PATCH 02/10] clone: better handle symlinked files at .git/objects/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is currently an odd behaviour when locally cloning a repository with symlinks at .git/objects: using --no-hardlinks all symlinks are dereferenced but without it, Git will try to hardlink the files with the link() function, which has an OS-specific behaviour on symlinks. On OSX and NetBSD, it creates a hardlink to the file pointed by the symlink whilst on GNU/Linux, it creates a hardlink to the symlink itself. On Manjaro GNU/Linux: $ touch a $ ln -s a b $ link b c $ ls -li a b c 155 [...] a 156 [...] b -> a 156 [...] c -> a But on NetBSD: $ ls -li a b c 2609160 [...] a 2609164 [...] b -> a 2609160 [...] c It's not good to have the result of a local clone to be OS-dependent and besides that, the current behaviour on GNU/Linux may result in broken symlinks. So let's standardize this by making the hardlinks always point to dereferenced paths, instead of the symlinks themselves. Also, add tests for symlinked files at .git/objects/. Note: Git won't create symlinks at .git/objects itself, but it's better to handle this case and be friendly with users who manually create them. Signed-off-by: Matheus Tavares Signed-off-by: Ævar Arnfjörð Bjarmason Co-authored-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- t/t5604-clone-reference.sh | 27 ++++++++++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index e3231864ca..5956b1c8e6 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -442,7 +442,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest, if (unlink(dest->buf) && errno != ENOENT) die_errno(_("failed to unlink '%s'"), dest->buf); if (!option_no_hardlinks) { - if (!link(src->buf, dest->buf)) + if (!link(real_path(src->buf), dest->buf)) continue; if (option_local > 0) die_errno(_("failed to create link '%s'"), dest->buf); diff --git a/t/t5604-clone-reference.sh b/t/t5604-clone-reference.sh index 11250cab40..459ad8a20b 100755 --- a/t/t5604-clone-reference.sh +++ b/t/t5604-clone-reference.sh @@ -266,7 +266,7 @@ test_expect_success 'clone a repo with garbage in objects/*' ' test_cmp expected actual ' -test_expect_success SYMLINKS 'setup repo with manually symlinked dirs and unknown files at objects/' ' +test_expect_success SYMLINKS 'setup repo with manually symlinked or unknown files at objects/' ' git init T && ( cd T && @@ -280,10 +280,19 @@ test_expect_success SYMLINKS 'setup repo with manually symlinked dirs and unknow ln -s packs pack && find ?? -type d >loose-dirs && last_loose=$(tail -n 1 loose-dirs) && - rm -f loose-dirs && mv $last_loose a-loose-dir && ln -s a-loose-dir $last_loose && + first_loose=$(head -n 1 loose-dirs) && + rm -f loose-dirs && + + cd $first_loose && + obj=$(ls *) && + mv $obj ../an-object && + ln -s ../an-object $obj && + + cd ../ && find . -type f | sort >../../../T.objects-files.raw && + find . -type l | sort >../../../T.objects-symlinks.raw && echo unknown_content >unknown_file ) && git -C T fsck && @@ -291,7 +300,7 @@ test_expect_success SYMLINKS 'setup repo with manually symlinked dirs and unknow ' -test_expect_success SYMLINKS 'clone repo with symlinked dirs and unknown files at objects/' ' +test_expect_success SYMLINKS 'clone repo with symlinked or unknown files at objects/' ' for option in --local --no-hardlinks --shared --dissociate do git clone $option T T$option || return 1 && @@ -300,7 +309,8 @@ test_expect_success SYMLINKS 'clone repo with symlinked dirs and unknown files a test_cmp T.objects T$option.objects && ( cd T$option/.git/objects && - find . -type f | sort >../../../T$option.objects-files.raw + find . -type f | sort >../../../T$option.objects-files.raw && + find . -type l | sort >../../../T$option.objects-symlinks.raw ) done && @@ -314,6 +324,7 @@ test_expect_success SYMLINKS 'clone repo with symlinked dirs and unknown files a ./Y/Z ./Y/Z ./a-loose-dir/Z + ./an-object ./Y/Z ./info/packs ./pack/pack-Z.idx @@ -323,13 +334,15 @@ test_expect_success SYMLINKS 'clone repo with symlinked dirs and unknown files a ./unknown_file EOF - for option in --local --dissociate --no-hardlinks + for option in --local --no-hardlinks --dissociate do - test_cmp expected-files T$option.objects-files.raw.de-sha || return 1 + test_cmp expected-files T$option.objects-files.raw.de-sha || return 1 && + test_must_be_empty T$option.objects-symlinks.raw.de-sha || return 1 done && echo ./info/alternates >expected-files && - test_cmp expected-files T--shared.objects-files.raw + test_cmp expected-files T--shared.objects-files.raw && + test_must_be_empty T--shared.objects-symlinks.raw ' test_done From 150791adbf1eb432af0b895d81eef5c2717aa6cc Mon Sep 17 00:00:00 2001 From: Daniel Ferreira Date: Wed, 10 Jul 2019 20:58:57 -0300 Subject: [PATCH 03/10] dir-iterator: add tests for dir-iterator API Create t/helper/test-dir-iterator.c, which prints relevant information about a directory tree iterated over with dir-iterator. Create t/t0066-dir-iterator.sh, which tests that dir-iterator does iterate through a whole directory tree as expected. Signed-off-by: Daniel Ferreira [matheus.bernardino: update to use test-tool and some minor aesthetics] Helped-by: Matheus Tavares Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- Makefile | 1 + t/helper/test-dir-iterator.c | 33 ++++++++++++++++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + t/t0066-dir-iterator.sh | 55 ++++++++++++++++++++++++++++++++++++ 5 files changed, 91 insertions(+) create mode 100644 t/helper/test-dir-iterator.c create mode 100755 t/t0066-dir-iterator.sh diff --git a/Makefile b/Makefile index 8a7e235352..61df07f488 100644 --- a/Makefile +++ b/Makefile @@ -710,6 +710,7 @@ TEST_BUILTINS_OBJS += test-config.o TEST_BUILTINS_OBJS += test-ctype.o TEST_BUILTINS_OBJS += test-date.o TEST_BUILTINS_OBJS += test-delta.o +TEST_BUILTINS_OBJS += test-dir-iterator.o TEST_BUILTINS_OBJS += test-drop-caches.o TEST_BUILTINS_OBJS += test-dump-cache-tree.o TEST_BUILTINS_OBJS += test-dump-fsmonitor.o diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c new file mode 100644 index 0000000000..84f50bed8c --- /dev/null +++ b/t/helper/test-dir-iterator.c @@ -0,0 +1,33 @@ +#include "test-tool.h" +#include "git-compat-util.h" +#include "strbuf.h" +#include "iterator.h" +#include "dir-iterator.h" + +/* Argument is a directory path to iterate over */ +int cmd__dir_iterator(int argc, const char **argv) +{ + struct strbuf path = STRBUF_INIT; + struct dir_iterator *diter; + + if (argc < 2) + die("BUG: test-dir-iterator needs one argument"); + + strbuf_add(&path, argv[1], strlen(argv[1])); + + diter = dir_iterator_begin(path.buf); + + while (dir_iterator_advance(diter) == ITER_OK) { + if (S_ISDIR(diter->st.st_mode)) + printf("[d] "); + else if (S_ISREG(diter->st.st_mode)) + printf("[f] "); + else + printf("[?] "); + + printf("(%s) [%s] %s\n", diter->relative_path, diter->basename, + diter->path.buf); + } + + return 0; +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index 087a8c0cc9..7bc9bb231e 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -19,6 +19,7 @@ static struct test_cmd cmds[] = { { "ctype", cmd__ctype }, { "date", cmd__date }, { "delta", cmd__delta }, + { "dir-iterator", cmd__dir_iterator }, { "drop-caches", cmd__drop_caches }, { "dump-cache-tree", cmd__dump_cache_tree }, { "dump-fsmonitor", cmd__dump_fsmonitor }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 7e703f3038..ec0ffbd0cb 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -9,6 +9,7 @@ int cmd__config(int argc, const char **argv); int cmd__ctype(int argc, const char **argv); int cmd__date(int argc, const char **argv); int cmd__delta(int argc, const char **argv); +int cmd__dir_iterator(int argc, const char **argv); int cmd__drop_caches(int argc, const char **argv); int cmd__dump_cache_tree(int argc, const char **argv); int cmd__dump_fsmonitor(int argc, const char **argv); diff --git a/t/t0066-dir-iterator.sh b/t/t0066-dir-iterator.sh new file mode 100755 index 0000000000..59bce868f4 --- /dev/null +++ b/t/t0066-dir-iterator.sh @@ -0,0 +1,55 @@ +#!/bin/sh + +test_description='Test the dir-iterator functionality' + +. ./test-lib.sh + +test_expect_success 'setup' ' + mkdir -p dir && + mkdir -p dir/a/b/c/ && + >dir/b && + >dir/c && + mkdir -p dir/d/e/d/ && + >dir/a/b/c/d && + >dir/a/e && + >dir/d/e/d/a && + + mkdir -p dir2/a/b/c/ && + >dir2/a/b/c/d +' + +test_expect_success 'dir-iterator should iterate through all files' ' + cat >expected-iteration-sorted-output <<-EOF && + [d] (a) [a] ./dir/a + [d] (a/b) [b] ./dir/a/b + [d] (a/b/c) [c] ./dir/a/b/c + [d] (d) [d] ./dir/d + [d] (d/e) [e] ./dir/d/e + [d] (d/e/d) [d] ./dir/d/e/d + [f] (a/b/c/d) [d] ./dir/a/b/c/d + [f] (a/e) [e] ./dir/a/e + [f] (b) [b] ./dir/b + [f] (c) [c] ./dir/c + [f] (d/e/d/a) [a] ./dir/d/e/d/a + EOF + + test-tool dir-iterator ./dir >out && + sort out >./actual-iteration-sorted-output && + + test_cmp expected-iteration-sorted-output actual-iteration-sorted-output +' + +test_expect_success 'dir-iterator should list files in the correct order' ' + cat >expected-pre-order-output <<-EOF && + [d] (a) [a] ./dir2/a + [d] (a/b) [b] ./dir2/a/b + [d] (a/b/c) [c] ./dir2/a/b/c + [f] (a/b/c/d) [d] ./dir2/a/b/c/d + EOF + + test-tool dir-iterator ./dir2 >actual-pre-order-output && + + test_cmp expected-pre-order-output actual-pre-order-output +' + +test_done From c9bba372ed9ee0c5ea1a4037c3c723a6c31f5921 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Wed, 10 Jul 2019 20:58:58 -0300 Subject: [PATCH 04/10] dir-iterator: use warning_errno when possible Change warning(..., strerror(errno)) by warning_errno(...). This helps to unify warning display besides simplifying a bit the code. Also, improve warning messages by surrounding paths with quotation marks and using more meaningful statements. Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- dir-iterator.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/dir-iterator.c b/dir-iterator.c index f2dcd82fde..0c8880868a 100644 --- a/dir-iterator.c +++ b/dir-iterator.c @@ -71,8 +71,8 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator) level->dir = opendir(iter->base.path.buf); if (!level->dir && errno != ENOENT) { - warning("error opening directory %s: %s", - iter->base.path.buf, strerror(errno)); + warning_errno("error opening directory '%s'", + iter->base.path.buf); /* Popping the level is handled below */ } @@ -122,11 +122,11 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator) if (!de) { /* This level is exhausted; pop up a level. */ if (errno) { - warning("error reading directory %s: %s", - iter->base.path.buf, strerror(errno)); + warning_errno("error reading directory '%s'", + iter->base.path.buf); } else if (closedir(level->dir)) - warning("error closing directory %s: %s", - iter->base.path.buf, strerror(errno)); + warning_errno("error closing directory '%s'", + iter->base.path.buf); level->dir = NULL; if (--iter->levels_nr == 0) @@ -140,9 +140,8 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator) strbuf_addstr(&iter->base.path, de->d_name); if (lstat(iter->base.path.buf, &iter->base.st) < 0) { if (errno != ENOENT) - warning("error reading path '%s': %s", - iter->base.path.buf, - strerror(errno)); + warning_errno("failed to stat '%s'", + iter->base.path.buf); continue; } @@ -170,9 +169,11 @@ int dir_iterator_abort(struct dir_iterator *dir_iterator) &iter->levels[iter->levels_nr - 1]; if (level->dir && closedir(level->dir)) { + int saved_errno = errno; strbuf_setlen(&iter->base.path, level->prefix_len); - warning("error closing directory %s: %s", - iter->base.path.buf, strerror(errno)); + errno = saved_errno; + warning_errno("error closing directory '%s'", + iter->base.path.buf); } } From 3012397e0327f5e4dfd1d1183a792268429744ae Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Wed, 10 Jul 2019 20:58:59 -0300 Subject: [PATCH 05/10] dir-iterator: refactor state machine model dir_iterator_advance() is a large function with two nested loops. Let's improve its readability factoring out three functions and simplifying its mechanics. The refactored model will no longer depend on level.initialized and level.dir_state to keep track of the iteration state and will perform on a single loop. Also, dir_iterator_begin() currently does not check if the given string represents a valid directory path. Since the refactored model will have to stat() the given path at initialization, let's also check for this kind of error and make dir_iterator_begin() return NULL, on failures, with errno appropriately set. And add tests for this new behavior. Improve documentation at dir-iteration.h and code comments at dir-iterator.c to reflect the changes and eliminate possible ambiguities. Finally, adjust refs/files-backend.c to check for now possible dir_iterator_begin() failures. Original-patch-by: Daniel Ferreira Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- dir-iterator.c | 230 ++++++++++++++++++----------------- dir-iterator.h | 17 ++- refs/files-backend.c | 17 ++- t/helper/test-dir-iterator.c | 5 + t/t0066-dir-iterator.sh | 13 ++ 5 files changed, 162 insertions(+), 120 deletions(-) diff --git a/dir-iterator.c b/dir-iterator.c index 0c8880868a..594fe4d67b 100644 --- a/dir-iterator.c +++ b/dir-iterator.c @@ -4,8 +4,6 @@ #include "dir-iterator.h" struct dir_iterator_level { - int initialized; - DIR *dir; /* @@ -13,16 +11,6 @@ struct dir_iterator_level { * (including a trailing '/'): */ size_t prefix_len; - - /* - * The last action that has been taken with the current entry - * (needed for directories, which have to be included in the - * iteration and also iterated into): - */ - enum { - DIR_STATE_ITER, - DIR_STATE_RECURSE - } dir_state; }; /* @@ -34,9 +22,11 @@ struct dir_iterator_int { struct dir_iterator base; /* - * The number of levels currently on the stack. This is always - * at least 1, because when it becomes zero the iteration is - * ended and this struct is freed. + * The number of levels currently on the stack. After the first + * call to dir_iterator_begin(), if it succeeds to open the + * first level's dir, this will always be at least 1. Then, + * when it comes to zero the iteration is ended and this + * struct is freed. */ size_t levels_nr; @@ -50,113 +40,118 @@ struct dir_iterator_int { struct dir_iterator_level *levels; }; +/* + * Push a level in the iter stack and initialize it with information from + * the directory pointed by iter->base->path. It is assumed that this + * strbuf points to a valid directory path. Return 0 on success and -1 + * otherwise, leaving the stack unchanged. + */ +static int push_level(struct dir_iterator_int *iter) +{ + struct dir_iterator_level *level; + + ALLOC_GROW(iter->levels, iter->levels_nr + 1, iter->levels_alloc); + level = &iter->levels[iter->levels_nr++]; + + if (!is_dir_sep(iter->base.path.buf[iter->base.path.len - 1])) + strbuf_addch(&iter->base.path, '/'); + level->prefix_len = iter->base.path.len; + + level->dir = opendir(iter->base.path.buf); + if (!level->dir) { + if (errno != ENOENT) { + warning_errno("error opening directory '%s'", + iter->base.path.buf); + } + iter->levels_nr--; + return -1; + } + + return 0; +} + +/* + * Pop the top level on the iter stack, releasing any resources associated + * with it. Return the new value of iter->levels_nr. + */ +static int pop_level(struct dir_iterator_int *iter) +{ + struct dir_iterator_level *level = + &iter->levels[iter->levels_nr - 1]; + + if (level->dir && closedir(level->dir)) + warning_errno("error closing directory '%s'", + iter->base.path.buf); + level->dir = NULL; + + return --iter->levels_nr; +} + +/* + * Populate iter->base with the necessary information on the next iteration + * entry, represented by the given dirent de. Return 0 on success and -1 + * otherwise. + */ +static int prepare_next_entry_data(struct dir_iterator_int *iter, + struct dirent *de) +{ + strbuf_addstr(&iter->base.path, de->d_name); + /* + * We have to reset these because the path strbuf might have + * been realloc()ed at the previous strbuf_addstr(). + */ + iter->base.relative_path = iter->base.path.buf + + iter->levels[0].prefix_len; + iter->base.basename = iter->base.path.buf + + iter->levels[iter->levels_nr - 1].prefix_len; + + if (lstat(iter->base.path.buf, &iter->base.st)) { + if (errno != ENOENT) + warning_errno("failed to stat '%s'", iter->base.path.buf); + return -1; + } + + return 0; +} + int dir_iterator_advance(struct dir_iterator *dir_iterator) { struct dir_iterator_int *iter = (struct dir_iterator_int *)dir_iterator; + if (S_ISDIR(iter->base.st.st_mode)) { + if (push_level(iter) && iter->levels_nr == 0) { + /* Pushing the first level failed */ + return dir_iterator_abort(dir_iterator); + } + } + + /* Loop until we find an entry that we can give back to the caller. */ while (1) { + struct dirent *de; struct dir_iterator_level *level = &iter->levels[iter->levels_nr - 1]; - struct dirent *de; - if (!level->initialized) { - /* - * Note: dir_iterator_begin() ensures that - * path is not the empty string. - */ - if (!is_dir_sep(iter->base.path.buf[iter->base.path.len - 1])) - strbuf_addch(&iter->base.path, '/'); - level->prefix_len = iter->base.path.len; + strbuf_setlen(&iter->base.path, level->prefix_len); + errno = 0; + de = readdir(level->dir); - level->dir = opendir(iter->base.path.buf); - if (!level->dir && errno != ENOENT) { - warning_errno("error opening directory '%s'", + if (!de) { + if (errno) + warning_errno("error reading directory '%s'", iter->base.path.buf); - /* Popping the level is handled below */ - } - - level->initialized = 1; - } else if (S_ISDIR(iter->base.st.st_mode)) { - if (level->dir_state == DIR_STATE_ITER) { - /* - * The directory was just iterated - * over; now prepare to iterate into - * it. - */ - level->dir_state = DIR_STATE_RECURSE; - ALLOC_GROW(iter->levels, iter->levels_nr + 1, - iter->levels_alloc); - level = &iter->levels[iter->levels_nr++]; - level->initialized = 0; - continue; - } else { - /* - * The directory has already been - * iterated over and iterated into; - * we're done with it. - */ - } - } - - if (!level->dir) { - /* - * This level is exhausted (or wasn't opened - * successfully); pop up a level. - */ - if (--iter->levels_nr == 0) + else if (pop_level(iter) == 0) return dir_iterator_abort(dir_iterator); - continue; } - /* - * Loop until we find an entry that we can give back - * to the caller: - */ - while (1) { - strbuf_setlen(&iter->base.path, level->prefix_len); - errno = 0; - de = readdir(level->dir); + if (is_dot_or_dotdot(de->d_name)) + continue; - if (!de) { - /* This level is exhausted; pop up a level. */ - if (errno) { - warning_errno("error reading directory '%s'", - iter->base.path.buf); - } else if (closedir(level->dir)) - warning_errno("error closing directory '%s'", - iter->base.path.buf); + if (prepare_next_entry_data(iter, de)) + continue; - level->dir = NULL; - if (--iter->levels_nr == 0) - return dir_iterator_abort(dir_iterator); - break; - } - - if (is_dot_or_dotdot(de->d_name)) - continue; - - strbuf_addstr(&iter->base.path, de->d_name); - if (lstat(iter->base.path.buf, &iter->base.st) < 0) { - if (errno != ENOENT) - warning_errno("failed to stat '%s'", - iter->base.path.buf); - continue; - } - - /* - * We have to set these each time because - * the path strbuf might have been realloc()ed. - */ - iter->base.relative_path = - iter->base.path.buf + iter->levels[0].prefix_len; - iter->base.basename = - iter->base.path.buf + level->prefix_len; - level->dir_state = DIR_STATE_ITER; - - return ITER_OK; - } + return ITER_OK; } } @@ -187,17 +182,32 @@ struct dir_iterator *dir_iterator_begin(const char *path) { struct dir_iterator_int *iter = xcalloc(1, sizeof(*iter)); struct dir_iterator *dir_iterator = &iter->base; - - if (!path || !*path) - BUG("empty path passed to dir_iterator_begin()"); + int saved_errno; strbuf_init(&iter->base.path, PATH_MAX); strbuf_addstr(&iter->base.path, path); ALLOC_GROW(iter->levels, 10, iter->levels_alloc); + iter->levels_nr = 0; - iter->levels_nr = 1; - iter->levels[0].initialized = 0; + /* + * Note: stat already checks for NULL or empty strings and + * inexistent paths. + */ + if (stat(iter->base.path.buf, &iter->base.st) < 0) { + saved_errno = errno; + goto error_out; + } + + if (!S_ISDIR(iter->base.st.st_mode)) { + saved_errno = ENOTDIR; + goto error_out; + } return dir_iterator; + +error_out: + dir_iterator_abort(dir_iterator); + errno = saved_errno; + return NULL; } diff --git a/dir-iterator.h b/dir-iterator.h index 970793d07a..9b4cb7acd2 100644 --- a/dir-iterator.h +++ b/dir-iterator.h @@ -8,18 +8,22 @@ * * Iterate over a directory tree, recursively, including paths of all * types and hidden paths. Skip "." and ".." entries and don't follow - * symlinks except for the original path. + * symlinks except for the original path. Note that the original path + * is not included in the iteration. * * Every time dir_iterator_advance() is called, update the members of * the dir_iterator structure to reflect the next path in the * iteration. The order that paths are iterated over within a - * directory is undefined, but directory paths are always iterated - * over before the subdirectory contents. + * directory is undefined, directory paths are always given before + * their contents. * * A typical iteration looks like this: * * int ok; - * struct iterator *iter = dir_iterator_begin(path); + * struct dir_iterator *iter = dir_iterator_begin(path); + * + * if (!iter) + * goto error_handler; * * while ((ok = dir_iterator_advance(iter)) == ITER_OK) { * if (want_to_stop_iteration()) { @@ -59,8 +63,9 @@ struct dir_iterator { }; /* - * Start a directory iteration over path. Return a dir_iterator that - * holds the internal state of the iteration. + * Start a directory iteration over path. On success, return a + * dir_iterator that holds the internal state of the iteration. + * In case of failure, return NULL and set errno accordingly. * * The iteration includes all paths under path, not including path * itself and not including "." or ".." entries. diff --git a/refs/files-backend.c b/refs/files-backend.c index 63e55e6773..7ed81046d4 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -2143,13 +2143,22 @@ static struct ref_iterator_vtable files_reflog_iterator_vtable = { static struct ref_iterator *reflog_iterator_begin(struct ref_store *ref_store, const char *gitdir) { - struct files_reflog_iterator *iter = xcalloc(1, sizeof(*iter)); - struct ref_iterator *ref_iterator = &iter->base; + struct dir_iterator *diter; + struct files_reflog_iterator *iter; + struct ref_iterator *ref_iterator; struct strbuf sb = STRBUF_INIT; - base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable, 0); strbuf_addf(&sb, "%s/logs", gitdir); - iter->dir_iterator = dir_iterator_begin(sb.buf); + + diter = dir_iterator_begin(sb.buf); + if(!diter) + return empty_ref_iterator_begin(); + + iter = xcalloc(1, sizeof(*iter)); + ref_iterator = &iter->base; + + base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable, 0); + iter->dir_iterator = diter; iter->ref_store = ref_store; strbuf_release(&sb); diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c index 84f50bed8c..fab1ff6237 100644 --- a/t/helper/test-dir-iterator.c +++ b/t/helper/test-dir-iterator.c @@ -17,6 +17,11 @@ int cmd__dir_iterator(int argc, const char **argv) diter = dir_iterator_begin(path.buf); + if (!diter) { + printf("dir_iterator_begin failure: %d\n", errno); + exit(EXIT_FAILURE); + } + while (dir_iterator_advance(diter) == ITER_OK) { if (S_ISDIR(diter->st.st_mode)) printf("[d] "); diff --git a/t/t0066-dir-iterator.sh b/t/t0066-dir-iterator.sh index 59bce868f4..cc4b19c34c 100755 --- a/t/t0066-dir-iterator.sh +++ b/t/t0066-dir-iterator.sh @@ -52,4 +52,17 @@ test_expect_success 'dir-iterator should list files in the correct order' ' test_cmp expected-pre-order-output actual-pre-order-output ' +test_expect_success 'begin should fail upon inexistent paths' ' + test_must_fail test-tool dir-iterator ./inexistent-path \ + >actual-inexistent-path-output && + echo "dir_iterator_begin failure: 2" >expected-inexistent-path-output && + test_cmp expected-inexistent-path-output actual-inexistent-path-output +' + +test_expect_success 'begin should fail upon non directory paths' ' + test_must_fail test-tool dir-iterator ./dir/b >actual-non-dir-output && + echo "dir_iterator_begin failure: 20" >expected-non-dir-output && + test_cmp expected-non-dir-output actual-non-dir-output +' + test_done From fa1da7d2eef0fedf0e5942028513912bf2bb64ed Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Wed, 10 Jul 2019 20:59:00 -0300 Subject: [PATCH 06/10] dir-iterator: add flags parameter to dir_iterator_begin Add the possibility of giving flags to dir_iterator_begin to initialize a dir-iterator with special options. Currently possible flags are: - DIR_ITERATOR_PEDANTIC, which makes dir_iterator_advance abort immediately in the case of an error, instead of keep looking for the next valid entry; - DIR_ITERATOR_FOLLOW_SYMLINKS, which makes the iterator follow symlinks and include linked directories' contents in the iteration. These new flags will be used in a subsequent patch. Also add tests for the flags' usage and adjust refs/files-backend.c to the new dir_iterator_begin signature. Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- dir-iterator.c | 56 +++++++++++++++++-------- dir-iterator.h | 55 ++++++++++++++++++++----- refs/files-backend.c | 2 +- t/helper/test-dir-iterator.c | 32 ++++++++++++--- t/t0066-dir-iterator.sh | 80 ++++++++++++++++++++++++++++++++++++ 5 files changed, 190 insertions(+), 35 deletions(-) diff --git a/dir-iterator.c b/dir-iterator.c index 594fe4d67b..b17e9f970a 100644 --- a/dir-iterator.c +++ b/dir-iterator.c @@ -38,13 +38,16 @@ struct dir_iterator_int { * that will be included in this iteration. */ struct dir_iterator_level *levels; + + /* Combination of flags for this dir-iterator */ + unsigned int flags; }; /* * Push a level in the iter stack and initialize it with information from * the directory pointed by iter->base->path. It is assumed that this * strbuf points to a valid directory path. Return 0 on success and -1 - * otherwise, leaving the stack unchanged. + * otherwise, setting errno accordingly and leaving the stack unchanged. */ static int push_level(struct dir_iterator_int *iter) { @@ -59,11 +62,13 @@ static int push_level(struct dir_iterator_int *iter) level->dir = opendir(iter->base.path.buf); if (!level->dir) { + int saved_errno = errno; if (errno != ENOENT) { warning_errno("error opening directory '%s'", iter->base.path.buf); } iter->levels_nr--; + errno = saved_errno; return -1; } @@ -90,11 +95,13 @@ static int pop_level(struct dir_iterator_int *iter) /* * Populate iter->base with the necessary information on the next iteration * entry, represented by the given dirent de. Return 0 on success and -1 - * otherwise. + * otherwise, setting errno accordingly. */ static int prepare_next_entry_data(struct dir_iterator_int *iter, struct dirent *de) { + int err, saved_errno; + strbuf_addstr(&iter->base.path, de->d_name); /* * We have to reset these because the path strbuf might have @@ -105,13 +112,17 @@ static int prepare_next_entry_data(struct dir_iterator_int *iter, iter->base.basename = iter->base.path.buf + iter->levels[iter->levels_nr - 1].prefix_len; - if (lstat(iter->base.path.buf, &iter->base.st)) { - if (errno != ENOENT) - warning_errno("failed to stat '%s'", iter->base.path.buf); - return -1; - } + if (iter->flags & DIR_ITERATOR_FOLLOW_SYMLINKS) + err = stat(iter->base.path.buf, &iter->base.st); + else + err = lstat(iter->base.path.buf, &iter->base.st); - return 0; + saved_errno = errno; + if (err && errno != ENOENT) + warning_errno("failed to stat '%s'", iter->base.path.buf); + + errno = saved_errno; + return err; } int dir_iterator_advance(struct dir_iterator *dir_iterator) @@ -119,11 +130,11 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator) struct dir_iterator_int *iter = (struct dir_iterator_int *)dir_iterator; - if (S_ISDIR(iter->base.st.st_mode)) { - if (push_level(iter) && iter->levels_nr == 0) { - /* Pushing the first level failed */ - return dir_iterator_abort(dir_iterator); - } + if (S_ISDIR(iter->base.st.st_mode) && push_level(iter)) { + if (errno != ENOENT && iter->flags & DIR_ITERATOR_PEDANTIC) + goto error_out; + if (iter->levels_nr == 0) + goto error_out; } /* Loop until we find an entry that we can give back to the caller. */ @@ -137,22 +148,32 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator) de = readdir(level->dir); if (!de) { - if (errno) + if (errno) { warning_errno("error reading directory '%s'", iter->base.path.buf); - else if (pop_level(iter) == 0) + if (iter->flags & DIR_ITERATOR_PEDANTIC) + goto error_out; + } else if (pop_level(iter) == 0) { return dir_iterator_abort(dir_iterator); + } continue; } if (is_dot_or_dotdot(de->d_name)) continue; - if (prepare_next_entry_data(iter, de)) + if (prepare_next_entry_data(iter, de)) { + if (errno != ENOENT && iter->flags & DIR_ITERATOR_PEDANTIC) + goto error_out; continue; + } return ITER_OK; } + +error_out: + dir_iterator_abort(dir_iterator); + return ITER_ERROR; } int dir_iterator_abort(struct dir_iterator *dir_iterator) @@ -178,7 +199,7 @@ int dir_iterator_abort(struct dir_iterator *dir_iterator) return ITER_DONE; } -struct dir_iterator *dir_iterator_begin(const char *path) +struct dir_iterator *dir_iterator_begin(const char *path, unsigned int flags) { struct dir_iterator_int *iter = xcalloc(1, sizeof(*iter)); struct dir_iterator *dir_iterator = &iter->base; @@ -189,6 +210,7 @@ struct dir_iterator *dir_iterator_begin(const char *path) ALLOC_GROW(iter->levels, 10, iter->levels_alloc); iter->levels_nr = 0; + iter->flags = flags; /* * Note: stat already checks for NULL or empty strings and diff --git a/dir-iterator.h b/dir-iterator.h index 9b4cb7acd2..08229157c6 100644 --- a/dir-iterator.h +++ b/dir-iterator.h @@ -20,7 +20,8 @@ * A typical iteration looks like this: * * int ok; - * struct dir_iterator *iter = dir_iterator_begin(path); + * unsigned int flags = DIR_ITERATOR_PEDANTIC; + * struct dir_iterator *iter = dir_iterator_begin(path, flags); * * if (!iter) * goto error_handler; @@ -44,6 +45,29 @@ * dir_iterator_advance() again. */ +/* + * Flags for dir_iterator_begin: + * + * - DIR_ITERATOR_PEDANTIC: override dir-iterator's default behavior + * in case of an error at dir_iterator_advance(), which is to keep + * looking for a next valid entry. With this flag, resources are freed + * and ITER_ERROR is returned immediately. In both cases, a meaningful + * warning is emitted. Note: ENOENT errors are always ignored so that + * the API users may remove files during iteration. + * + * - DIR_ITERATOR_FOLLOW_SYMLINKS: make dir-iterator follow symlinks. + * i.e., linked directories' contents will be iterated over and + * iter->base.st will contain information on the referred files, + * not the symlinks themselves, which is the default behavior. Broken + * symlinks are ignored. + * + * Warning: circular symlinks are also followed when + * DIR_ITERATOR_FOLLOW_SYMLINKS is set. The iteration may end up with + * an ELOOP if they happen and DIR_ITERATOR_PEDANTIC is set. + */ +#define DIR_ITERATOR_PEDANTIC (1 << 0) +#define DIR_ITERATOR_FOLLOW_SYMLINKS (1 << 1) + struct dir_iterator { /* The current path: */ struct strbuf path; @@ -58,29 +82,38 @@ struct dir_iterator { /* The current basename: */ const char *basename; - /* The result of calling lstat() on path: */ + /* + * The result of calling lstat() on path; or stat(), if the + * DIR_ITERATOR_FOLLOW_SYMLINKS flag was set at + * dir_iterator's initialization. + */ struct stat st; }; /* - * Start a directory iteration over path. On success, return a - * dir_iterator that holds the internal state of the iteration. - * In case of failure, return NULL and set errno accordingly. + * Start a directory iteration over path with the combination of + * options specified by flags. On success, return a dir_iterator + * that holds the internal state of the iteration. In case of + * failure, return NULL and set errno accordingly. * * The iteration includes all paths under path, not including path * itself and not including "." or ".." entries. * - * path is the starting directory. An internal copy will be made. + * Parameters are: + * - path is the starting directory. An internal copy will be made. + * - flags is a combination of the possible flags to initialize a + * dir-iterator or 0 for default behavior. */ -struct dir_iterator *dir_iterator_begin(const char *path); +struct dir_iterator *dir_iterator_begin(const char *path, unsigned int flags); /* * Advance the iterator to the first or next item and return ITER_OK. * If the iteration is exhausted, free the dir_iterator and any - * resources associated with it and return ITER_DONE. On error, free - * dir_iterator and associated resources and return ITER_ERROR. It is - * a bug to use iterator or call this function again after it has - * returned ITER_DONE or ITER_ERROR. + * resources associated with it and return ITER_DONE. + * + * It is a bug to use iterator or call this function again after it + * has returned ITER_DONE or ITER_ERROR (which may be returned iff + * the DIR_ITERATOR_PEDANTIC flag was set). */ int dir_iterator_advance(struct dir_iterator *iterator); diff --git a/refs/files-backend.c b/refs/files-backend.c index 7ed81046d4..b1f8f53a09 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -2150,7 +2150,7 @@ static struct ref_iterator *reflog_iterator_begin(struct ref_store *ref_store, strbuf_addf(&sb, "%s/logs", gitdir); - diter = dir_iterator_begin(sb.buf); + diter = dir_iterator_begin(sb.buf, 0); if(!diter) return empty_ref_iterator_begin(); diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c index fab1ff6237..a5b96cb0dc 100644 --- a/t/helper/test-dir-iterator.c +++ b/t/helper/test-dir-iterator.c @@ -4,29 +4,44 @@ #include "iterator.h" #include "dir-iterator.h" -/* Argument is a directory path to iterate over */ +/* + * usage: + * tool-test dir-iterator [--follow-symlinks] [--pedantic] directory_path + */ int cmd__dir_iterator(int argc, const char **argv) { struct strbuf path = STRBUF_INIT; struct dir_iterator *diter; + unsigned int flags = 0; + int iter_status; - if (argc < 2) - die("BUG: test-dir-iterator needs one argument"); + for (++argv, --argc; *argv && starts_with(*argv, "--"); ++argv, --argc) { + if (strcmp(*argv, "--follow-symlinks") == 0) + flags |= DIR_ITERATOR_FOLLOW_SYMLINKS; + else if (strcmp(*argv, "--pedantic") == 0) + flags |= DIR_ITERATOR_PEDANTIC; + else + die("invalid option '%s'", *argv); + } - strbuf_add(&path, argv[1], strlen(argv[1])); + if (!*argv || argc != 1) + die("dir-iterator needs exactly one non-option argument"); - diter = dir_iterator_begin(path.buf); + strbuf_add(&path, *argv, strlen(*argv)); + diter = dir_iterator_begin(path.buf, flags); if (!diter) { printf("dir_iterator_begin failure: %d\n", errno); exit(EXIT_FAILURE); } - while (dir_iterator_advance(diter) == ITER_OK) { + while ((iter_status = dir_iterator_advance(diter)) == ITER_OK) { if (S_ISDIR(diter->st.st_mode)) printf("[d] "); else if (S_ISREG(diter->st.st_mode)) printf("[f] "); + else if (S_ISLNK(diter->st.st_mode)) + printf("[s] "); else printf("[?] "); @@ -34,5 +49,10 @@ int cmd__dir_iterator(int argc, const char **argv) diter->path.buf); } + if (iter_status != ITER_DONE) { + printf("dir_iterator_advance failure\n"); + return 1; + } + return 0; } diff --git a/t/t0066-dir-iterator.sh b/t/t0066-dir-iterator.sh index cc4b19c34c..9354d3f1ed 100755 --- a/t/t0066-dir-iterator.sh +++ b/t/t0066-dir-iterator.sh @@ -65,4 +65,84 @@ test_expect_success 'begin should fail upon non directory paths' ' test_cmp expected-non-dir-output actual-non-dir-output ' +test_expect_success POSIXPERM,SANITY 'advance should not fail on errors by default' ' + cat >expected-no-permissions-output <<-EOF && + [d] (a) [a] ./dir3/a + EOF + + mkdir -p dir3/a && + >dir3/a/b && + chmod 0 dir3/a && + + test-tool dir-iterator ./dir3 >actual-no-permissions-output && + test_cmp expected-no-permissions-output actual-no-permissions-output && + chmod 755 dir3/a && + rm -rf dir3 +' + +test_expect_success POSIXPERM,SANITY 'advance should fail on errors, w/ pedantic flag' ' + cat >expected-no-permissions-pedantic-output <<-EOF && + [d] (a) [a] ./dir3/a + dir_iterator_advance failure + EOF + + mkdir -p dir3/a && + >dir3/a/b && + chmod 0 dir3/a && + + test_must_fail test-tool dir-iterator --pedantic ./dir3 \ + >actual-no-permissions-pedantic-output && + test_cmp expected-no-permissions-pedantic-output \ + actual-no-permissions-pedantic-output && + chmod 755 dir3/a && + rm -rf dir3 +' + +test_expect_success SYMLINKS 'setup dirs with symlinks' ' + mkdir -p dir4/a && + mkdir -p dir4/b/c && + >dir4/a/d && + ln -s d dir4/a/e && + ln -s ../b dir4/a/f && + + mkdir -p dir5/a/b && + mkdir -p dir5/a/c && + ln -s ../c dir5/a/b/d && + ln -s ../ dir5/a/b/e && + ln -s ../../ dir5/a/b/f +' + +test_expect_success SYMLINKS 'dir-iterator should not follow symlinks by default' ' + cat >expected-no-follow-sorted-output <<-EOF && + [d] (a) [a] ./dir4/a + [d] (b) [b] ./dir4/b + [d] (b/c) [c] ./dir4/b/c + [f] (a/d) [d] ./dir4/a/d + [s] (a/e) [e] ./dir4/a/e + [s] (a/f) [f] ./dir4/a/f + EOF + + test-tool dir-iterator ./dir4 >out && + sort out >actual-no-follow-sorted-output && + + test_cmp expected-no-follow-sorted-output actual-no-follow-sorted-output +' + +test_expect_success SYMLINKS 'dir-iterator should follow symlinks w/ follow flag' ' + cat >expected-follow-sorted-output <<-EOF && + [d] (a) [a] ./dir4/a + [d] (a/f) [f] ./dir4/a/f + [d] (a/f/c) [c] ./dir4/a/f/c + [d] (b) [b] ./dir4/b + [d] (b/c) [c] ./dir4/b/c + [f] (a/d) [d] ./dir4/a/d + [f] (a/e) [e] ./dir4/a/e + EOF + + test-tool dir-iterator --follow-symlinks ./dir4 >out && + sort out >actual-follow-sorted-output && + + test_cmp expected-follow-sorted-output actual-follow-sorted-output +' + test_done From 68c7c59cf202a4d90d96f19076a33d12278c5864 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Wed, 10 Jul 2019 20:59:01 -0300 Subject: [PATCH 07/10] clone: copy hidden paths at local clone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the copy_or_link_directory function no longer skip hidden directories. This function, used to copy .git/objects, currently skips all hidden directories but not hidden files, which is an odd behaviour. The reason for that could be unintentional: probably the intention was to skip '.' and '..' only but it ended up accidentally skipping all directories starting with '.'. Besides being more natural, the new behaviour is more permissive to the user. Also adjust tests to reflect this behaviour change. Signed-off-by: Matheus Tavares Signed-off-by: Ævar Arnfjörð Bjarmason Co-authored-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- t/t5604-clone-reference.sh | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/builtin/clone.c b/builtin/clone.c index 5956b1c8e6..541f0e1be3 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -427,7 +427,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest, continue; } if (S_ISDIR(buf.st_mode)) { - if (de->d_name[0] != '.') + if (!is_dot_or_dotdot(de->d_name)) copy_or_link_directory(src, dest, src_repo, src_baselen); continue; diff --git a/t/t5604-clone-reference.sh b/t/t5604-clone-reference.sh index 459ad8a20b..4894237ab8 100755 --- a/t/t5604-clone-reference.sh +++ b/t/t5604-clone-reference.sh @@ -247,16 +247,25 @@ test_expect_success 'clone a repo with garbage in objects/*' ' done && find S-* -name "*some*" | sort >actual && cat >expected <<-EOF && + S--dissociate/.git/objects/.some-hidden-dir + S--dissociate/.git/objects/.some-hidden-dir/.some-dot-file + S--dissociate/.git/objects/.some-hidden-dir/some-file S--dissociate/.git/objects/.some-hidden-file S--dissociate/.git/objects/some-dir S--dissociate/.git/objects/some-dir/.some-dot-file S--dissociate/.git/objects/some-dir/some-file S--dissociate/.git/objects/some-file + S--local/.git/objects/.some-hidden-dir + S--local/.git/objects/.some-hidden-dir/.some-dot-file + S--local/.git/objects/.some-hidden-dir/some-file S--local/.git/objects/.some-hidden-file S--local/.git/objects/some-dir S--local/.git/objects/some-dir/.some-dot-file S--local/.git/objects/some-dir/some-file S--local/.git/objects/some-file + S--no-hardlinks/.git/objects/.some-hidden-dir + S--no-hardlinks/.git/objects/.some-hidden-dir/.some-dot-file + S--no-hardlinks/.git/objects/.some-hidden-dir/some-file S--no-hardlinks/.git/objects/.some-hidden-file S--no-hardlinks/.git/objects/some-dir S--no-hardlinks/.git/objects/some-dir/.some-dot-file From 14954b799f0bac76e500593c95d1f274cf0636e5 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Wed, 10 Jul 2019 20:59:02 -0300 Subject: [PATCH 08/10] clone: extract function from copy_or_link_directory Extract dir creation code snippet from copy_or_link_directory to its own function named mkdir_if_missing. This change will help to remove copy_or_link_directory's explicit recursion, which will be done in a following patch. Also makes the code more readable. Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- builtin/clone.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 541f0e1be3..297af3f40e 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -391,6 +391,21 @@ static void copy_alternates(struct strbuf *src, const char *src_repo) fclose(in); } +static void mkdir_if_missing(const char *pathname, mode_t mode) +{ + struct stat st; + + if (!mkdir(pathname, mode)) + return; + + if (errno != EEXIST) + die_errno(_("failed to create directory '%s'"), pathname); + else if (stat(pathname, &st)) + die_errno(_("failed to stat '%s'"), pathname); + else if (!S_ISDIR(st.st_mode)) + die(_("%s exists and is not a directory"), pathname); +} + static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest, const char *src_repo, int src_baselen) { @@ -403,14 +418,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest, if (!dir) die_errno(_("failed to open '%s'"), src->buf); - if (mkdir(dest->buf, 0777)) { - if (errno != EEXIST) - die_errno(_("failed to create directory '%s'"), dest->buf); - else if (stat(dest->buf, &buf)) - die_errno(_("failed to stat '%s'"), dest->buf); - else if (!S_ISDIR(buf.st_mode)) - die(_("%s exists and is not a directory"), dest->buf); - } + mkdir_if_missing(dest->buf, 0777); strbuf_addch(src, '/'); src_len = src->len; From ff7ccc8c9a6e61a7225c161c743a49ac079f1425 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Wed, 10 Jul 2019 20:59:03 -0300 Subject: [PATCH 09/10] clone: use dir-iterator to avoid explicit dir traversal Replace usage of opendir/readdir/closedir API to traverse directories recursively, at copy_or_link_directory function, by the dir-iterator API. This simplifies the code and avoids recursive calls to copy_or_link_directory. This process also makes copy_or_link_directory call die() in case of an error on readdir or stat inside dir_iterator_advance. Previously it would just print a warning for errors on stat and ignore errors on readdir, which isn't nice because a local git clone could succeed even though the .git/objects copy didn't fully succeed. Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- builtin/clone.c | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 297af3f40e..a4ce801a67 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -23,6 +23,8 @@ #include "transport.h" #include "strbuf.h" #include "dir.h" +#include "dir-iterator.h" +#include "iterator.h" #include "sigchain.h" #include "branch.h" #include "remote.h" @@ -407,42 +409,39 @@ static void mkdir_if_missing(const char *pathname, mode_t mode) } static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest, - const char *src_repo, int src_baselen) + const char *src_repo) { - struct dirent *de; - struct stat buf; int src_len, dest_len; - DIR *dir; - - dir = opendir(src->buf); - if (!dir) - die_errno(_("failed to open '%s'"), src->buf); + struct dir_iterator *iter; + int iter_status; + unsigned int flags; mkdir_if_missing(dest->buf, 0777); + flags = DIR_ITERATOR_PEDANTIC | DIR_ITERATOR_FOLLOW_SYMLINKS; + iter = dir_iterator_begin(src->buf, flags); + + if (!iter) + die_errno(_("failed to start iterator over '%s'"), src->buf); + strbuf_addch(src, '/'); src_len = src->len; strbuf_addch(dest, '/'); dest_len = dest->len; - while ((de = readdir(dir)) != NULL) { + while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) { strbuf_setlen(src, src_len); - strbuf_addstr(src, de->d_name); + strbuf_addstr(src, iter->relative_path); strbuf_setlen(dest, dest_len); - strbuf_addstr(dest, de->d_name); - if (stat(src->buf, &buf)) { - warning (_("failed to stat %s\n"), src->buf); - continue; - } - if (S_ISDIR(buf.st_mode)) { - if (!is_dot_or_dotdot(de->d_name)) - copy_or_link_directory(src, dest, - src_repo, src_baselen); + strbuf_addstr(dest, iter->relative_path); + + if (S_ISDIR(iter->st.st_mode)) { + mkdir_if_missing(dest->buf, 0777); continue; } /* Files that cannot be copied bit-for-bit... */ - if (!strcmp(src->buf + src_baselen, "/info/alternates")) { + if (!strcmp(iter->relative_path, "info/alternates")) { copy_alternates(src, src_repo); continue; } @@ -459,7 +458,11 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest, if (copy_file_with_time(dest->buf, src->buf, 0666)) die_errno(_("failed to copy file to '%s'"), dest->buf); } - closedir(dir); + + if (iter_status != ITER_DONE) { + strbuf_setlen(src, src_len); + die(_("failed to iterate over '%s'"), src->buf); + } } static void clone_local(const char *src_repo, const char *dest_repo) @@ -477,7 +480,7 @@ static void clone_local(const char *src_repo, const char *dest_repo) get_common_dir(&dest, dest_repo); strbuf_addstr(&src, "/objects"); strbuf_addstr(&dest, "/objects"); - copy_or_link_directory(&src, &dest, src_repo, src.len); + copy_or_link_directory(&src, &dest, src_repo); strbuf_release(&src); strbuf_release(&dest); } From c4d9c506f7a802ed263071c82319098e3fcd0b3e Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Wed, 10 Jul 2019 20:59:04 -0300 Subject: [PATCH 10/10] clone: replace strcmp by fspathcmp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the use of strcmp by fspathcmp at copy_or_link_directory, which is more permissive/friendly to case-insensitive file systems. Suggested-by: Nguyễn Thái Ngọc Duy Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/clone.c b/builtin/clone.c index a4ce801a67..62b6a0a352 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -441,7 +441,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest, } /* Files that cannot be copied bit-for-bit... */ - if (!strcmp(iter->relative_path, "info/alternates")) { + if (!fspathcmp(iter->relative_path, "info/alternates")) { copy_alternates(src, src_repo); continue; }