Merge branch 'jk/maint-avoid-streaming-filtered-contents'
* jk/maint-avoid-streaming-filtered-contents:
  - do not stream large files to pack when filters are in use
  - teach dry-run convert_to_git not to require a src buffer
  - teach convert_to_git a "dry run" mode
This commit is contained in:
commit
31e3d834b3
29
convert.c
29
convert.c
@ -196,9 +196,17 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
|
|||||||
char *dst;
|
char *dst;
|
||||||
|
|
||||||
if (crlf_action == CRLF_BINARY ||
|
if (crlf_action == CRLF_BINARY ||
|
||||||
(crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len)
|
(crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) ||
|
||||||
|
(src && !len))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we are doing a dry-run and have no source buffer, there is
|
||||||
|
* nothing to analyze; we must assume we would convert.
|
||||||
|
*/
|
||||||
|
if (!buf && !src)
|
||||||
|
return 1;
|
||||||
|
|
||||||
gather_stats(src, len, &stats);
|
gather_stats(src, len, &stats);
|
||||||
|
|
||||||
if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
|
if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
|
||||||
@ -232,6 +240,13 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
|
|||||||
if (!stats.cr)
|
if (!stats.cr)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At this point all of our source analysis is done, and we are sure we
|
||||||
|
* would convert. If we are in dry-run mode, we can give an answer.
|
||||||
|
*/
|
||||||
|
if (!buf)
|
||||||
|
return 1;
|
||||||
|
|
||||||
/* only grow if not in place */
|
/* only grow if not in place */
|
||||||
if (strbuf_avail(buf) + buf->len < len)
|
if (strbuf_avail(buf) + buf->len < len)
|
||||||
strbuf_grow(buf, len - buf->len);
|
strbuf_grow(buf, len - buf->len);
|
||||||
@ -396,6 +411,9 @@ static int apply_filter(const char *path, const char *src, size_t len,
|
|||||||
if (!cmd)
|
if (!cmd)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
if (!dst)
|
||||||
|
return 1;
|
||||||
|
|
||||||
memset(&async, 0, sizeof(async));
|
memset(&async, 0, sizeof(async));
|
||||||
async.proc = filter_buffer;
|
async.proc = filter_buffer;
|
||||||
async.data = &params;
|
async.data = &params;
|
||||||
@ -527,9 +545,12 @@ static int ident_to_git(const char *path, const char *src, size_t len,
|
|||||||
{
|
{
|
||||||
char *dst, *dollar;
|
char *dst, *dollar;
|
||||||
|
|
||||||
if (!ident || !count_ident(src, len))
|
if (!ident || (src && !count_ident(src, len)))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
if (!buf)
|
||||||
|
return 1;
|
||||||
|
|
||||||
/* only grow if not in place */
|
/* only grow if not in place */
|
||||||
if (strbuf_avail(buf) + buf->len < len)
|
if (strbuf_avail(buf) + buf->len < len)
|
||||||
strbuf_grow(buf, len - buf->len);
|
strbuf_grow(buf, len - buf->len);
|
||||||
@ -759,13 +780,13 @@ int convert_to_git(const char *path, const char *src, size_t len,
|
|||||||
filter = ca.drv->clean;
|
filter = ca.drv->clean;
|
||||||
|
|
||||||
ret |= apply_filter(path, src, len, dst, filter);
|
ret |= apply_filter(path, src, len, dst, filter);
|
||||||
if (ret) {
|
if (ret && dst) {
|
||||||
src = dst->buf;
|
src = dst->buf;
|
||||||
len = dst->len;
|
len = dst->len;
|
||||||
}
|
}
|
||||||
ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
|
ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
|
||||||
ret |= crlf_to_git(path, src, len, dst, ca.crlf_action, checksafe);
|
ret |= crlf_to_git(path, src, len, dst, ca.crlf_action, checksafe);
|
||||||
if (ret) {
|
if (ret && dst) {
|
||||||
src = dst->buf;
|
src = dst->buf;
|
||||||
len = dst->len;
|
len = dst->len;
|
||||||
}
|
}
|
||||||
|
@ -40,6 +40,11 @@ extern int convert_to_working_tree(const char *path, const char *src,
|
|||||||
size_t len, struct strbuf *dst);
|
size_t len, struct strbuf *dst);
|
||||||
extern int renormalize_buffer(const char *path, const char *src, size_t len,
|
extern int renormalize_buffer(const char *path, const char *src, size_t len,
|
||||||
struct strbuf *dst);
|
struct strbuf *dst);
|
||||||
|
static inline int would_convert_to_git(const char *path, const char *src,
|
||||||
|
size_t len, enum safe_crlf checksafe)
|
||||||
|
{
|
||||||
|
return convert_to_git(path, src, len, NULL, checksafe);
|
||||||
|
}
|
||||||
|
|
||||||
/*****************************************************************
|
/*****************************************************************
|
||||||
*
|
*
|
||||||
|
14
sha1_file.c
14
sha1_file.c
@ -2700,10 +2700,13 @@ static int index_core(unsigned char *sha1, int fd, size_t size,
|
|||||||
* This also bypasses the usual "convert-to-git" dance, and that is on
|
* This also bypasses the usual "convert-to-git" dance, and that is on
|
||||||
* purpose. We could write a streaming version of the converting
|
* purpose. We could write a streaming version of the converting
|
||||||
* functions and insert that before feeding the data to fast-import
|
* functions and insert that before feeding the data to fast-import
|
||||||
* (or equivalent in-core API described above), but the primary
|
* (or equivalent in-core API described above). However, that is
|
||||||
* motivation for trying to stream from the working tree file and to
|
* somewhat complicated, as we do not know the size of the filter
|
||||||
* avoid mmaping it in core is to deal with large binary blobs, and
|
* result, which we need to know beforehand when writing a git object.
|
||||||
* by definition they do _not_ want to get any conversion.
|
* Since the primary motivation for trying to stream from the working
|
||||||
|
* tree file and to avoid mmaping it in core is to deal with large
|
||||||
|
* binary blobs, they generally do not want to get any conversion, and
|
||||||
|
* callers should avoid this code path when filters are requested.
|
||||||
*/
|
*/
|
||||||
static int index_stream(unsigned char *sha1, int fd, size_t size,
|
static int index_stream(unsigned char *sha1, int fd, size_t size,
|
||||||
enum object_type type, const char *path,
|
enum object_type type, const char *path,
|
||||||
@ -2720,7 +2723,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
|
|||||||
|
|
||||||
if (!S_ISREG(st->st_mode))
|
if (!S_ISREG(st->st_mode))
|
||||||
ret = index_pipe(sha1, fd, type, path, flags);
|
ret = index_pipe(sha1, fd, type, path, flags);
|
||||||
else if (size <= big_file_threshold || type != OBJ_BLOB)
|
else if (size <= big_file_threshold || type != OBJ_BLOB ||
|
||||||
|
(path && would_convert_to_git(path, NULL, 0, 0)))
|
||||||
ret = index_core(sha1, fd, size, type, path, flags);
|
ret = index_core(sha1, fd, size, type, path, flags);
|
||||||
else
|
else
|
||||||
ret = index_stream(sha1, fd, size, type, path, flags);
|
ret = index_stream(sha1, fd, size, type, path, flags);
|
||||||
|
86
t/t1051-large-conversion.sh
Executable file
86
t/t1051-large-conversion.sh
Executable file
@ -0,0 +1,86 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
test_description='test conversion filters on large files'
|
||||||
|
. ./test-lib.sh
|
||||||
|
|
||||||
|
set_attr() {
|
||||||
|
test_when_finished 'rm -f .gitattributes' &&
|
||||||
|
echo "* $*" >.gitattributes
|
||||||
|
}
|
||||||
|
|
||||||
|
check_input() {
|
||||||
|
git read-tree --empty &&
|
||||||
|
git add small large &&
|
||||||
|
git cat-file blob :small >small.index &&
|
||||||
|
git cat-file blob :large | head -n 1 >large.index &&
|
||||||
|
test_cmp small.index large.index
|
||||||
|
}
|
||||||
|
|
||||||
|
check_output() {
|
||||||
|
rm -f small large &&
|
||||||
|
git checkout small large &&
|
||||||
|
head -n 1 large >large.head &&
|
||||||
|
test_cmp small large.head
|
||||||
|
}
|
||||||
|
|
||||||
|
test_expect_success 'setup input tests' '
|
||||||
|
printf "\$Id: foo\$\\r\\n" >small &&
|
||||||
|
cat small small >large &&
|
||||||
|
git config core.bigfilethreshold 20 &&
|
||||||
|
git config filter.test.clean "sed s/.*/CLEAN/"
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'autocrlf=true converts on input' '
|
||||||
|
test_config core.autocrlf true &&
|
||||||
|
check_input
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'eol=crlf converts on input' '
|
||||||
|
set_attr eol=crlf &&
|
||||||
|
check_input
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'ident converts on input' '
|
||||||
|
set_attr ident &&
|
||||||
|
check_input
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'user-defined filters convert on input' '
|
||||||
|
set_attr filter=test &&
|
||||||
|
check_input
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'setup output tests' '
|
||||||
|
echo "\$Id\$" >small &&
|
||||||
|
cat small small >large &&
|
||||||
|
git add small large &&
|
||||||
|
git config core.bigfilethreshold 7 &&
|
||||||
|
git config filter.test.smudge "sed s/.*/SMUDGE/"
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'autocrlf=true converts on output' '
|
||||||
|
test_config core.autocrlf true &&
|
||||||
|
check_output
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'eol=crlf converts on output' '
|
||||||
|
set_attr eol=crlf &&
|
||||||
|
check_output
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'user-defined filters convert on output' '
|
||||||
|
set_attr filter=test &&
|
||||||
|
check_output
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'ident converts on output' '
|
||||||
|
set_attr ident &&
|
||||||
|
rm -f small large &&
|
||||||
|
git checkout small large &&
|
||||||
|
sed -n "s/Id: .*/Id: SHA/p" <small >small.clean &&
|
||||||
|
head -n 1 large >large.head &&
|
||||||
|
sed -n "s/Id: .*/Id: SHA/p" <large.head >large.clean &&
|
||||||
|
test_cmp small.clean large.clean
|
||||||
|
'
|
||||||
|
|
||||||
|
test_done
|
Loading…
Reference in New Issue
Block a user