Merge branch 'nd/stream-more'
Use the streaming API to read blob data in smaller chunks in more places to reduce the memory footprint.

By Nguyễn Thái Ngọc Duy (6) and Junio C Hamano (1)

* nd/stream-more:
  - update-server-info: respect core.bigfilethreshold
  - fsck: use streaming API for writing lost-found blobs
  - show: use streaming API for showing blobs
  - parse_object: avoid putting whole blob in core
  - cat-file: use streaming API to print blobs
  - Add more large blob test cases
  - streaming: make streaming-write-entry to be more reusable
This commit is contained in:
commit
47de6b0425
@ -11,6 +11,7 @@
|
||||
#include "parse-options.h"
|
||||
#include "diff.h"
|
||||
#include "userdiff.h"
|
||||
#include "streaming.h"
|
||||
|
||||
#define BATCH 1
|
||||
#define BATCH_CHECK 2
|
||||
@ -127,6 +128,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
|
||||
return cmd_ls_tree(2, ls_args, NULL);
|
||||
}
|
||||
|
||||
if (type == OBJ_BLOB)
|
||||
return stream_blob_to_fd(1, sha1, NULL, 0);
|
||||
buf = read_sha1_file(sha1, &type, &size);
|
||||
if (!buf)
|
||||
die("Cannot read object %s", obj_name);
|
||||
@ -149,6 +152,28 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
|
||||
break;
|
||||
|
||||
case 0:
|
||||
if (type_from_string(exp_type) == OBJ_BLOB) {
|
||||
unsigned char blob_sha1[20];
|
||||
if (sha1_object_info(sha1, NULL) == OBJ_TAG) {
|
||||
enum object_type type;
|
||||
unsigned long size;
|
||||
char *buffer = read_sha1_file(sha1, &type, &size);
|
||||
if (memcmp(buffer, "object ", 7) ||
|
||||
get_sha1_hex(buffer + 7, blob_sha1))
|
||||
die("%s not a valid tag", sha1_to_hex(sha1));
|
||||
free(buffer);
|
||||
} else
|
||||
hashcpy(blob_sha1, sha1);
|
||||
|
||||
if (sha1_object_info(blob_sha1, NULL) == OBJ_BLOB)
|
||||
return stream_blob_to_fd(1, blob_sha1, NULL, 0);
|
||||
/*
|
||||
* we attempted to dereference a tag to a blob
|
||||
* and failed; there may be new dereference
|
||||
* mechanisms this code is not aware of.
|
||||
* fall-back to the usual case.
|
||||
*/
|
||||
}
|
||||
buf = read_object_with_reference(sha1, exp_type, &size, NULL);
|
||||
break;
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "parse-options.h"
|
||||
#include "dir.h"
|
||||
#include "progress.h"
|
||||
#include "streaming.h"
|
||||
|
||||
#define REACHABLE 0x0001
|
||||
#define SEEN 0x0002
|
||||
@ -238,13 +239,8 @@ static void check_unreachable_object(struct object *obj)
|
||||
if (!(f = fopen(filename, "w")))
|
||||
die_errno("Could not open '%s'", filename);
|
||||
if (obj->type == OBJ_BLOB) {
|
||||
enum object_type type;
|
||||
unsigned long size;
|
||||
char *buf = read_sha1_file(obj->sha1,
|
||||
&type, &size);
|
||||
if (buf && fwrite(buf, 1, size, f) != size)
|
||||
if (stream_blob_to_fd(fileno(f), obj->sha1, NULL, 1))
|
||||
die_errno("Could not write '%s'", filename);
|
||||
free(buf);
|
||||
} else
|
||||
fprintf(f, "%s\n", sha1_to_hex(obj->sha1));
|
||||
if (fclose(f))
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "string-list.h"
|
||||
#include "parse-options.h"
|
||||
#include "branch.h"
|
||||
#include "streaming.h"
|
||||
|
||||
/* Set a default date-time format for git log ("log.date" config variable) */
|
||||
static const char *default_date_mode = NULL;
|
||||
@ -383,8 +384,13 @@ static void show_tagger(char *buf, int len, struct rev_info *rev)
|
||||
strbuf_release(&out);
|
||||
}
|
||||
|
||||
static int show_object(const unsigned char *sha1, int show_tag_object,
|
||||
struct rev_info *rev)
|
||||
/*
 * Print the blob named by "sha1" on standard output (fd 1) through the
 * streaming interface.  "rev" is accepted but not used here.
 *
 * Returns whatever stream_blob_to_fd() returns.
 */
static int show_blob_object(const unsigned char *sha1, struct rev_info *rev)
{
	int status;

	/*
	 * The blob is written to descriptor 1 directly, so push out
	 * anything still sitting in the stdio buffer first.
	 */
	fflush(stdout);
	status = stream_blob_to_fd(1, sha1, NULL, 0);
	return status;
}
|
||||
|
||||
static int show_tag_object(const unsigned char *sha1, struct rev_info *rev)
|
||||
{
|
||||
unsigned long size;
|
||||
enum object_type type;
|
||||
@ -394,16 +400,16 @@ static int show_object(const unsigned char *sha1, int show_tag_object,
|
||||
if (!buf)
|
||||
return error(_("Could not read object %s"), sha1_to_hex(sha1));
|
||||
|
||||
if (show_tag_object)
|
||||
while (offset < size && buf[offset] != '\n') {
|
||||
int new_offset = offset + 1;
|
||||
while (new_offset < size && buf[new_offset++] != '\n')
|
||||
; /* do nothing */
|
||||
if (!prefixcmp(buf + offset, "tagger "))
|
||||
show_tagger(buf + offset + 7,
|
||||
new_offset - offset - 7, rev);
|
||||
offset = new_offset;
|
||||
}
|
||||
assert(type == OBJ_TAG);
|
||||
while (offset < size && buf[offset] != '\n') {
|
||||
int new_offset = offset + 1;
|
||||
while (new_offset < size && buf[new_offset++] != '\n')
|
||||
; /* do nothing */
|
||||
if (!prefixcmp(buf + offset, "tagger "))
|
||||
show_tagger(buf + offset + 7,
|
||||
new_offset - offset - 7, rev);
|
||||
offset = new_offset;
|
||||
}
|
||||
|
||||
if (offset < size)
|
||||
fwrite(buf + offset, size - offset, 1, stdout);
|
||||
@ -463,7 +469,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
|
||||
const char *name = objects[i].name;
|
||||
switch (o->type) {
|
||||
case OBJ_BLOB:
|
||||
ret = show_object(o->sha1, 0, NULL);
|
||||
ret = show_blob_object(o->sha1, NULL);
|
||||
break;
|
||||
case OBJ_TAG: {
|
||||
struct tag *t = (struct tag *)o;
|
||||
@ -474,7 +480,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
|
||||
diff_get_color_opt(&rev.diffopt, DIFF_COMMIT),
|
||||
t->tag,
|
||||
diff_get_color_opt(&rev.diffopt, DIFF_RESET));
|
||||
ret = show_object(o->sha1, 1, &rev);
|
||||
ret = show_tag_object(o->sha1, &rev);
|
||||
rev.shown_one = 1;
|
||||
if (ret)
|
||||
break;
|
||||
|
@ -15,6 +15,7 @@ int cmd_update_server_info(int argc, const char **argv, const char *prefix)
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
git_config(git_default_config, NULL);
|
||||
argc = parse_options(argc, argv, prefix, options,
|
||||
update_server_info_usage, 0);
|
||||
if (argc > 0)
|
||||
|
53
entry.c
53
entry.c
@ -120,58 +120,15 @@ static int streaming_write_entry(struct cache_entry *ce, char *path,
|
||||
const struct checkout *state, int to_tempfile,
|
||||
int *fstat_done, struct stat *statbuf)
|
||||
{
|
||||
struct git_istream *st;
|
||||
enum object_type type;
|
||||
unsigned long sz;
|
||||
int result = -1;
|
||||
ssize_t kept = 0;
|
||||
int fd = -1;
|
||||
|
||||
st = open_istream(ce->sha1, &type, &sz, filter);
|
||||
if (!st)
|
||||
return -1;
|
||||
if (type != OBJ_BLOB)
|
||||
goto close_and_exit;
|
||||
int fd;
|
||||
|
||||
fd = open_output_fd(path, ce, to_tempfile);
|
||||
if (fd < 0)
|
||||
goto close_and_exit;
|
||||
|
||||
for (;;) {
|
||||
char buf[1024 * 16];
|
||||
ssize_t wrote, holeto;
|
||||
ssize_t readlen = read_istream(st, buf, sizeof(buf));
|
||||
|
||||
if (!readlen)
|
||||
break;
|
||||
if (sizeof(buf) == readlen) {
|
||||
for (holeto = 0; holeto < readlen; holeto++)
|
||||
if (buf[holeto])
|
||||
break;
|
||||
if (readlen == holeto) {
|
||||
kept += holeto;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
|
||||
goto close_and_exit;
|
||||
else
|
||||
kept = 0;
|
||||
wrote = write_in_full(fd, buf, readlen);
|
||||
|
||||
if (wrote != readlen)
|
||||
goto close_and_exit;
|
||||
}
|
||||
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
|
||||
write(fd, "", 1) != 1))
|
||||
goto close_and_exit;
|
||||
*fstat_done = fstat_output(fd, state, statbuf);
|
||||
|
||||
close_and_exit:
|
||||
close_istream(st);
|
||||
if (0 <= fd)
|
||||
if (0 <= fd) {
|
||||
result = stream_blob_to_fd(fd, ce->sha1, filter, 1);
|
||||
*fstat_done = fstat_output(fd, state, statbuf);
|
||||
result = close(fd);
|
||||
}
|
||||
if (result && 0 <= fd)
|
||||
unlink(path);
|
||||
return result;
|
||||
|
11
object.c
11
object.c
@ -198,6 +198,17 @@ struct object *parse_object(const unsigned char *sha1)
|
||||
if (obj && obj->parsed)
|
||||
return obj;
|
||||
|
||||
if ((obj && obj->type == OBJ_BLOB) ||
|
||||
(!obj && has_sha1_file(sha1) &&
|
||||
sha1_object_info(sha1, NULL) == OBJ_BLOB)) {
|
||||
if (check_sha1_signature(repl, NULL, 0, NULL) < 0) {
|
||||
error("sha1 mismatch %s\n", sha1_to_hex(repl));
|
||||
return NULL;
|
||||
}
|
||||
parse_blob_buffer(lookup_blob(sha1), NULL, 0);
|
||||
return lookup_object(sha1);
|
||||
}
|
||||
|
||||
buffer = read_sha1_file(sha1, &type, &size);
|
||||
if (buffer) {
|
||||
if (check_sha1_signature(repl, buffer, size, typename(type)) < 0) {
|
||||
|
42
sha1_file.c
42
sha1_file.c
@ -19,6 +19,7 @@
|
||||
#include "pack-revindex.h"
|
||||
#include "sha1-lookup.h"
|
||||
#include "bulk-checkin.h"
|
||||
#include "streaming.h"
|
||||
|
||||
#ifndef O_NOATIME
|
||||
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
|
||||
@ -1146,10 +1147,47 @@ static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
|
||||
/*
|
||||
* With an in-core object data in "map", rehash it to make sure the
|
||||
* object name actually matches "sha1" to detect object corruption.
|
||||
* With "map" == NULL, try reading the object named with "sha1" using
|
||||
* the streaming interface and rehash it to do the same.
|
||||
*/
|
||||
int check_sha1_signature(const unsigned char *sha1, void *map,
|
||||
unsigned long size, const char *type)
|
||||
{
|
||||
unsigned char real_sha1[20];
|
||||
hash_sha1_file(map, size, type, real_sha1);
|
||||
enum object_type obj_type;
|
||||
struct git_istream *st;
|
||||
git_SHA_CTX c;
|
||||
char hdr[32];
|
||||
int hdrlen;
|
||||
|
||||
if (map) {
|
||||
hash_sha1_file(map, size, type, real_sha1);
|
||||
return hashcmp(sha1, real_sha1) ? -1 : 0;
|
||||
}
|
||||
|
||||
st = open_istream(sha1, &obj_type, &size, NULL);
|
||||
if (!st)
|
||||
return -1;
|
||||
|
||||
/* Generate the header */
|
||||
hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1;
|
||||
|
||||
/* Sha1.. */
|
||||
git_SHA1_Init(&c);
|
||||
git_SHA1_Update(&c, hdr, hdrlen);
|
||||
for (;;) {
|
||||
char buf[1024 * 16];
|
||||
ssize_t readlen = read_istream(st, buf, sizeof(buf));
|
||||
|
||||
if (!readlen)
|
||||
break;
|
||||
git_SHA1_Update(&c, buf, readlen);
|
||||
}
|
||||
git_SHA1_Final(real_sha1, &c);
|
||||
close_istream(st);
|
||||
return hashcmp(sha1, real_sha1) ? -1 : 0;
|
||||
}
|
||||
|
||||
|
55
streaming.c
55
streaming.c
@ -489,3 +489,58 @@ static open_method_decl(incore)
|
||||
|
||||
return st->u.incore.buf ? 0 : -1;
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************
|
||||
* Users of streaming interface
|
||||
****************************************************************/
|
||||
|
||||
int stream_blob_to_fd(int fd, unsigned const char *sha1, struct stream_filter *filter,
|
||||
int can_seek)
|
||||
{
|
||||
struct git_istream *st;
|
||||
enum object_type type;
|
||||
unsigned long sz;
|
||||
ssize_t kept = 0;
|
||||
int result = -1;
|
||||
|
||||
st = open_istream(sha1, &type, &sz, filter);
|
||||
if (!st)
|
||||
return result;
|
||||
if (type != OBJ_BLOB)
|
||||
goto close_and_exit;
|
||||
for (;;) {
|
||||
char buf[1024 * 16];
|
||||
ssize_t wrote, holeto;
|
||||
ssize_t readlen = read_istream(st, buf, sizeof(buf));
|
||||
|
||||
if (!readlen)
|
||||
break;
|
||||
if (can_seek && sizeof(buf) == readlen) {
|
||||
for (holeto = 0; holeto < readlen; holeto++)
|
||||
if (buf[holeto])
|
||||
break;
|
||||
if (readlen == holeto) {
|
||||
kept += holeto;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
|
||||
goto close_and_exit;
|
||||
else
|
||||
kept = 0;
|
||||
wrote = write_in_full(fd, buf, readlen);
|
||||
|
||||
if (wrote != readlen)
|
||||
goto close_and_exit;
|
||||
}
|
||||
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
|
||||
write(fd, "", 1) != 1))
|
||||
goto close_and_exit;
|
||||
result = 0;
|
||||
|
||||
close_and_exit:
|
||||
close_istream(st);
|
||||
return result;
|
||||
}
|
||||
|
@ -12,4 +12,6 @@ extern struct git_istream *open_istream(const unsigned char *, enum object_type
|
||||
extern int close_istream(struct git_istream *);
|
||||
extern ssize_t read_istream(struct git_istream *, char *, size_t);
|
||||
|
||||
extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, int can_seek);
|
||||
|
||||
#endif /* STREAMING_H */
|
||||
|
@ -6,11 +6,15 @@ test_description='adding and checking out large blobs'
|
||||
. ./test-lib.sh
|
||||
|
||||
test_expect_success setup '
|
||||
git config core.bigfilethreshold 200k &&
|
||||
# clone does not allow us to pass core.bigfilethreshold to
|
||||
# new repos, so set core.bigfilethreshold globally
|
||||
git config --global core.bigfilethreshold 200k &&
|
||||
echo X | dd of=large1 bs=1k seek=2000 &&
|
||||
echo X | dd of=large2 bs=1k seek=2000 &&
|
||||
echo X | dd of=large3 bs=1k seek=2000 &&
|
||||
echo Y | dd of=huge bs=1k seek=2500
|
||||
echo Y | dd of=huge bs=1k seek=2500 &&
|
||||
GIT_ALLOC_LIMIT=1500 &&
|
||||
export GIT_ALLOC_LIMIT
|
||||
'
|
||||
|
||||
test_expect_success 'add a large file or two' '
|
||||
@ -100,4 +104,34 @@ test_expect_success 'packsize limit' '
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'diff --raw' '
|
||||
git commit -q -m initial &&
|
||||
echo modified >>large1 &&
|
||||
git add large1 &&
|
||||
git commit -q -m modified &&
|
||||
git diff --raw HEAD^
|
||||
'
|
||||
|
||||
test_expect_success 'hash-object' '
|
||||
git hash-object large1
|
||||
'
|
||||
|
||||
test_expect_success 'cat-file a large file' '
|
||||
git cat-file blob :large1 >/dev/null
|
||||
'
|
||||
|
||||
test_expect_success 'cat-file a large file from a tag' '
|
||||
git tag -m largefile largefiletag :large1 &&
|
||||
git cat-file blob largefiletag >/dev/null
|
||||
'
|
||||
|
||||
test_expect_success 'git-show a large file' '
|
||||
git show :large1 >/dev/null
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'repack' '
|
||||
git repack -ad
|
||||
'
|
||||
|
||||
test_done
|
||||
|
27
wrapper.c
27
wrapper.c
@ -9,6 +9,18 @@ static void do_nothing(size_t size)
|
||||
|
||||
static void (*try_to_free_routine)(size_t size) = do_nothing;
|
||||
|
||||
/*
 * Die if a single allocation request of "size" bytes exceeds the limit
 * given by the GIT_ALLOC_LIMIT environment variable.
 *
 * The variable is read once, on the first call, and is interpreted as a
 * count of kibibytes.  An unset variable, or one that parses to 0, means
 * no limit.  The limit is held as a size_t and clamped to SIZE_MAX, so
 * large settings cannot overflow and the comparison with "size" is done
 * in a single unsigned type.
 */
static void memory_limit_check(size_t size)
{
	static size_t limit;
	static int limit_loaded;

	if (!limit_loaded) {
		const char *env = getenv("GIT_ALLOC_LIMIT");

		if (env) {
			unsigned long kib = strtoul(env, NULL, 10);

			/* clamp instead of overflowing on "kib * 1024" */
			if (kib > SIZE_MAX / 1024)
				limit = SIZE_MAX;
			else
				limit = (size_t)kib * 1024;
		}
		limit_loaded = 1;
	}
	if (limit && size > limit)
		die("attempting to allocate %"PRIuMAX" over limit %"PRIuMAX,
		    (uintmax_t)size, (uintmax_t)limit);
}
|
||||
|
||||
try_to_free_t set_try_to_free_routine(try_to_free_t routine)
|
||||
{
|
||||
try_to_free_t old = try_to_free_routine;
|
||||
@ -32,7 +44,10 @@ char *xstrdup(const char *str)
|
||||
|
||||
void *xmalloc(size_t size)
|
||||
{
|
||||
void *ret = malloc(size);
|
||||
void *ret;
|
||||
|
||||
memory_limit_check(size);
|
||||
ret = malloc(size);
|
||||
if (!ret && !size)
|
||||
ret = malloc(1);
|
||||
if (!ret) {
|
||||
@ -79,7 +94,10 @@ char *xstrndup(const char *str, size_t len)
|
||||
|
||||
void *xrealloc(void *ptr, size_t size)
|
||||
{
|
||||
void *ret = realloc(ptr, size);
|
||||
void *ret;
|
||||
|
||||
memory_limit_check(size);
|
||||
ret = realloc(ptr, size);
|
||||
if (!ret && !size)
|
||||
ret = realloc(ptr, 1);
|
||||
if (!ret) {
|
||||
@ -95,7 +113,10 @@ void *xrealloc(void *ptr, size_t size)
|
||||
|
||||
void *xcalloc(size_t nmemb, size_t size)
|
||||
{
|
||||
void *ret = calloc(nmemb, size);
|
||||
void *ret;
|
||||
|
||||
memory_limit_check(size * nmemb);
|
||||
ret = calloc(nmemb, size);
|
||||
if (!ret && (!nmemb || !size))
|
||||
ret = calloc(1, 1);
|
||||
if (!ret) {
|
||||
|
Loading…
Reference in New Issue
Block a user