diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 8ed501f220..36a9104433 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -11,6 +11,7 @@ #include "parse-options.h" #include "diff.h" #include "userdiff.h" +#include "streaming.h" #define BATCH 1 #define BATCH_CHECK 2 @@ -127,6 +128,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) return cmd_ls_tree(2, ls_args, NULL); } + if (type == OBJ_BLOB) + return stream_blob_to_fd(1, sha1, NULL, 0); buf = read_sha1_file(sha1, &type, &size); if (!buf) die("Cannot read object %s", obj_name); @@ -149,6 +152,28 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) break; case 0: + if (type_from_string(exp_type) == OBJ_BLOB) { + unsigned char blob_sha1[20]; + if (sha1_object_info(sha1, NULL) == OBJ_TAG) { + enum object_type type; + unsigned long size; + char *buffer = read_sha1_file(sha1, &type, &size); + if (memcmp(buffer, "object ", 7) || + get_sha1_hex(buffer + 7, blob_sha1)) + die("%s not a valid tag", sha1_to_hex(sha1)); + free(buffer); + } else + hashcpy(blob_sha1, sha1); + + if (sha1_object_info(blob_sha1, NULL) == OBJ_BLOB) + return stream_blob_to_fd(1, blob_sha1, NULL, 0); + /* + * we attempted to dereference a tag to a blob + * and failed; there may be new dereference + * mechanisms this code is not aware of. + * fall-back to the usual case. + */ + } buf = read_object_with_reference(sha1, exp_type, &size, NULL); break; diff --git a/builtin/fsck.c b/builtin/fsck.c index 67eb553c7d..a710227a64 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -12,6 +12,7 @@ #include "parse-options.h" #include "dir.h" #include "progress.h" +#include "streaming.h" #define REACHABLE 0x0001 #define SEEN 0x0002 @@ -238,13 +239,8 @@ static void check_unreachable_object(struct object *obj) if (!(f = fopen(filename, "w"))) die_errno("Could not open '%s'", filename); if (obj->type == OBJ_BLOB) { - enum object_type type; - unsigned long size; - char *buf = read_sha1_file(obj->sha1, - &type, &size); - if (buf && fwrite(buf, 1, size, f) != size) + if (stream_blob_to_fd(fileno(f), obj->sha1, NULL, 1)) die_errno("Could not write '%s'", filename); - free(buf); } else fprintf(f, "%s\n", sha1_to_hex(obj->sha1)); if (fclose(f)) diff --git a/builtin/log.c b/builtin/log.c index 8a47012b0b..690caa7830 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -20,6 +20,7 @@ #include "string-list.h" #include "parse-options.h" #include "branch.h" +#include "streaming.h" /* Set a default date-time format for git log ("log.date" config variable) */ static const char *default_date_mode = NULL; @@ -383,8 +384,13 @@ static void show_tagger(char *buf, int len, struct rev_info *rev) strbuf_release(&out); } -static int show_object(const unsigned char *sha1, int show_tag_object, - struct rev_info *rev) +static int show_blob_object(const unsigned char *sha1, struct rev_info *rev) +{ + fflush(stdout); + return stream_blob_to_fd(1, sha1, NULL, 0); +} + +static int show_tag_object(const unsigned char *sha1, struct rev_info *rev) { unsigned long size; enum object_type type; @@ -394,16 +400,16 @@ static int show_object(const unsigned char *sha1, int show_tag_object, if (!buf) return error(_("Could not read object %s"), sha1_to_hex(sha1)); - if (show_tag_object) - while (offset < size && buf[offset] != '\n') { - int new_offset = offset + 1; - while (new_offset < size && buf[new_offset++] != '\n') - ; /* do nothing */ - if (!prefixcmp(buf + offset, "tagger ")) - show_tagger(buf + offset + 7, - new_offset - offset - 7, rev); - offset = new_offset; - } + assert(type == OBJ_TAG); + while (offset < size && buf[offset] != '\n') { + int new_offset = offset + 1; + while (new_offset < size && buf[new_offset++] != '\n') + ; /* do nothing */ + if (!prefixcmp(buf + offset, "tagger ")) + show_tagger(buf + offset + 7, + new_offset - offset - 7, rev); + offset = new_offset; + } if (offset < size) fwrite(buf + offset, size - offset, 1, stdout); @@ -463,7 +469,7 @@ int cmd_show(int argc, const char **argv, const char *prefix) const char *name = objects[i].name; switch (o->type) { case OBJ_BLOB: - ret = show_object(o->sha1, 0, NULL); + ret = show_blob_object(o->sha1, NULL); break; case OBJ_TAG: { struct tag *t = (struct tag *)o; @@ -474,7 +480,7 @@ int cmd_show(int argc, const char **argv, const char *prefix) diff_get_color_opt(&rev.diffopt, DIFF_COMMIT), t->tag, diff_get_color_opt(&rev.diffopt, DIFF_RESET)); - ret = show_object(o->sha1, 1, &rev); + ret = show_tag_object(o->sha1, &rev); rev.shown_one = 1; if (ret) break; diff --git a/builtin/update-server-info.c b/builtin/update-server-info.c index b90dce6358..0d63c4498c 100644 --- a/builtin/update-server-info.c +++ b/builtin/update-server-info.c @@ -15,6 +15,7 @@ int cmd_update_server_info(int argc, const char **argv, const char *prefix) OPT_END() }; + git_config(git_default_config, NULL); argc = parse_options(argc, argv, prefix, options, update_server_info_usage, 0); if (argc > 0) diff --git a/entry.c b/entry.c index 852fea1395..17a6bccec6 100644 --- a/entry.c +++ b/entry.c @@ -120,58 +120,15 @@ static int streaming_write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile, int *fstat_done, struct stat *statbuf) { - struct git_istream *st; - enum object_type type; - unsigned long sz; int result = -1; - ssize_t kept = 0; - int fd = -1; - - st = open_istream(ce->sha1, &type, &sz, filter); - if (!st) - return -1; - if (type != OBJ_BLOB) - goto close_and_exit; + int fd; fd = open_output_fd(path, ce, to_tempfile); - if (fd < 0) - goto close_and_exit; - - for (;;) { - char buf[1024 * 16]; - ssize_t wrote, holeto; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); - - if (!readlen) - break; - if (sizeof(buf) == readlen) { - for (holeto = 0; holeto < readlen; holeto++) - if (buf[holeto]) - break; - if (readlen == holeto) { - kept += holeto; - continue; - } - } - - if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1) - goto close_and_exit; - else - kept = 0; - wrote = write_in_full(fd, buf, readlen); - - if (wrote != readlen) - goto close_and_exit; - } - if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 || - write(fd, "", 1) != 1)) - goto close_and_exit; - *fstat_done = fstat_output(fd, state, statbuf); - -close_and_exit: - close_istream(st); - if (0 <= fd) + if (0 <= fd) { + result = stream_blob_to_fd(fd, ce->sha1, filter, 1); + *fstat_done = fstat_output(fd, state, statbuf); result = close(fd); + } if (result && 0 <= fd) unlink(path); return result; diff --git a/object.c b/object.c index 6b06297a5f..0498b18d45 100644 --- a/object.c +++ b/object.c @@ -198,6 +198,17 @@ struct object *parse_object(const unsigned char *sha1) if (obj && obj->parsed) return obj; + if ((obj && obj->type == OBJ_BLOB) || + (!obj && has_sha1_file(sha1) && + sha1_object_info(sha1, NULL) == OBJ_BLOB)) { + if (check_sha1_signature(repl, NULL, 0, NULL) < 0) { + error("sha1 mismatch %s\n", sha1_to_hex(repl)); + return NULL; + } + parse_blob_buffer(lookup_blob(sha1), NULL, 0); + return lookup_object(sha1); + } + buffer = read_sha1_file(sha1, &type, &size); if (buffer) { if (check_sha1_signature(repl, buffer, size, typename(type)) < 0) { diff --git a/sha1_file.c b/sha1_file.c index 4f06a0e450..ad314f08b9 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -19,6 +19,7 @@ #include "pack-revindex.h" #include "sha1-lookup.h" #include "bulk-checkin.h" +#include "streaming.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -1146,10 +1147,47 @@ static const struct packed_git *has_packed_and_bad(const unsigned char *sha1) return NULL; } -int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type) +/* + * With an in-core object data in "map", rehash it to make sure the + * object name actually matches "sha1" to detect object corruption. + * With "map" == NULL, try reading the object named with "sha1" using + * the streaming interface and rehash it to do the same. + */ +int check_sha1_signature(const unsigned char *sha1, void *map, + unsigned long size, const char *type) { unsigned char real_sha1[20]; - hash_sha1_file(map, size, type, real_sha1); + enum object_type obj_type; + struct git_istream *st; + git_SHA_CTX c; + char hdr[32]; + int hdrlen; + + if (map) { + hash_sha1_file(map, size, type, real_sha1); + return hashcmp(sha1, real_sha1) ? -1 : 0; + } + + st = open_istream(sha1, &obj_type, &size, NULL); + if (!st) + return -1; + + /* Generate the header */ + hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1; + + /* Sha1.. */ + git_SHA1_Init(&c); + git_SHA1_Update(&c, hdr, hdrlen); + for (;;) { + char buf[1024 * 16]; + ssize_t readlen = read_istream(st, buf, sizeof(buf)); + + if (!readlen) + break; + git_SHA1_Update(&c, buf, readlen); + } + git_SHA1_Final(real_sha1, &c); + close_istream(st); return hashcmp(sha1, real_sha1) ? -1 : 0; } diff --git a/streaming.c b/streaming.c index 71072e1b1d..7e7ee2be6f 100644 --- a/streaming.c +++ b/streaming.c @@ -489,3 +489,58 @@ static open_method_decl(incore) return st->u.incore.buf ? 0 : -1; } + + +/**************************************************************** + * Users of streaming interface + ****************************************************************/ + +int stream_blob_to_fd(int fd, unsigned const char *sha1, struct stream_filter *filter, + int can_seek) +{ + struct git_istream *st; + enum object_type type; + unsigned long sz; + ssize_t kept = 0; + int result = -1; + + st = open_istream(sha1, &type, &sz, filter); + if (!st) + return result; + if (type != OBJ_BLOB) + goto close_and_exit; + for (;;) { + char buf[1024 * 16]; + ssize_t wrote, holeto; + ssize_t readlen = read_istream(st, buf, sizeof(buf)); + + if (!readlen) + break; + if (can_seek && sizeof(buf) == readlen) { + for (holeto = 0; holeto < readlen; holeto++) + if (buf[holeto]) + break; + if (readlen == holeto) { + kept += holeto; + continue; + } + } + + if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1) + goto close_and_exit; + else + kept = 0; + wrote = write_in_full(fd, buf, readlen); + + if (wrote != readlen) + goto close_and_exit; + } + if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 || + write(fd, "", 1) != 1)) + goto close_and_exit; + result = 0; + + close_and_exit: + close_istream(st); + return result; +} diff --git a/streaming.h b/streaming.h index 589e857b8c..3e827709c8 100644 --- a/streaming.h +++ b/streaming.h @@ -12,4 +12,6 @@ extern struct git_istream *open_istream(const unsigned char *, enum object_type extern int close_istream(struct git_istream *); extern ssize_t read_istream(struct git_istream *, char *, size_t); +extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, int can_seek); + #endif /* STREAMING_H */ diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 29d6024b7f..4d127f19b7 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -6,11 +6,15 @@ test_description='adding and checking out large blobs' . ./test-lib.sh test_expect_success setup ' - git config core.bigfilethreshold 200k && + # clone does not allow us to pass core.bigfilethreshold to + # new repos, so set core.bigfilethreshold globally + git config --global core.bigfilethreshold 200k && echo X | dd of=large1 bs=1k seek=2000 && echo X | dd of=large2 bs=1k seek=2000 && echo X | dd of=large3 bs=1k seek=2000 && - echo Y | dd of=huge bs=1k seek=2500 + echo Y | dd of=huge bs=1k seek=2500 && + GIT_ALLOC_LIMIT=1500 && + export GIT_ALLOC_LIMIT ' test_expect_success 'add a large file or two' ' @@ -100,4 +104,34 @@ test_expect_success 'packsize limit' ' ) ' +test_expect_success 'diff --raw' ' + git commit -q -m initial && + echo modified >>large1 && + git add large1 && + git commit -q -m modified && + git diff --raw HEAD^ +' + +test_expect_success 'hash-object' ' + git hash-object large1 +' + +test_expect_success 'cat-file a large file' ' + git cat-file blob :large1 >/dev/null +' + +test_expect_success 'cat-file a large file from a tag' ' + git tag -m largefile largefiletag :large1 && + git cat-file blob largefiletag >/dev/null +' + +test_expect_success 'git-show a large file' ' + git show :large1 >/dev/null + +' + +test_expect_success 'repack' ' + git repack -ad +' + test_done diff --git a/wrapper.c b/wrapper.c index 85f09df747..6ccd0595f4 100644 --- a/wrapper.c +++ b/wrapper.c @@ -9,6 +9,18 @@ static void do_nothing(size_t size) static void (*try_to_free_routine)(size_t size) = do_nothing; +static void memory_limit_check(size_t size) +{ + static int limit = -1; + if (limit == -1) { + const char *env = getenv("GIT_ALLOC_LIMIT"); + limit = env ? atoi(env) * 1024 : 0; + } + if (limit && size > limit) + die("attempting to allocate %"PRIuMAX" over limit %d", + (intmax_t)size, limit); +} + try_to_free_t set_try_to_free_routine(try_to_free_t routine) { try_to_free_t old = try_to_free_routine; @@ -32,7 +44,10 @@ char *xstrdup(const char *str) void *xmalloc(size_t size) { - void *ret = malloc(size); + void *ret; + + memory_limit_check(size); + ret = malloc(size); if (!ret && !size) ret = malloc(1); if (!ret) { @@ -79,7 +94,10 @@ char *xstrndup(const char *str, size_t len) void *xrealloc(void *ptr, size_t size) { - void *ret = realloc(ptr, size); + void *ret; + + memory_limit_check(size); + ret = realloc(ptr, size); if (!ret && !size) ret = realloc(ptr, 1); if (!ret) { @@ -95,7 +113,10 @@ void *xrealloc(void *ptr, size_t size) void *xcalloc(size_t nmemb, size_t size) { - void *ret = calloc(nmemb, size); + void *ret; + + memory_limit_check(size * nmemb); + ret = calloc(nmemb, size); if (!ret && (!nmemb || !size)) ret = calloc(1, 1); if (!ret) {