From c4ce46fc7ac1b59372aa935e641ca15b12359f5b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 8 May 2011 01:47:33 -0700 Subject: [PATCH 01/12] index_fd(): turn write_object and format_check arguments into one flag The "format_check" parameter tucked after the existing parameters is too ugly an afterthought to live in any reasonable API. Combine it with the other boolean parameter "write_object" into a single "flags" parameter. Signed-off-by: Junio C Hamano --- builtin/hash-object.c | 5 ++++- builtin/update-index.c | 3 ++- cache.h | 7 +++++-- notes-merge.c | 2 +- read-cache.c | 4 ++-- sha1_file.c | 29 +++++++++++++---------------- 6 files changed, 27 insertions(+), 23 deletions(-) diff --git a/builtin/hash-object.c b/builtin/hash-object.c index b96f46acf5..33911fd5e9 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -14,8 +14,11 @@ static void hash_fd(int fd, const char *type, int write_object, const char *path { struct stat st; unsigned char sha1[20]; + unsigned flags = (HASH_FORMAT_CHECK | + (write_object ? HASH_WRITE_OBJECT : 0)); + if (fstat(fd, &st) < 0 || - index_fd(sha1, fd, &st, write_object, type_from_string(type), path, 1)) + index_fd(sha1, fd, &st, type_from_string(type), path, flags)) die(write_object ? "Unable to add %s to database" : "Unable to hash %s", path); diff --git a/builtin/update-index.c b/builtin/update-index.c index d7850c6309..f14bc90830 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -99,7 +99,8 @@ static int add_one_path(struct cache_entry *old, const char *path, int len, stru fill_stat_cache_info(ce, st); ce->ce_mode = ce_mode_from_stat(old, st->st_mode); - if (index_path(ce->sha1, path, st, !info_only)) + if (index_path(ce->sha1, path, st, + info_only ? 0 : HASH_WRITE_OBJECT)) return -1; option = allow_add ? ADD_CACHE_OK_TO_ADD : 0; option |= allow_replace ? 
ADD_CACHE_OK_TO_REPLACE : 0; diff --git a/cache.h b/cache.h index 28899b7b78..bc38013959 100644 --- a/cache.h +++ b/cache.h @@ -518,8 +518,11 @@ struct pathspec { extern int init_pathspec(struct pathspec *, const char **); extern void free_pathspec(struct pathspec *); extern int ce_path_match(const struct cache_entry *ce, const struct pathspec *pathspec); -extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path, int format_check); -extern int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object); + +#define HASH_WRITE_OBJECT 1 +#define HASH_FORMAT_CHECK 2 +extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); +extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags); extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st); #define REFRESH_REALLY 0x0001 /* ignore_valid */ diff --git a/notes-merge.c b/notes-merge.c index 28046a9984..e1aaf43b43 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -707,7 +707,7 @@ int notes_merge_commit(struct notes_merge_options *o, /* write file as blob, and add to partial_tree */ if (stat(ent->name, &st)) die_errno("Failed to stat '%s'", ent->name); - if (index_path(blob_sha1, ent->name, &st, 1)) + if (index_path(blob_sha1, ent->name, &st, HASH_WRITE_OBJECT)) die("Failed to write blob object from '%s'", ent->name); if (add_note(partial_tree, obj_sha1, blob_sha1, NULL)) die("Failed to add resolved note '%s' to notes tree", diff --git a/read-cache.c b/read-cache.c index f38471cac3..4ac9a037f4 100644 --- a/read-cache.c +++ b/read-cache.c @@ -92,7 +92,7 @@ static int ce_compare_data(struct cache_entry *ce, struct stat *st) if (fd >= 0) { unsigned char sha1[20]; - if (!index_fd(sha1, fd, st, 0, OBJ_BLOB, ce->name, 0)) + if (!index_fd(sha1, fd, st, OBJ_BLOB, ce->name, 0)) match = hashcmp(sha1, ce->sha1); /* index_fd() 
closed the file descriptor already */ } @@ -641,7 +641,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, return 0; } if (!intent_only) { - if (index_path(ce->sha1, path, st, 1)) + if (index_path(ce->sha1, path, st, HASH_WRITE_OBJECT)) return error("unable to index file %s", path); } else record_intent_to_add(ce); diff --git a/sha1_file.c b/sha1_file.c index 889fe71830..17c179c9fd 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2581,10 +2581,11 @@ static void check_tag(const void *buf, size_t size) } static int index_mem(unsigned char *sha1, void *buf, size_t size, - int write_object, enum object_type type, - const char *path, int format_check) + enum object_type type, + const char *path, unsigned flags) { int ret, re_allocated = 0; + int write_object = flags & HASH_WRITE_OBJECT; if (!type) type = OBJ_BLOB; @@ -2600,7 +2601,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, re_allocated = 1; } } - if (format_check) { + if (flags & HASH_FORMAT_CHECK) { if (type == OBJ_TREE) check_tree(buf, size); if (type == OBJ_COMMIT) @@ -2620,8 +2621,8 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, #define SMALL_FILE_SIZE (32*1024) -int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, - enum object_type type, const char *path, int format_check) +int index_fd(unsigned char *sha1, int fd, struct stat *st, + enum object_type type, const char *path, unsigned flags) { int ret; size_t size = xsize_t(st->st_size); @@ -2629,33 +2630,29 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, if (!S_ISREG(st->st_mode)) { struct strbuf sbuf = STRBUF_INIT; if (strbuf_read(&sbuf, fd, 4096) >= 0) - ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object, - type, path, format_check); + ret = index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags); else ret = -1; strbuf_release(&sbuf); } else if (!size) { - ret = index_mem(sha1, NULL, size, write_object, type, path, - 
format_check); + ret = index_mem(sha1, NULL, size, type, path, flags); } else if (size <= SMALL_FILE_SIZE) { char *buf = xmalloc(size); if (size == read_in_full(fd, buf, size)) - ret = index_mem(sha1, buf, size, write_object, type, - path, format_check); + ret = index_mem(sha1, buf, size, type, path, flags); else ret = error("short read %s", strerror(errno)); free(buf); } else { void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - ret = index_mem(sha1, buf, size, write_object, type, path, - format_check); + ret = index_mem(sha1, buf, size, type, path, flags); munmap(buf, size); } close(fd); return ret; } -int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object) +int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags) { int fd; struct strbuf sb = STRBUF_INIT; @@ -2666,7 +2663,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write if (fd < 0) return error("open(\"%s\"): %s", path, strerror(errno)); - if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path, 0) < 0) + if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0) return error("%s: failed to insert into database", path); break; @@ -2676,7 +2673,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write return error("readlink(\"%s\"): %s", path, errstr); } - if (!write_object) + if (!(flags & HASH_WRITE_OBJECT)) hash_sha1_file(sb.buf, sb.len, blob_type, sha1); else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1)) return error("%s: failed to insert into database", From 7b41e1e15b2cce13deaafc0aab10580036346a5a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 8 May 2011 01:47:34 -0700 Subject: [PATCH 02/12] index_fd(): split into two helper functions Split out the case where we do not know the size of the input (hence we read everything into a strbuf before doing anything) to index_pipe(), and the other case where we mmap or read the whole data to index_bulk(). 
Signed-off-by: Junio C Hamano --- sha1_file.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index 17c179c9fd..49416b0291 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2619,22 +2619,29 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, return ret; } +static int index_pipe(unsigned char *sha1, int fd, enum object_type type, + const char *path, unsigned flags) +{ + struct strbuf sbuf = STRBUF_INIT; + int ret; + + if (strbuf_read(&sbuf, fd, 4096) >= 0) + ret = index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags); + else + ret = -1; + strbuf_release(&sbuf); + return ret; +} + #define SMALL_FILE_SIZE (32*1024) -int index_fd(unsigned char *sha1, int fd, struct stat *st, - enum object_type type, const char *path, unsigned flags) +static int index_core(unsigned char *sha1, int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) { int ret; - size_t size = xsize_t(st->st_size); - if (!S_ISREG(st->st_mode)) { - struct strbuf sbuf = STRBUF_INIT; - if (strbuf_read(&sbuf, fd, 4096) >= 0) - ret = index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags); - else - ret = -1; - strbuf_release(&sbuf); - } else if (!size) { + if (!size) { ret = index_mem(sha1, NULL, size, type, path, flags); } else if (size <= SMALL_FILE_SIZE) { char *buf = xmalloc(size); @@ -2648,6 +2655,19 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, ret = index_mem(sha1, buf, size, type, path, flags); munmap(buf, size); } + return ret; +} + +int index_fd(unsigned char *sha1, int fd, struct stat *st, + enum object_type type, const char *path, unsigned flags) +{ + int ret; + size_t size = xsize_t(st->st_size); + + if (!S_ISREG(st->st_mode)) + ret = index_pipe(sha1, fd, type, path, flags); + else + ret = index_core(sha1, fd, size, type, path, flags); close(fd); return ret; } From ec70f52f6fb6d3e08c7b24f8b5bf25502d8ee59b Mon Sep 17 00:00:00 2001 From: Junio C Hamano 
Date: Mon, 9 May 2011 12:52:12 -0700 Subject: [PATCH 03/12] convert: rename the "eol" global variable to "core_eol" Yes, it is clear that "eol" wants to mean some sort of end-of-line thing, but as the name of a global variable, it is way too short to describe what kind of end-of-line thing it wants to represent. Besides, there are many codepaths that want to use their own local "char *eol" variable to point at the end of the current line they are processing. This global variable holds what we read from core.eol configuration variable. Name it as such. Signed-off-by: Junio C Hamano --- cache.h | 2 +- config.c | 12 ++++++------ convert.c | 4 ++-- environment.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cache.h b/cache.h index 2b34116624..4e9123b77b 100644 --- a/cache.h +++ b/cache.h @@ -606,7 +606,7 @@ enum eol { #endif }; -extern enum eol eol; +extern enum eol core_eol; enum branch_track { BRANCH_TRACK_UNSPECIFIED = -1, diff --git a/config.c b/config.c index 5f9ec28945..671c8df2cc 100644 --- a/config.c +++ b/config.c @@ -583,7 +583,7 @@ static int git_default_core_config(const char *var, const char *value) if (!strcmp(var, "core.autocrlf")) { if (value && !strcasecmp(value, "input")) { - if (eol == EOL_CRLF) + if (core_eol == EOL_CRLF) return error("core.autocrlf=input conflicts with core.eol=crlf"); auto_crlf = AUTO_CRLF_INPUT; return 0; @@ -603,14 +603,14 @@ static int git_default_core_config(const char *var, const char *value) if (!strcmp(var, "core.eol")) { if (value && !strcasecmp(value, "lf")) - eol = EOL_LF; + core_eol = EOL_LF; else if (value && !strcasecmp(value, "crlf")) - eol = EOL_CRLF; + core_eol = EOL_CRLF; else if (value && !strcasecmp(value, "native")) - eol = EOL_NATIVE; + core_eol = EOL_NATIVE; else - eol = EOL_UNSET; - if (eol == EOL_CRLF && auto_crlf == AUTO_CRLF_INPUT) + core_eol = EOL_UNSET; + if (core_eol == EOL_CRLF && auto_crlf == AUTO_CRLF_INPUT) return error("core.autocrlf=input conflicts with core.eol=crlf"); 
return 0; } diff --git a/convert.c b/convert.c index 7eb51b16ed..4dba329e50 100644 --- a/convert.c +++ b/convert.c @@ -113,10 +113,10 @@ static enum eol determine_output_conversion(enum action action) return EOL_CRLF; else if (auto_crlf == AUTO_CRLF_INPUT) return EOL_LF; - else if (eol == EOL_UNSET) + else if (core_eol == EOL_UNSET) return EOL_NATIVE; } - return eol; + return core_eol; } static void check_safe_crlf(const char *path, enum action action, diff --git a/environment.c b/environment.c index 40185bc854..7fe9f10124 100644 --- a/environment.c +++ b/environment.c @@ -43,7 +43,7 @@ const char *askpass_program; const char *excludes_file; enum auto_crlf auto_crlf = AUTO_CRLF_FALSE; int read_replace_refs = 1; -enum eol eol = EOL_UNSET; +enum eol core_eol = EOL_UNSET; enum safe_crlf safe_crlf = SAFE_CRLF_WARN; unsigned whitespace_rule_cfg = WS_DEFAULT_RULE; enum branch_track git_branch_track = BRANCH_TRACK_REMOTE; From c61dcff9d6944eb35abf7fc7faa36f23a49fabf6 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 9 May 2011 13:12:57 -0700 Subject: [PATCH 04/12] convert: give saner names to crlf/eol variables, types and functions Back when the conversion was only about the end-of-line convention, it might have made sense to call what we do upon seeing CR/LF simply an "action", but these days the conversion routines do a lot more than just tweaking the line ending. Rename "action" to "crlf_action". The function that decides what end of line conversion to use on the output codepath was called "determine_output_conversion", as if there is no other kind of output conversion. Rename it to "output_eol"; it is a function that returns what EOL convention is to be used. A function that decides what "crlf_action" needs to be used on the input codepath, given what conversion attribute is set to the path and global end-of-line convention, was called "determine_action". Rename it to "input_crlf_action". 
Signed-off-by: Junio C Hamano --- convert.c | 61 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/convert.c b/convert.c index 4dba329e50..e0ee245153 100644 --- a/convert.c +++ b/convert.c @@ -12,7 +12,7 @@ * translation when the "text" attribute or "auto_crlf" option is set. */ -enum action { +enum crlf_action { CRLF_GUESS = -1, CRLF_BINARY = 0, CRLF_TEXT, @@ -94,9 +94,9 @@ static int is_binary(unsigned long size, struct text_stat *stats) return 0; } -static enum eol determine_output_conversion(enum action action) +static enum eol output_eol(enum crlf_action crlf_action) { - switch (action) { + switch (crlf_action) { case CRLF_BINARY: return EOL_UNSET; case CRLF_CRLF: @@ -119,13 +119,13 @@ static enum eol determine_output_conversion(enum action action) return core_eol; } -static void check_safe_crlf(const char *path, enum action action, +static void check_safe_crlf(const char *path, enum crlf_action crlf_action, struct text_stat *stats, enum safe_crlf checksafe) { if (!checksafe) return; - if (determine_output_conversion(action) == EOL_LF) { + if (output_eol(crlf_action) == EOL_LF) { /* * CRLFs would not be restored by checkout: * check if we'd remove CRLFs @@ -136,7 +136,7 @@ static void check_safe_crlf(const char *path, enum action action, else /* i.e. 
SAFE_CRLF_FAIL */ die("CRLF would be replaced by LF in %s.", path); } - } else if (determine_output_conversion(action) == EOL_CRLF) { + } else if (output_eol(crlf_action) == EOL_CRLF) { /* * CRLFs would be added by checkout: * check if we have "naked" LFs @@ -188,18 +188,19 @@ static int has_cr_in_index(const char *path) } static int crlf_to_git(const char *path, const char *src, size_t len, - struct strbuf *buf, enum action action, enum safe_crlf checksafe) + struct strbuf *buf, + enum crlf_action crlf_action, enum safe_crlf checksafe) { struct text_stat stats; char *dst; - if (action == CRLF_BINARY || - (action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len) + if (crlf_action == CRLF_BINARY || + (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len) return 0; gather_stats(src, len, &stats); - if (action == CRLF_AUTO || action == CRLF_GUESS) { + if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) { /* * We're currently not going to even try to convert stuff * that has bare CR characters. Does anybody do that crazy @@ -214,7 +215,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len, if (is_binary(len, &stats)) return 0; - if (action == CRLF_GUESS) { + if (crlf_action == CRLF_GUESS) { /* * If the file in the index has any CR in it, do not convert. * This is the new safer autocrlf handling. @@ -224,7 +225,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len, } } - check_safe_crlf(path, action, &stats, checksafe); + check_safe_crlf(path, crlf_action, &stats, checksafe); /* Optimization: No CR? Nothing to convert, regardless. 
*/ if (!stats.cr) @@ -234,7 +235,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len, if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); dst = buf->buf; - if (action == CRLF_AUTO || action == CRLF_GUESS) { + if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) { /* * If we guessed, we already know we rejected a file with * lone CR, and we can strip a CR without looking at what @@ -257,12 +258,12 @@ static int crlf_to_git(const char *path, const char *src, size_t len, } static int crlf_to_worktree(const char *path, const char *src, size_t len, - struct strbuf *buf, enum action action) + struct strbuf *buf, enum crlf_action crlf_action) { char *to_free = NULL; struct text_stat stats; - if (!len || determine_output_conversion(action) != EOL_CRLF) + if (!len || output_eol(crlf_action) != EOL_CRLF) return 0; gather_stats(src, len, &stats); @@ -275,8 +276,8 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len, if (stats.lf == stats.crlf) return 0; - if (action == CRLF_AUTO || action == CRLF_GUESS) { - if (action == CRLF_GUESS) { + if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) { + if (crlf_action == CRLF_GUESS) { /* If we have any CR or CRLF line endings, we do not touch it */ /* This is the new safer autocrlf-handling */ if (stats.cr > 0 || stats.crlf > 0) @@ -715,7 +716,7 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check) return !!ATTR_TRUE(value); } -static enum action determine_action(enum action text_attr, enum eol eol_attr) +static enum crlf_action input_crlf_action(enum crlf_action text_attr, enum eol eol_attr) { if (text_attr == CRLF_BINARY) return CRLF_BINARY; @@ -730,7 +731,7 @@ int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe) { struct git_attr_check check[5]; - enum action action = CRLF_GUESS; + enum crlf_action crlf_action = CRLF_GUESS; enum eol eol_attr = EOL_UNSET; int ident = 
0, ret = 0; const char *filter = NULL; @@ -738,9 +739,9 @@ int convert_to_git(const char *path, const char *src, size_t len, setup_convert_check(check); if (!git_checkattr(path, ARRAY_SIZE(check), check)) { struct convert_driver *drv; - action = git_path_check_crlf(path, check + 4); - if (action == CRLF_GUESS) - action = git_path_check_crlf(path, check + 0); + crlf_action = git_path_check_crlf(path, check + 4); + if (crlf_action == CRLF_GUESS) + crlf_action = git_path_check_crlf(path, check + 0); ident = git_path_check_ident(path, check + 1); drv = git_path_check_convert(path, check + 2); eol_attr = git_path_check_eol(path, check + 3); @@ -753,8 +754,8 @@ int convert_to_git(const char *path, const char *src, size_t len, src = dst->buf; len = dst->len; } - action = determine_action(action, eol_attr); - ret |= crlf_to_git(path, src, len, dst, action, checksafe); + crlf_action = input_crlf_action(crlf_action, eol_attr); + ret |= crlf_to_git(path, src, len, dst, crlf_action, checksafe); if (ret) { src = dst->buf; len = dst->len; @@ -767,7 +768,7 @@ static int convert_to_working_tree_internal(const char *path, const char *src, int normalizing) { struct git_attr_check check[5]; - enum action action = CRLF_GUESS; + enum crlf_action crlf_action = CRLF_GUESS; enum eol eol_attr = EOL_UNSET; int ident = 0, ret = 0; const char *filter = NULL; @@ -775,9 +776,9 @@ static int convert_to_working_tree_internal(const char *path, const char *src, setup_convert_check(check); if (!git_checkattr(path, ARRAY_SIZE(check), check)) { struct convert_driver *drv; - action = git_path_check_crlf(path, check + 4); - if (action == CRLF_GUESS) - action = git_path_check_crlf(path, check + 0); + crlf_action = git_path_check_crlf(path, check + 4); + if (crlf_action == CRLF_GUESS) + crlf_action = git_path_check_crlf(path, check + 0); ident = git_path_check_ident(path, check + 1); drv = git_path_check_convert(path, check + 2); eol_attr = git_path_check_eol(path, check + 3); @@ -795,8 +796,8 @@ static 
int convert_to_working_tree_internal(const char *path, const char *src, * is a smudge filter. The filter might expect CRLFs. */ if (filter || !normalizing) { - action = determine_action(action, eol_attr); - ret |= crlf_to_worktree(path, src, len, dst, action); + crlf_action = input_crlf_action(crlf_action, eol_attr); + ret |= crlf_to_worktree(path, src, len, dst, crlf_action); if (ret) { src = dst->buf; len = dst->len; From 83295964b3289e957d028960f14a2b71348c39ed Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 9 May 2011 11:23:04 -0700 Subject: [PATCH 05/12] convert: make it safer to add conversion attributes The places that need to pass an array of "struct git_attr_check" needed to be careful to pass a large enough array and know at which index each element lived. Make it safer and easier to code these. Besides, the hard-coded sequence of initializing various attributes was too ugly after we gained more than a few attributes. Signed-off-by: Junio C Hamano --- convert.c | 48 ++++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/convert.c b/convert.c index e0ee245153..a05820ba63 100644 --- a/convert.c +++ b/convert.c @@ -475,30 +475,6 @@ static int read_convert_config(const char *var, const char *value, void *cb) return 0; } -static void setup_convert_check(struct git_attr_check *check) -{ - static struct git_attr *attr_text; - static struct git_attr *attr_crlf; - static struct git_attr *attr_eol; - static struct git_attr *attr_ident; - static struct git_attr *attr_filter; - - if (!attr_text) { - attr_text = git_attr("text"); - attr_crlf = git_attr("crlf"); - attr_eol = git_attr("eol"); - attr_ident = git_attr("ident"); - attr_filter = git_attr("filter"); - user_convert_tail = &user_convert; - git_config(read_convert_config, NULL); - } - check[0].attr = attr_crlf; - check[1].attr = attr_ident; - check[2].attr = attr_filter; - check[3].attr = attr_eol; - check[4].attr = attr_text; -} - static int 
count_ident(const char *cp, unsigned long size) { /* @@ -727,10 +703,30 @@ static enum crlf_action input_crlf_action(enum crlf_action text_attr, enum eol e return text_attr; } +static const char *conv_attr_name[] = { + "crlf", "ident", "filter", "eol", "text", +}; +#define NUM_CONV_ATTRS ARRAY_SIZE(conv_attr_name) + +static void setup_convert_check(struct git_attr_check *check) +{ + int i; + static struct git_attr_check ccheck[NUM_CONV_ATTRS]; + + if (!ccheck[0].attr) { + for (i = 0; i < NUM_CONV_ATTRS; i++) + ccheck[i].attr = git_attr(conv_attr_name[i]); + user_convert_tail = &user_convert; + git_config(read_convert_config, NULL); + } + for (i = 0; i < NUM_CONV_ATTRS; i++) + check[i].attr = ccheck[i].attr; +} + int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe) { - struct git_attr_check check[5]; + struct git_attr_check check[NUM_CONV_ATTRS]; enum crlf_action crlf_action = CRLF_GUESS; enum eol eol_attr = EOL_UNSET; int ident = 0, ret = 0; @@ -767,7 +763,7 @@ static int convert_to_working_tree_internal(const char *path, const char *src, size_t len, struct strbuf *dst, int normalizing) { - struct git_attr_check check[5]; + struct git_attr_check check[NUM_CONV_ATTRS]; enum crlf_action crlf_action = CRLF_GUESS; enum eol eol_attr = EOL_UNSET; int ident = 0, ret = 0; From 3bfba20dae16384cb7112268462bd01d30d4a698 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 9 May 2011 13:58:31 -0700 Subject: [PATCH 06/12] convert: make it harder to screw up adding a conversion attribute The current internal API requires the callers of setup_convert_check() to supply the git_attr_check structures (hence they need to know how many to allocate), but they grab the same set of attributes for given path. Define a new convert_attrs() API that fills a higher level information that the callers (convert_to_git and convert_to_working_tree) really want, and move the common code to interact with the attributes system to it. 
Signed-off-by: Junio C Hamano --- convert.c | 79 ++++++++++++++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/convert.c b/convert.c index a05820ba63..efc7e07d47 100644 --- a/convert.c +++ b/convert.c @@ -703,12 +703,19 @@ static enum crlf_action input_crlf_action(enum crlf_action text_attr, enum eol e return text_attr; } +struct conv_attrs { + struct convert_driver *drv; + enum crlf_action crlf_action; + enum eol eol_attr; + int ident; +}; + static const char *conv_attr_name[] = { "crlf", "ident", "filter", "eol", "text", }; #define NUM_CONV_ATTRS ARRAY_SIZE(conv_attr_name) -static void setup_convert_check(struct git_attr_check *check) +static void convert_attrs(struct conv_attrs *ca, const char *path) { int i; static struct git_attr_check ccheck[NUM_CONV_ATTRS]; @@ -719,70 +726,60 @@ static void setup_convert_check(struct git_attr_check *check) user_convert_tail = &user_convert; git_config(read_convert_config, NULL); } - for (i = 0; i < NUM_CONV_ATTRS; i++) - check[i].attr = ccheck[i].attr; + + if (!git_checkattr(path, NUM_CONV_ATTRS, ccheck)) { + ca->crlf_action = git_path_check_crlf(path, ccheck + 4); + if (ca->crlf_action == CRLF_GUESS) + ca->crlf_action = git_path_check_crlf(path, ccheck + 0); + ca->ident = git_path_check_ident(path, ccheck + 1); + ca->drv = git_path_check_convert(path, ccheck + 2); + ca->eol_attr = git_path_check_eol(path, ccheck + 3); + } else { + ca->drv = NULL; + ca->crlf_action = CRLF_GUESS; + ca->eol_attr = EOL_UNSET; + ca->ident = 0; + } } int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe) { - struct git_attr_check check[NUM_CONV_ATTRS]; - enum crlf_action crlf_action = CRLF_GUESS; - enum eol eol_attr = EOL_UNSET; - int ident = 0, ret = 0; + int ret = 0; const char *filter = NULL; + struct conv_attrs ca; - setup_convert_check(check); - if (!git_checkattr(path, ARRAY_SIZE(check), check)) { - struct convert_driver *drv; - 
crlf_action = git_path_check_crlf(path, check + 4); - if (crlf_action == CRLF_GUESS) - crlf_action = git_path_check_crlf(path, check + 0); - ident = git_path_check_ident(path, check + 1); - drv = git_path_check_convert(path, check + 2); - eol_attr = git_path_check_eol(path, check + 3); - if (drv && drv->clean) - filter = drv->clean; - } + convert_attrs(&ca, path); + if (ca.drv) + filter = ca.drv->clean; ret |= apply_filter(path, src, len, dst, filter); if (ret) { src = dst->buf; len = dst->len; } - crlf_action = input_crlf_action(crlf_action, eol_attr); - ret |= crlf_to_git(path, src, len, dst, crlf_action, checksafe); + ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); + ret |= crlf_to_git(path, src, len, dst, ca.crlf_action, checksafe); if (ret) { src = dst->buf; len = dst->len; } - return ret | ident_to_git(path, src, len, dst, ident); + return ret | ident_to_git(path, src, len, dst, ca.ident); } static int convert_to_working_tree_internal(const char *path, const char *src, size_t len, struct strbuf *dst, int normalizing) { - struct git_attr_check check[NUM_CONV_ATTRS]; - enum crlf_action crlf_action = CRLF_GUESS; - enum eol eol_attr = EOL_UNSET; - int ident = 0, ret = 0; + int ret = 0; const char *filter = NULL; + struct conv_attrs ca; - setup_convert_check(check); - if (!git_checkattr(path, ARRAY_SIZE(check), check)) { - struct convert_driver *drv; - crlf_action = git_path_check_crlf(path, check + 4); - if (crlf_action == CRLF_GUESS) - crlf_action = git_path_check_crlf(path, check + 0); - ident = git_path_check_ident(path, check + 1); - drv = git_path_check_convert(path, check + 2); - eol_attr = git_path_check_eol(path, check + 3); - if (drv && drv->smudge) - filter = drv->smudge; - } + convert_attrs(&ca, path); + if (ca.drv) + filter = ca.drv->smudge; - ret |= ident_to_worktree(path, src, len, dst, ident); + ret |= ident_to_worktree(path, src, len, dst, ca.ident); if (ret) { src = dst->buf; len = dst->len; @@ -792,8 +789,8 @@ static int 
convert_to_working_tree_internal(const char *path, const char *src, * is a smudge filter. The filter might expect CRLFs. */ if (filter || !normalizing) { - crlf_action = input_crlf_action(crlf_action, eol_attr); - ret |= crlf_to_worktree(path, src, len, dst, crlf_action); + ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); + ret |= crlf_to_worktree(path, src, len, dst, ca.crlf_action); if (ret) { src = dst->buf; len = dst->len; From 4dd1fbc7b1df0030f813a05cee19cad2c7a9cbf9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 8 May 2011 01:47:35 -0700 Subject: [PATCH 07/12] Bigfile: teach "git add" to send a large file straight to a pack When adding new content to the repository, we have always slurped the blob in its entirety in-core first, and computed the object name and compressed it into a loose object file. Handling large binary files (e.g. video and audio assets for games) has been problematic because of this design. At the middle level of "git add" callchain is an internal API index_fd() that takes an open file descriptor to read from the working tree file being added with its size. Teach it to call out to fast-import when adding a large blob. The write-out codepath in entry.c::write_entry() should be taught to stream, instead of reading everything in core. This should not be so hard to implement, especially if we limit ourselves only to loose object files and non-delta representation in packfiles. 
Signed-off-by: Junio C Hamano --- sha1_file.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++- t/t1050-large.sh | 27 ++++++++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) create mode 100755 t/t1050-large.sh diff --git a/sha1_file.c b/sha1_file.c index 49416b0291..f0ca6a1749 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -11,6 +11,7 @@ #include "pack.h" #include "blob.h" #include "commit.h" +#include "run-command.h" #include "tag.h" #include "tree.h" #include "tree-walk.h" @@ -2658,6 +2659,85 @@ static int index_core(unsigned char *sha1, int fd, size_t size, return ret; } +/* + * This creates one packfile per large blob, because the caller + * immediately wants the result sha1, and fast-import can report the + * object name via marks mechanism only by closing the created + * packfile. + * + * This also bypasses the usual "convert-to-git" dance, and that is on + * purpose. We could write a streaming version of the converting + * functions and insert that before feeding the data to fast-import + * (or equivalent in-core API described above), but the primary + * motivation for trying to stream from the working tree file and to + * avoid mmaping it in core is to deal with large binary blobs, and + * by definition they do _not_ want to get any conversion. 
+ */ +static int index_stream(unsigned char *sha1, int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) +{ + struct child_process fast_import; + char export_marks[512]; + const char *argv[] = { "fast-import", "--quiet", export_marks, NULL }; + char tmpfile[512]; + char fast_import_cmd[512]; + char buf[512]; + int len, tmpfd; + + strcpy(tmpfile, git_path("hashstream_XXXXXX")); + tmpfd = git_mkstemp_mode(tmpfile, 0600); + if (tmpfd < 0) + die_errno("cannot create tempfile: %s", tmpfile); + if (close(tmpfd)) + die_errno("cannot close tempfile: %s", tmpfile); + sprintf(export_marks, "--export-marks=%s", tmpfile); + + memset(&fast_import, 0, sizeof(fast_import)); + fast_import.in = -1; + fast_import.argv = argv; + fast_import.git_cmd = 1; + if (start_command(&fast_import)) + die_errno("index-stream: git fast-import failed"); + + len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n", + (unsigned long) size); + write_or_whine(fast_import.in, fast_import_cmd, len, + "index-stream: feeding fast-import"); + while (size) { + char buf[10240]; + size_t sz = size < sizeof(buf) ? 
size : sizeof(buf); + ssize_t actual; /* signed, so the error check below can actually fire */ + + actual = read_in_full(fd, buf, sz); + if (actual < 0) + die_errno("index-stream: reading input"); + if (write_in_full(fast_import.in, buf, actual) != actual) + die_errno("index-stream: feeding fast-import"); + size -= actual; + } + if (close(fast_import.in)) + die_errno("index-stream: closing fast-import"); + if (finish_command(&fast_import)) + die_errno("index-stream: finishing fast-import"); + + tmpfd = open(tmpfile, O_RDONLY); + if (tmpfd < 0) + die_errno("index-stream: cannot open fast-import mark"); + len = read(tmpfd, buf, sizeof(buf)); + if (len < 0) + die_errno("index-stream: reading fast-import mark"); + if (close(tmpfd) < 0) + die_errno("index-stream: closing fast-import mark"); + if (unlink(tmpfile)) + die_errno("index-stream: unlinking fast-import mark"); + if (len != 44 || + memcmp(":1 ", buf, 3) || + get_sha1_hex(buf + 3, sha1)) + die("index-stream: unexpected fast-import mark: <%.*s>", len, buf); + return 0; +} + int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags) { @@ -2666,8 +2746,10 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, if (!S_ISREG(st->st_mode)) ret = index_pipe(sha1, fd, type, path, flags); - else + else if (size <= big_file_threshold || type != OBJ_BLOB) ret = index_core(sha1, fd, size, type, path, flags); + else + ret = index_stream(sha1, fd, size, type, path, flags); close(fd); return ret; } diff --git a/t/t1050-large.sh b/t/t1050-large.sh new file mode 100755 index 0000000000..deba111bd7 --- /dev/null +++ b/t/t1050-large.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# Copyright (c) 2011, Google Inc. + +test_description='adding and checking out large blobs' + +. 
./test-lib.sh + +test_expect_success setup ' + git config core.bigfilethreshold 200k && + echo X | dd of=large bs=1k seek=2000 +' + +test_expect_success 'add a large file' ' + git add large && + # make sure we got a packfile and no loose objects + test -f .git/objects/pack/pack-*.pack && + test ! -f .git/objects/??/?????????????????????????????????????? +' + +test_expect_success 'checkout a large file' ' + large=$(git rev-parse :large) && + git update-index --add --cacheinfo 100644 $large another && + git checkout another && + cmp large another ;# this must not be test_cmp +' + +test_done From fea33a1ef37a5891ebd1fcf6018849150e7b91cb Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 15 May 2011 12:54:50 -0700 Subject: [PATCH 08/12] Declare lookup_replace_object() in cache.h, not in commit.h The declaration is misplaced as the replace API is supposed to affect not just commits, but all types of objects. Signed-off-by: Junio C Hamano --- cache.h | 1 + commit.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cache.h b/cache.h index 2b34116624..e09cf75013 100644 --- a/cache.h +++ b/cache.h @@ -763,6 +763,7 @@ static inline void *read_sha1_file(const unsigned char *sha1, enum object_type * { return read_sha1_file_repl(sha1, type, size, NULL); } +extern const unsigned char *lookup_replace_object(const unsigned char *sha1); extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1); extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1); extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); diff --git a/commit.h b/commit.h index b3c3bb70c5..f251e75a5b 100644 --- a/commit.h +++ b/commit.h @@ -145,8 +145,6 @@ struct commit_graft *read_graft_line(char *buf, int len); int register_commit_graft(struct commit_graft *, int); struct commit_graft *lookup_commit_graft(const unsigned char *sha1); -const unsigned char 
*lookup_replace_object(const unsigned char *sha1); - extern struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2, int cleanup); extern struct commit_list *get_merge_bases_many(struct commit *one, int n, struct commit **twos, int cleanup); extern struct commit_list *get_octopus_merge_bases(struct commit_list *in); From abb25ac365791e16563dfd09e4ecd3e7e4dcf6b1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 15 May 2011 12:54:51 -0700 Subject: [PATCH 09/12] t6050: make sure we test not just commit replacement The replacement mechanism should affect all types of objects not just commits, so make sure it deals with at least a blob. Signed-off-by: Junio C Hamano --- t/t6050-replace.sh | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/t/t6050-replace.sh b/t/t6050-replace.sh index ae2194e07d..5c87f28e4e 100755 --- a/t/t6050-replace.sh +++ b/t/t6050-replace.sh @@ -236,6 +236,20 @@ test_expect_success 'index-pack and replacements' ' git index-pack test-*.pack ' -# -# +test_expect_success 'not just commits' ' + echo replaced >file && + git add file && + REPLACED=$(git rev-parse :file) && + mv file file.replaced && + + echo original >file && + git add file && + ORIGINAL=$(git rev-parse :file) && + git update-ref refs/replace/$ORIGINAL $REPLACED && + mv file file.original && + + git checkout file && + test_cmp file.replaced file +' + test_done From 4bbf5a2615420ac50c696b72dc303727e6218562 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 15 May 2011 12:54:52 -0700 Subject: [PATCH 10/12] read_sha1_file(): get rid of read_sha1_file_repl() madness Most callers want to silently get a replacement object, and they do not care what the real name of the replacement object is. Worse yet, no sane interface to return the underlying object without replacement is provided. Remove the function and make only the few callers that want the name of the replacement object find it themselves. 
Signed-off-by: Junio C Hamano --- builtin/mktag.c | 4 ++-- cache.h | 6 +----- object.c | 4 ++-- sha1_file.c | 12 ++++-------- 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/builtin/mktag.c b/builtin/mktag.c index 324a267163..640ab64f41 100644 --- a/builtin/mktag.c +++ b/builtin/mktag.c @@ -23,8 +23,8 @@ static int verify_object(const unsigned char *sha1, const char *expected_type) int ret = -1; enum object_type type; unsigned long size; - const unsigned char *repl; - void *buffer = read_sha1_file_repl(sha1, &type, &size, &repl); + void *buffer = read_sha1_file(sha1, &type, &size); + const unsigned char *repl = lookup_replace_object(sha1); if (buffer) { if (type == type_from_string(expected_type)) diff --git a/cache.h b/cache.h index e09cf75013..a9ae100542 100644 --- a/cache.h +++ b/cache.h @@ -758,11 +758,7 @@ int offset_1st_component(const char *path); /* Read and unpack a sha1 file into memory, write memory to a sha1 file */ extern int sha1_object_info(const unsigned char *, unsigned long *); -extern void *read_sha1_file_repl(const unsigned char *sha1, enum object_type *type, unsigned long *size, const unsigned char **replacement); -static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) -{ - return read_sha1_file_repl(sha1, type, size, NULL); -} +extern void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size); extern const unsigned char *lookup_replace_object(const unsigned char *sha1); extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1); extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1); diff --git a/object.c b/object.c index 7e1f2bbed2..31976b5d70 100644 --- a/object.c +++ b/object.c @@ -188,8 +188,8 @@ struct object *parse_object(const unsigned char *sha1) unsigned long size; enum object_type type; int eaten; - const unsigned char *repl; - void *buffer 
= read_sha1_file_repl(sha1, &type, &size, &repl); + const unsigned char *repl = lookup_replace_object(sha1); + void *buffer = read_sha1_file(sha1, &type, &size); if (buffer) { struct object *obj; diff --git a/sha1_file.c b/sha1_file.c index 889fe71830..5d80febde2 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2206,10 +2206,9 @@ static void *read_object(const unsigned char *sha1, enum object_type *type, * deal with them should arrange to call read_object() and give error * messages themselves. */ -void *read_sha1_file_repl(const unsigned char *sha1, - enum object_type *type, - unsigned long *size, - const unsigned char **replacement) +void *read_sha1_file(const unsigned char *sha1, + enum object_type *type, + unsigned long *size) { const unsigned char *repl = lookup_replace_object(sha1); void *data; @@ -2218,11 +2217,8 @@ void *read_sha1_file_repl(const unsigned char *sha1, errno = 0; data = read_object(repl, type, size); - if (data) { - if (replacement) - *replacement = repl; + if (data) return data; - } if (errno && errno != ENOENT) die_errno("failed to read object %s", sha1_to_hex(sha1)); From e1111cef23cef1d48e9e7f222db87d58c1d51ece Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 15 May 2011 12:54:53 -0700 Subject: [PATCH 11/12] inline lookup_replace_object() calls In a repository without object replacement, lookup_replace_object() should be a no-op. Check the flag "read_replace_refs" on the side of the caller, and bypass a function call when we know we are not dealing with replacement. Also, even when we are set up to replace objects, if we do not find any replacement defined, flip that flag off to avoid function call overhead for all the later object accesses. As this changes the semantics of the flag from "do we need to read the replacement definition?" to "do we need to check with the lookup table?" the flag needs to be renamed later to something saner, e.g. "use_replace", when the codebase is calmer, but not now. 
Signed-off-by: Junio C Hamano --- cache.h | 12 ++++++++++-- environment.c | 2 +- replace_object.c | 4 +++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/cache.h b/cache.h index a9ae100542..c10a91d90a 100644 --- a/cache.h +++ b/cache.h @@ -756,10 +756,18 @@ char *strip_path_suffix(const char *path, const char *suffix); int daemon_avoid_alias(const char *path); int offset_1st_component(const char *path); +/* object replacement */ +extern void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size); +extern const unsigned char *do_lookup_replace_object(const unsigned char *sha1); +static inline const unsigned char *lookup_replace_object(const unsigned char *sha1) +{ + if (!read_replace_refs) + return sha1; + return do_lookup_replace_object(sha1); +} + /* Read and unpack a sha1 file into memory, write memory to a sha1 file */ extern int sha1_object_info(const unsigned char *, unsigned long *); -extern void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size); -extern const unsigned char *lookup_replace_object(const unsigned char *sha1); extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1); extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1); extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); diff --git a/environment.c b/environment.c index 40185bc854..91828201d8 100644 --- a/environment.c +++ b/environment.c @@ -42,7 +42,7 @@ const char *editor_program; const char *askpass_program; const char *excludes_file; enum auto_crlf auto_crlf = AUTO_CRLF_FALSE; -int read_replace_refs = 1; +int read_replace_refs = 1; /* NEEDSWORK: rename to use_replace_refs */ enum eol eol = EOL_UNSET; enum safe_crlf safe_crlf = SAFE_CRLF_WARN; unsigned whitespace_rule_cfg = WS_DEFAULT_RULE; diff --git a/replace_object.c b/replace_object.c index 7c6c7544ad..d0b1548726 100644 
--- a/replace_object.c +++ b/replace_object.c @@ -85,12 +85,14 @@ static void prepare_replace_object(void) for_each_replace_ref(register_replace_ref, NULL); replace_object_prepared = 1; + if (!replace_object_nr) + read_replace_refs = 0; } /* We allow "recursive" replacement. Only within reason, though */ #define MAXREPLACEDEPTH 5 -const unsigned char *lookup_replace_object(const unsigned char *sha1) +const unsigned char *do_lookup_replace_object(const unsigned char *sha1) { int pos, depth = MAXREPLACEDEPTH; const unsigned char *cur = sha1; From 5bf29b950063c8fa2f3666cb6cf2ca20be61f3d1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 15 May 2011 12:54:54 -0700 Subject: [PATCH 12/12] read_sha1_file(): allow selective bypassing of replacement mechanism The way the "object replacement" mechanism was tucked into the read_sha1_file() interface was suboptimal in a couple of ways: - Callers that want it to die with useful diagnosis upon seeing a corrupt object do not have a way to say that they do not want any object replacement. - Callers who do not want it to die but want to handle the errors themselves are told to arrange to call read_object(), but the function does not use the replacement mechanism, and also it is a file scope static function that not many callers can call to begin with. This adds a read_sha1_file_extended() that takes a set of flags; the callers of read_sha1_file() pass a flag READ_SHA1_FILE_REPLACE to ask for the object replacement mechanism to kick in. Later, we could add another flag bit to tell the function to return an error instead of dying and then remove the misguided "call read_object() yourself". 
Signed-off-by: Junio C Hamano --- cache.h | 7 ++++++- sha1_file.c | 10 ++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cache.h b/cache.h index c10a91d90a..5f1f5c3395 100644 --- a/cache.h +++ b/cache.h @@ -757,7 +757,12 @@ int daemon_avoid_alias(const char *path); int offset_1st_component(const char *path); /* object replacement */ -extern void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size); +#define READ_SHA1_FILE_REPLACE 1 +extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag); +static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) +{ + return read_sha1_file_extended(sha1, type, size, READ_SHA1_FILE_REPLACE); +} extern const unsigned char *do_lookup_replace_object(const unsigned char *sha1); static inline const unsigned char *lookup_replace_object(const unsigned char *sha1) { diff --git a/sha1_file.c b/sha1_file.c index 5d80febde2..7e6e976c23 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2206,14 +2206,16 @@ static void *read_object(const unsigned char *sha1, enum object_type *type, * deal with them should arrange to call read_object() and give error * messages themselves. */ -void *read_sha1_file(const unsigned char *sha1, - enum object_type *type, - unsigned long *size) +void *read_sha1_file_extended(const unsigned char *sha1, + enum object_type *type, + unsigned long *size, + unsigned flag) { - const unsigned char *repl = lookup_replace_object(sha1); void *data; char *path; const struct packed_git *p; + const unsigned char *repl = (flag & READ_SHA1_FILE_REPLACE) + ? lookup_replace_object(sha1) : sha1; errno = 0; data = read_object(repl, type, size);