From a1142963710fd71e5e6e0feb0c4020e977301af7 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Wed, 13 May 2020 00:53:41 +0000 Subject: [PATCH 01/44] t1050: match object ID paths in a hash-insensitive way The pattern here looking for failures is specific to SHA-1. Let's create a variable that matches the regex or glob pattern for a path within the objects directory. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t1050-large.sh | 2 +- t/test-lib.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 184b479a21..7f88ea07c2 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -64,7 +64,7 @@ test_expect_success 'add a large file or two' ' test $count = 1 && cnt=$(git show-index <"$idx" | wc -l) && test $cnt = 2 && - for l in .git/objects/??/?????????????????????????????????????? + for l in .git/objects/$OIDPATH_REGEX do test_path_is_file "$l" || continue bad=t diff --git a/t/test-lib.sh b/t/test-lib.sh index baf94546da..77e9a60fcb 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1428,6 +1428,7 @@ test_oid_init ZERO_OID=$(test_oid zero) OID_REGEX=$(echo $ZERO_OID | sed -e 's/0/[0-9a-f]/g') +OIDPATH_REGEX=$(test_oid_to_path $ZERO_OID | sed -e 's/0/[0-9a-f]/g') EMPTY_TREE=$(test_oid empty_tree) EMPTY_BLOB=$(test_oid empty_blob) _z40=$ZERO_OID From b8615c3c63246ae4340239fbad2e6b64cbeb0fa5 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Wed, 13 May 2020 00:53:42 +0000 Subject: [PATCH 02/44] Documentation: document v1 protocol object-format capability Document a capability that indicates which hash algorithms are in use by both sides of a remote connection. Use the term "object-format", since this is the term used for the repository extension as well. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- Documentation/technical/protocol-capabilities.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/technical/protocol-capabilities.txt b/Documentation/technical/protocol-capabilities.txt index 2b267c0da6..36ccd14f97 100644 --- a/Documentation/technical/protocol-capabilities.txt +++ b/Documentation/technical/protocol-capabilities.txt @@ -176,6 +176,21 @@ agent strings are purely informative for statistics and debugging purposes, and MUST NOT be used to programmatically assume the presence or absence of particular features. +object-format +------------- + +This capability, which takes a hash algorithm as an argument, indicates +that the server supports the given hash algorithms. It may be sent +multiple times; if so, the first one given is the one used in the ref +advertisement. + +When provided by the client, this indicates that it intends to use the +given hash algorithm to communicate. The algorithm provided must be one +that the server supports. + +If this capability is not provided, it is assumed that the only +supported algorithm is SHA-1. + symref ------ From 92315e50b275deee8e84d28ee1ff1ad555a5de36 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:49 +0000 Subject: [PATCH 03/44] connect: have ref processing code take struct packet_reader In a future patch, we'll want to access multiple members from struct packet_reader when parsing references. Therefore, have the ref parsing code take pointers to struct reader instead of having to pass multiple arguments to each function. Rename the len variable to "linelen" to make it clearer what the variable does in light of the variable change. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- connect.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/connect.c b/connect.c index 23013c6344..ccc5274189 100644 --- a/connect.c +++ b/connect.c @@ -204,17 +204,19 @@ static void annotate_refs_with_symref_info(struct ref *ref) string_list_clear(&symref, 0); } -static void process_capabilities(const char *line, int *len) +static void process_capabilities(struct packet_reader *reader, int *linelen) { + const char *line = reader->line; int nul_location = strlen(line); - if (nul_location == *len) + if (nul_location == *linelen) return; server_capabilities_v1 = xstrdup(line + nul_location + 1); - *len = nul_location; + *linelen = nul_location; } -static int process_dummy_ref(const char *line) +static int process_dummy_ref(const struct packet_reader *reader) { + const char *line = reader->line; struct object_id oid; const char *name; @@ -234,9 +236,11 @@ static void check_no_capabilities(const char *line, int len) line + strlen(line)); } -static int process_ref(const char *line, int len, struct ref ***list, - unsigned int flags, struct oid_array *extra_have) +static int process_ref(const struct packet_reader *reader, int len, + struct ref ***list, unsigned int flags, + struct oid_array *extra_have) { + const char *line = reader->line; struct object_id old_oid; const char *name; @@ -260,9 +264,10 @@ static int process_ref(const char *line, int len, struct ref ***list, return 1; } -static int process_shallow(const char *line, int len, +static int process_shallow(const struct packet_reader *reader, int len, struct oid_array *shallow_points) { + const char *line = reader->line; const char *arg; struct object_id old_oid; @@ -315,20 +320,20 @@ struct ref **get_remote_heads(struct packet_reader *reader, switch (state) { case EXPECTING_FIRST_REF: - process_capabilities(reader->line, &len); - if (process_dummy_ref(reader->line)) { + process_capabilities(reader, &len); + if (process_dummy_ref(reader)) { state = EXPECTING_SHALLOW; break; } state = EXPECTING_REF; /* fallthrough */ case EXPECTING_REF: - if (process_ref(reader->line, len, &list, flags, extra_have)) + if (process_ref(reader, len, &list, flags, extra_have)) break; state = EXPECTING_SHALLOW; /* fallthrough */ case EXPECTING_SHALLOW: - if (process_shallow(reader->line, len, shallow_points)) + if (process_shallow(reader, len, shallow_points)) break; die(_("protocol error: unexpected '%s'"), reader->line); case EXPECTING_DONE: From 14570dc67d2a500dfb9f33a7445bdbd6133af4ac Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:50 +0000 Subject: [PATCH 04/44] wrapper: add function to compare strings with different NUL termination When parsing capabilities for the pack protocol, there are times we'll want to compare the value of a capability to a NUL-terminated string. Since the data we're reading will be space-terminated, not NUL-terminated, we need a function that compares the two strings, but also checks that they're the same length. Otherwise, if we used strncmp to compare these strings, we might accidentally accept a parameter that was a prefix of the expected value. Add a function, xstrncmpz, that takes a NUL-terminated string and a non-NUL-terminated string, plus a length, and compares them, ensuring that they are the same length. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- git-compat-util.h | 6 ++++++ wrapper.c | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index 8ba576e81e..a3dcfc8613 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -868,6 +868,12 @@ char *xgetcwd(void); FILE *fopen_for_writing(const char *path); FILE *fopen_or_warn(const char *path, const char *mode); +/* + * Like strncmp, but only return zero if s is NUL-terminated and exactly len + * characters long. If it is not, consider it greater than t. + */ +int xstrncmpz(const char *s, const char *t, size_t len); + /* * FREE_AND_NULL(ptr) is like free(ptr) followed by ptr = NULL. Note * that ptr is used twice, so don't pass e.g. ptr++. diff --git a/wrapper.c b/wrapper.c index 3a1c0e0526..4ff4a9c3db 100644 --- a/wrapper.c +++ b/wrapper.c @@ -105,6 +105,14 @@ char *xstrndup(const char *str, size_t len) return xmemdupz(str, p ? p - str : len); } +int xstrncmpz(const char *s, const char *t, size_t len) +{ + int res = strncmp(s, t, len); + if (res) + return res; + return s[len] == '\0' ? 0 : 1; +} + void *xrealloc(void *ptr, size_t size) { void *ret; From bf30dbf82611e3bb7e2bbac1d38bb38ad10c8636 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:51 +0000 Subject: [PATCH 05/44] remote: advertise the object-format capability on the server side Advertise the current hash algorithm in use by using the object-format capability as part of the ref advertisement. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 1 + upload-pack.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index d37ab776b3..a4159b559e 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -248,6 +248,7 @@ static void show_ref(const char *path, const struct object_id *oid) strbuf_addf(&cap, " push-cert=%s", push_cert_nonce); if (advertise_push_options) strbuf_addstr(&cap, " push-options"); + strbuf_addf(&cap, " object-format=%s", the_hash_algo->name); strbuf_addf(&cap, " agent=%s", git_user_agent_sanitized()); packet_write_fmt(1, "%s %s%c%s\n", oid_to_hex(oid), path, 0, cap.buf); diff --git a/upload-pack.c b/upload-pack.c index 902d0ad5e1..df6cb51db7 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1005,7 +1005,7 @@ static int send_ref(const char *refname, const struct object_id *oid, struct strbuf symref_info = STRBUF_INIT; format_symref_info(&symref_info, cb_data); - packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s agent=%s\n", + packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s object-format=%s agent=%s\n", oid_to_hex(oid), refname_nons, 0, capabilities, (allow_unadvertised_object_request & ALLOW_TIP_SHA1) ? @@ -1015,6 +1015,7 @@ static int send_ref(const char *refname, const struct object_id *oid, stateless_rpc ? " no-done" : "", symref_info.buf, allow_filter ? " filter" : "", + the_hash_algo->name, git_user_agent_sanitized()); strbuf_release(&symref_info); } else { From 2c6a403d96cd5d31f1638679502f06e2b953647f Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:52 +0000 Subject: [PATCH 06/44] connect: add function to parse multiple v1 capability values In a capability response, we can have multiple symref entries. In the future, we will also allow for multiple hash algorithms to be specified. To avoid duplication, expand the parse_feature_value function to take an optional offset where the parsing should begin next time. Add a wrapper function that allows us to query the next server feature value, and use it in the existing symref parsing code. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- connect.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/connect.c b/connect.c index ccc5274189..2b55a32d4d 100644 --- a/connect.c +++ b/connect.c @@ -18,7 +18,8 @@ static char *server_capabilities_v1; static struct argv_array server_capabilities_v2 = ARGV_ARRAY_INIT; -static const char *parse_feature_value(const char *, const char *, int *); +static const char *parse_feature_value(const char *, const char *, int *, int *); +static const char *next_server_feature_value(const char *feature, int *len, int *offset); static int check_ref(const char *name, unsigned int flags) { @@ -180,17 +181,16 @@ reject: static void annotate_refs_with_symref_info(struct ref *ref) { struct string_list symref = STRING_LIST_INIT_DUP; - const char *feature_list = server_capabilities_v1; + int offset = 0; - while (feature_list) { + while (1) { int len; const char *val; - val = parse_feature_value(feature_list, "symref", &len); + val = next_server_feature_value("symref", &len, &offset); if (!val) break; parse_one_symref_info(&symref, val, len); - feature_list = val + 1; } string_list_sort(&symref); @@ -452,7 +452,7 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, return list; } -static const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp) +static const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp, int *offset) { int len; @@ -460,6 +460,8 @@ static const char *parse_feature_value(const char *feature_list, const char *fea return NULL; len = strlen(feature); + if (offset) + feature_list += *offset; while (*feature_list) { const char *found = strstr(feature_list, feature); if (!found) @@ -474,9 +476,14 @@ static const char *parse_feature_value(const char *feature_list, const char *fea } /* feature with a value (e.g., "agent=git/1.2.3") */ else if (*value == '=') { + int end; + value++; + end = strcspn(value, " \t\n"); if (lenp) - *lenp = strcspn(value, " \t\n"); + *lenp = end; + if (offset) + *offset = value + end - feature_list; return value; } /* @@ -491,12 +498,17 @@ static const char *parse_feature_value(const char *feature_list, const char *fea int parse_feature_request(const char *feature_list, const char *feature) { - return !!parse_feature_value(feature_list, feature, NULL); + return !!parse_feature_value(feature_list, feature, NULL, NULL); +} + +static const char *next_server_feature_value(const char *feature, int *len, int *offset) +{ + return parse_feature_value(server_capabilities_v1, feature, len, offset); } const char *server_feature_value(const char *feature, int *len) { - return parse_feature_value(server_capabilities_v1, feature, len); + return parse_feature_value(server_capabilities_v1, feature, len, NULL); } int server_supports(const char *feature) From 1349ffed6dfa8ddcf9f48ede3b9cfd16fdde16fc Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:53 +0000 Subject: [PATCH 07/44] connect: add function to fetch value of a v2 server capability So far in protocol v2, all of our server capabilities that have values have not had values that we've been interested in parsing. For example, we receive but ignore the agent value. However, in a future commit, we're going to want to parse out the value of a server capability. To make this easy, add a function, server_feature_v2, that can fetch the value provided as part of the server capability. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- connect.c | 15 +++++++++++++++ connect.h | 1 + 2 files changed, 16 insertions(+) diff --git a/connect.c b/connect.c index 2b55a32d4d..ad0e4e8e56 100644 --- a/connect.c +++ b/connect.c @@ -84,6 +84,21 @@ int server_supports_v2(const char *c, int die_on_error) return 0; } +int server_feature_v2(const char *c, const char **v) +{ + int i; + + for (i = 0; i < server_capabilities_v2.argc; i++) { + const char *out; + if (skip_prefix(server_capabilities_v2.argv[i], c, &out) && + (*out == '=')) { + *v = out + 1; + return 1; + } + } + return 0; +} + int server_supports_feature(const char *c, const char *feature, int die_on_error) { diff --git a/connect.h b/connect.h index 5f2382e018..4d76a6017d 100644 --- a/connect.h +++ b/connect.h @@ -19,6 +19,7 @@ struct packet_reader; enum protocol_version discover_version(struct packet_reader *reader); int server_supports_v2(const char *c, int die_on_error); +int server_feature_v2(const char *c, const char **v); int server_supports_feature(const char *c, const char *feature, int die_on_error); From 9a9f0d3fc0888599723812be62fa2d7b3cc4d2d6 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:54 +0000 Subject: [PATCH 08/44] pkt-line: add a member for hash algorithm Add a member for the hash algorithm currently in use to the packet reader so it can parse references correctly. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- pkt-line.c | 1 + pkt-line.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/pkt-line.c b/pkt-line.c index a0e87b1e81..a4aea075de 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -479,6 +479,7 @@ void packet_reader_init(struct packet_reader *reader, int fd, reader->buffer_size = sizeof(packet_buffer); reader->options = options; reader->me = "git"; + reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; } enum packet_read_status packet_reader_read(struct packet_reader *reader) diff --git a/pkt-line.h b/pkt-line.h index fef3a0d792..4cd9435e9a 100644 --- a/pkt-line.h +++ b/pkt-line.h @@ -166,6 +166,9 @@ struct packet_reader { unsigned use_sideband : 1; const char *me; + + /* hash algorithm in use */ + const struct git_hash_algo *hash_algo; }; /* From 7c97af4d64100bf9ce4b335ee91e743378e2e181 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:55 +0000 Subject: [PATCH 09/44] transport: add a hash algorithm member When connecting to a remote system, we need to know what hash algorithm it will be using to talk to us. Add a hash_algo member to struct transport and add a function to read this data from the transport object. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- transport.c | 8 ++++++++ transport.h | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/transport.c b/transport.c index 15f5ba4e8f..b43d985f90 100644 --- a/transport.c +++ b/transport.c @@ -311,6 +311,7 @@ static struct ref *handshake(struct transport *transport, int for_push, BUG("unknown protocol version"); } data->got_remote_heads = 1; + transport->hash_algo = reader.hash_algo; if (reader.line_peeked) BUG("buffer must be empty at the end of handshake()"); @@ -996,9 +997,16 @@ struct transport *transport_get(struct remote *remote, const char *url) ret->smart_options->receivepack = remote->receivepack; } + ret->hash_algo = &hash_algos[GIT_HASH_SHA1]; + return ret; } +const struct git_hash_algo *transport_get_hash_algo(struct transport *transport) +{ + return transport->hash_algo; +} + int transport_set_option(struct transport *transport, const char *name, const char *value) { diff --git a/transport.h b/transport.h index 4298c855be..2a9f96c05a 100644 --- a/transport.h +++ b/transport.h @@ -115,6 +115,8 @@ struct transport { struct git_transport_options *smart_options; enum transport_family family; + + const struct git_hash_algo *hash_algo; }; #define TRANSPORT_PUSH_ALL (1<<0) @@ -243,6 +245,12 @@ int transport_push(struct repository *repo, const struct ref *transport_get_remote_refs(struct transport *transport, const struct argv_array *ref_prefixes); +/* + * Fetch the hash algorithm used by a remote. + * + * This can only be called after fetching the remote refs. + */ +const struct git_hash_algo *transport_get_hash_algo(struct transport *transport); int transport_fetch_refs(struct transport *transport, struct ref *refs); void transport_unlock_pack(struct transport *transport); int transport_disconnect(struct transport *transport); From 122037c2edec0c2bbcbfe52679fddc438165ab54 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:56 +0000 Subject: [PATCH 10/44] connect: add function to detect supported v1 hash functions Add a function, server_supports_hash, to see if the remote server supports a particular hash algorithm when speaking protocol v1. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- connect.c | 22 ++++++++++++++++++++++ connect.h | 1 + 2 files changed, 23 insertions(+) diff --git a/connect.c b/connect.c index ad0e4e8e56..eaa13b41bb 100644 --- a/connect.c +++ b/connect.c @@ -511,6 +511,28 @@ static const char *parse_feature_value(const char *feature_list, const char *fea return NULL; } +int server_supports_hash(const char *desired, int *feature_supported) +{ + int offset = 0; + int len; + const char *hash; + + hash = next_server_feature_value("object-format", &len, &offset); + if (feature_supported) + *feature_supported = !!hash; + if (!hash) { + hash = hash_algos[GIT_HASH_SHA1].name; + len = strlen(hash); + } + while (hash) { + if (!xstrncmpz(desired, hash, len)) + return 1; + + hash = next_server_feature_value("object-format", &len, &offset); + } + return 0; +} + int parse_feature_request(const char *feature_list, const char *feature) { return !!parse_feature_value(feature_list, feature, NULL, NULL); diff --git a/connect.h b/connect.h index 4d76a6017d..fc75d6a457 100644 --- a/connect.h +++ b/connect.h @@ -18,6 +18,7 @@ int url_is_local_not_ssh(const char *url); struct packet_reader; enum protocol_version discover_version(struct packet_reader *reader); +int server_supports_hash(const char *desired, int *feature_supported); int server_supports_v2(const char *c, int die_on_error); int server_feature_v2(const char *c, const char **v); int server_supports_feature(const char *c, const char *feature, From 82db03abbbce3cfe28f42a16a16c2b5723974954 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:57 +0000 Subject: [PATCH 11/44] send-pack: detect when the server doesn't support our hash Detect when the server doesn't support our hash algorithm and abort. If the server does support our hash, advertise it as part of our capabilities. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- send-pack.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/send-pack.c b/send-pack.c index d1b7edc995..fb037568a9 100644 --- a/send-pack.c +++ b/send-pack.c @@ -362,6 +362,7 @@ int send_pack(struct send_pack_args *args, int atomic_supported = 0; int use_push_options = 0; int push_options_supported = 0; + int object_format_supported = 0; unsigned cmds_sent = 0; int ret; struct async demux; @@ -388,6 +389,9 @@ int send_pack(struct send_pack_args *args, if (server_supports("push-options")) push_options_supported = 1; + if (!server_supports_hash(the_hash_algo->name, &object_format_supported)) + die(_("the receiving end does not support this repository's hash algorithm")); + if (args->push_cert != SEND_PACK_PUSH_CERT_NEVER) { int len; push_cert_nonce = server_feature_value("push-cert", &len); @@ -428,6 +432,8 @@ int send_pack(struct send_pack_args *args, strbuf_addstr(&cap_buf, " atomic"); if (use_push_options) strbuf_addstr(&cap_buf, " push-options"); + if (object_format_supported) + strbuf_addf(&cap_buf, " object-format=%s", the_hash_algo->name); if (agent_supported) strbuf_addf(&cap_buf, " agent=%s", git_user_agent_sanitized()); From 84eca27aebeb11ce96441f957f2595c28d89fe36 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:58 +0000 Subject: [PATCH 12/44] connect: make parse_feature_value extern We're going to be using this function in other files, so no longer mark this function static. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- connect.c | 3 +-- connect.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/connect.c b/connect.c index eaa13b41bb..397fad7e32 100644 --- a/connect.c +++ b/connect.c @@ -18,7 +18,6 @@ static char *server_capabilities_v1; static struct argv_array server_capabilities_v2 = ARGV_ARRAY_INIT; -static const char *parse_feature_value(const char *, const char *, int *, int *); static const char *next_server_feature_value(const char *feature, int *len, int *offset); static int check_ref(const char *name, unsigned int flags) @@ -467,7 +466,7 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, return list; } -static const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp, int *offset) +const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp, int *offset) { int len; diff --git a/connect.h b/connect.h index fc75d6a457..ace074dcb6 100644 --- a/connect.h +++ b/connect.h @@ -19,6 +19,7 @@ struct packet_reader; enum protocol_version discover_version(struct packet_reader *reader); int server_supports_hash(const char *desired, int *feature_supported); +const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp, int *offset); int server_supports_v2(const char *c, int die_on_error); int server_feature_v2(const char *c, const char **v); int server_supports_feature(const char *c, const char *feature, From 48bf1415896db0f890530dcd182f33c2f0df10a8 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:59 +0000 Subject: [PATCH 13/44] fetch-pack: detect when the server doesn't support our hash Detect when the server doesn't support our hash algorithm and abort. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- fetch-pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fetch-pack.c b/fetch-pack.c index f73a2ce6cb..1d277190e7 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1039,6 +1039,8 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, print_verbose(args, _("Server supports %s"), "deepen-relative"); else if (args->deepen_relative) die(_("Server does not support --deepen")); + if (!server_supports_hash(the_hash_algo->name, NULL)) + die(_("Server does not support this repository's object format")); if (!args->no_dependents) { mark_complete_and_common_ref(negotiator, args, &ref); From 7c601dc333b6cd86a84e77f41c968a3bb773ba36 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:00 +0000 Subject: [PATCH 14/44] connect: detect algorithm when fetching refs If we're fetching refs, detect the hash algorithm and parse the refs using that algorithm. As mentioned in the documentation, if multiple versions of the object-format capability are provided, we use the first. No known implementation supports multiple algorithms now, but they may in the future. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- connect.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/connect.c b/connect.c index 397fad7e32..915f1736a0 100644 --- a/connect.c +++ b/connect.c @@ -220,12 +220,25 @@ static void annotate_refs_with_symref_info(struct ref *ref) static void process_capabilities(struct packet_reader *reader, int *linelen) { + const char *feat_val; + int feat_len; const char *line = reader->line; int nul_location = strlen(line); if (nul_location == *linelen) return; server_capabilities_v1 = xstrdup(line + nul_location + 1); *linelen = nul_location; + + feat_val = server_feature_value("object-format", &feat_len); + if (feat_val) { + char *hash_name = xstrndup(feat_val, feat_len); + int hash_algo = hash_algo_by_name(hash_name); + if (hash_algo != GIT_HASH_UNKNOWN) + reader->hash_algo = &hash_algos[hash_algo]; + free(hash_name); + } else { + reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; + } } static int process_dummy_ref(const struct packet_reader *reader) @@ -234,7 +247,7 @@ static int process_dummy_ref(const struct packet_reader *reader) struct object_id oid; const char *name; - if (parse_oid_hex(line, &oid, &name)) + if (parse_oid_hex_algop(line, &oid, &name, reader->hash_algo)) return 0; if (*name != ' ') return 0; @@ -258,7 +271,7 @@ static int process_ref(const struct packet_reader *reader, int len, struct object_id old_oid; const char *name; - if (parse_oid_hex(line, &old_oid, &name)) + if (parse_oid_hex_algop(line, &old_oid, &name, reader->hash_algo)) return 0; if (*name != ' ') return 0; @@ -270,7 +283,7 @@ static int process_ref(const struct packet_reader *reader, int len, die(_("protocol error: unexpected capabilities^{}")); } else if (check_ref(name, flags)) { struct ref *ref = alloc_ref(name); - oidcpy(&ref->old_oid, &old_oid); + memcpy(ref->old_oid.hash, old_oid.hash, reader->hash_algo->rawsz); **list = ref; *list = &ref->next; } @@ -288,7 +301,7 @@ static int process_shallow(const struct packet_reader *reader, int len, if (!skip_prefix(line, "shallow ", &arg)) return 0; - if (get_oid_hex(arg, &old_oid)) + if (get_oid_hex_algop(arg, &old_oid, reader->hash_algo)) die(_("protocol error: expected shallow sha-1, got '%s'"), arg); if (!shallow_points) die(_("repository on the other end cannot be shallow")); From bb095d087557b9ded2245270d3fac64ddc774af6 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:01 +0000 Subject: [PATCH 15/44] builtin/receive-pack: detect when the server doesn't support our hash Detect when the server doesn't support our hash algorithm and abort. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index a4159b559e..0da8ca5134 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1624,6 +1624,8 @@ static struct command *read_head_info(struct packet_reader *reader, linelen = strlen(reader->line); if (linelen < reader->pktlen) { const char *feature_list = reader->line + linelen + 1; + const char *hash = NULL; + int len = 0; if (parse_feature_request(feature_list, "report-status")) report_status = 1; if (parse_feature_request(feature_list, "side-band-64k")) @@ -1636,6 +1638,13 @@ static struct command *read_head_info(struct packet_reader *reader, if (advertise_push_options && parse_feature_request(feature_list, "push-options")) use_push_options = 1; + hash = parse_feature_value(feature_list, "object-format", &len, NULL); + if (!hash) { + hash = hash_algos[GIT_HASH_SHA1].name; + len = strlen(hash); + } + if (xstrncmpz(the_hash_algo->name, hash, len)) + die("error: unsupported object format '%s'", hash); } if (!strcmp(reader->line, "push-cert")) { From 452e35684f97be1ef3b1fa608b8ec7fbaebf18a6 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:02 +0000 Subject: [PATCH 16/44] docs: update remote helper docs for object-format extensions Update the remote helper docs to document the object-format extensions we will implement in remote-curl and the transport helper code shortly. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- Documentation/gitremote-helpers.txt | 33 +++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/Documentation/gitremote-helpers.txt b/Documentation/gitremote-helpers.txt index f48a031dc3..26f32e4421 100644 --- a/Documentation/gitremote-helpers.txt +++ b/Documentation/gitremote-helpers.txt @@ -238,6 +238,9 @@ the remote repository. `--signed-tags=verbatim` to linkgit:git-fast-export[1]. In the absence of this capability, Git will use `--signed-tags=warn-strip`. +'object-format':: + This indicates that the helper is able to interact with the remote + side using an explicit hash algorithm extension. COMMANDS @@ -257,12 +260,14 @@ Support for this command is mandatory. 'list':: Lists the refs, one per line, in the format " [ ...]". The value may be a hex sha1 hash, "@" for - a symref, or "?" to indicate that the helper could not get the - value of the ref. A space-separated list of attributes follows - the name; unrecognized attributes are ignored. The list ends - with a blank line. + a symref, ": " for a key-value pair, or + "?" to indicate that the helper could not get the value of the + ref. A space-separated list of attributes follows the name; + unrecognized attributes are ignored. The list ends with a + blank line. + See REF LIST ATTRIBUTES for a list of currently defined attributes. +See REF LIST KEYWORDS for a list of currently defined keywords. + Supported if the helper has the "fetch" or "import" capability. @@ -430,6 +435,18 @@ attributes are defined. This ref is unchanged since the last import or fetch, although the helper cannot necessarily determine what value that produced. +REF LIST KEYWORDS +----------------- + +The 'list' command may produce a list of key-value pairs. +The following keys are defined. + +'object-format':: + The refs are using the given hash algorithm. This keyword is only + used if the server and client both support the object-format + extension. + + OPTIONS ------- @@ -514,6 +531,14 @@ set by Git if the remote helper has the 'option' capability. transaction. If successful, all refs will be updated, or none will. If the remote side does not support this capability, the push will fail. +'option object-format' {'true'|algorithm}:: + If 'true', indicate that the caller wants hash algorithm information + to be passed back from the remote. This mode is used when fetching + refs. ++ +If set to an algorithm, indicate that the caller wants to interact with +the remote side using that algorithm. + SEE ALSO -------- linkgit:git-remote[1] From 8b85ee4f47aadc23c8806bff5540baf819bbde95 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:03 +0000 Subject: [PATCH 17/44] transport-helper: implement object-format extensions Implement the object-format extensions that let us determine the hash algorithm in use when pushing or pulling data. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- transport-helper.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/transport-helper.c b/transport-helper.c index a46afcb69d..ae33b0eea7 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -32,7 +32,8 @@ struct helper_data { signed_tags : 1, check_connectivity : 1, no_disconnect_req : 1, - no_private_update : 1; + no_private_update : 1, + object_format : 1; /* * As an optimization, the transport code may invoke fetch before @@ -207,6 +208,8 @@ static struct child_process *get_helper(struct transport *transport) data->import_marks = xstrdup(arg); } else if (starts_with(capname, "no-private-update")) { data->no_private_update = 1; + } else if (starts_with(capname, "object-format")) { + data->object_format = 1; } else if (mandatory) { die(_("unknown mandatory capability %s; this remote " "helper probably needs newer version of Git"), @@ -1103,6 +1106,12 @@ static struct ref *get_refs_list_using_list(struct transport *transport, data->get_refs_list_called = 1; helper = get_helper(transport); + if (data->object_format) { + write_str_in_full(helper->in, "option object-format\n"); + if (recvline(data, &buf) || strcmp(buf.buf, "ok")) + exit(128); + } + if (data->push && for_push) write_str_in_full(helper->in, "list for-push\n"); else @@ -1115,6 +1124,17 @@ static struct ref *get_refs_list_using_list(struct transport *transport, if (!*buf.buf) break; + else if (buf.buf[0] == ':') { + const char *value; + if (skip_prefix(buf.buf, ":object-format ", &value)) { + int algo = hash_algo_by_name(value); + if (algo == GIT_HASH_UNKNOWN) + die(_("unsupported object format '%s'"), + value); + transport->hash_algo = &hash_algos[algo]; + } + continue; + } eov = strchr(buf.buf, ' '); if (!eov) @@ -1127,7 +1147,7 @@ static struct ref *get_refs_list_using_list(struct transport *transport, if (buf.buf[0] == '@') (*tail)->symref = xstrdup(buf.buf + 1); else if (buf.buf[0] != '?') - get_oid_hex(buf.buf, &(*tail)->old_oid); + get_oid_hex_algop(buf.buf, &(*tail)->old_oid, transport->hash_algo); if (eon) { if (has_attribute(eon + 1, "unchanged")) { (*tail)->status |= REF_STATUS_UPTODATE; From 7f60501775b2a0e0dcabb98fde3eb46fd980a8cc Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:04 +0000 Subject: [PATCH 18/44] remote-curl: implement object-format extensions Implement the object-format extensions that let us determine the hash algorithm in use when pushing, pulling, and fetching. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- remote-curl.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/remote-curl.c b/remote-curl.c index 1c9aa3d0ab..3ed0dfec1b 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -41,7 +41,9 @@ struct options { deepen_relative : 1, from_promisor : 1, no_dependents : 1, - atomic : 1; + atomic : 1, + object_format : 1; + const struct git_hash_algo *hash_algo; }; static struct options options; static struct string_list cas_options = STRING_LIST_INIT_DUP; @@ -190,6 +192,16 @@ static int set_option(const char *name, const char *value) } else if (!strcmp(name, "filter")) { options.filter = xstrdup(value); return 0; + } else if (!strcmp(name, "object-format")) { + int algo; + options.object_format = 1; + if (strcmp(value, "true")) { + algo = hash_algo_by_name(value); + if (algo == GIT_HASH_UNKNOWN) + die("unknown object format '%s'", value); + options.hash_algo = &hash_algos[algo]; + } + return 0; } else { return 1 /* unsupported */; } @@ -231,6 +243,7 @@ static struct ref *parse_git_refs(struct discovery *heads, int for_push) case protocol_v0: get_remote_heads(&reader, &list, for_push ? REF_NORMAL : 0, NULL, &heads->shallow); + options.hash_algo = reader.hash_algo; break; case protocol_unknown_version: BUG("unknown protocol version"); @@ -509,6 +522,9 @@ static struct ref *get_refs(int for_push) static void output_refs(struct ref *refs) { struct ref *posn; + if (options.object_format && options.hash_algo) { + printf(":object-format %s\n", options.hash_algo->name); + } for (posn = refs; posn; posn = posn->next) { if (posn->symref) printf("@%s %s\n", posn->symref, posn->name); @@ -1439,6 +1455,7 @@ int cmd_main(int argc, const char **argv) printf("option\n"); printf("push\n"); printf("check-connectivity\n"); + printf("object-format\n"); printf("\n"); fflush(stdout); } else if (skip_prefix(buf.buf, "stateless-connect ", &arg)) { From b65dc2cebd6ac7d869895297ed5061af15428544 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:05 +0000 Subject: [PATCH 19/44] builtin/clone: initialize hash algorithm properly When performing a clone, we don't know what hash algorithm the other end will support. Currently, we don't support fetching data belonging to a different algorithm, so we must know what algorithm the remote side is using in order to properly initialize the repository. We can know that only after fetching the refs, so if the remote side has any references, use that information to reinitialize the repository with the correct hash algorithm information. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/clone.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/builtin/clone.c b/builtin/clone.c index cb48a291ca..f27d38bc8e 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1217,6 +1217,15 @@ int cmd_clone(int argc, const char **argv, const char *prefix) refs = transport_get_remote_refs(transport, &ref_prefixes); if (refs) { + int hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport)); + + /* + * Now that we know what algorithm the remote side is using, + * let's set ours to the same thing. + */ + initialize_repository_version(hash_algo); + repo_set_hash_algo(the_repository, hash_algo); + mapped_refs = wanted_peer_refs(refs, &remote->fetch); /* * transport_get_remote_refs() may return refs with null sha-1 From 9dc78c20dc2aa5ed073a95dc35880d940658a5be Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:06 +0000 Subject: [PATCH 20/44] t5562: pass object-format in synthesized test data Ensure that we pass the object-format capability in the synthesized test data so that this test works with algorithms other than SHA-1. In addition, add an additional test using the old data for when we're using SHA-1 so that we can be sure that we preserve backwards compatibility with servers not offering the object-format capability. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t5562-http-backend-content-length.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/t/t5562-http-backend-content-length.sh b/t/t5562-http-backend-content-length.sh index 3f4ac71f83..c6ec625497 100755 --- a/t/t5562-http-backend-content-length.sh +++ b/t/t5562-http-backend-content-length.sh @@ -46,6 +46,7 @@ ssize_b100dots() { } test_expect_success 'setup' ' + test_oid_init && HTTP_CONTENT_ENCODING="identity" && export HTTP_CONTENT_ENCODING && git config http.receivepack true && @@ -62,8 +63,8 @@ test_expect_success 'setup' ' test_copy_bytes 10 fetch_body.trunc && hash_next=$(git commit-tree -p HEAD -m next HEAD^{tree}) && { - printf "%s %s refs/heads/newbranch\\0report-status\\n" \ - "$ZERO_OID" "$hash_next" | packetize && + printf "%s %s refs/heads/newbranch\\0report-status object-format=%s\\n" \ + "$ZERO_OID" "$hash_next" "$(test_oid algo)" | packetize && printf 0000 && echo "$hash_next" | git pack-objects --stdout } >push_body && From 4b831208bb365fedd0cce286e0c27627598d9393 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:07 +0000 Subject: [PATCH 21/44] fetch-pack: parse and advertise the object-format capability Parse the server's object-format capability and respond accordingly, dying if there is a mismatch. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- fetch-pack.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fetch-pack.c b/fetch-pack.c index 1d277190e7..d5a919f2aa 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1179,6 +1179,7 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, int sideband_all, int seen_ack) { int ret = 0; + const char *hash_name; struct strbuf req_buf = STRBUF_INIT; if (server_supports_v2("fetch", 1)) @@ -1193,6 +1194,17 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, args->server_options->items[i].string); } + if (server_feature_v2("object-format", &hash_name)) { + int hash_algo = hash_algo_by_name(hash_name); + if (hash_algo_by_ptr(the_hash_algo) != hash_algo) + die(_("mismatched algorithms: client %s; server %s"), + the_hash_algo->name, hash_name); + packet_write_fmt(fd_out, "object-format=%s", the_hash_algo->name); + } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) { + die(_("the server does not support algorithm '%s'"), + the_hash_algo->name); + } + packet_buf_delim(&req_buf); if (args->use_thin_pack) packet_buf_write(&req_buf, "thin-pack"); From d553acebeeda584e0e749cfe04dad450c667de90 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:08 +0000 Subject: [PATCH 22/44] setup: set the_repository's hash algo when checking format When we're checking the repository's format, set the hash algorithm at the same time. This ensures that we perform a suitable initialization early enough to avoid confusing any parts of the code. If we defer until later, we can end up with portions of the code which are confused about the hash algorithm, resulting in segfaults when working with SHA-256 repositories. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.c b/setup.c index 65fe5ecefb..019a1c6367 100644 --- a/setup.c +++ b/setup.c @@ -1273,6 +1273,7 @@ void check_repository_format(struct repository_format *fmt) fmt = &repo_fmt; check_repository_format_gently(get_git_dir(), fmt, NULL); startup_info->have_repository = 1; + repo_set_hash_algo(the_repository, fmt->hash_algo); clear_repository_format(&repo_fmt); } From 49c9a2ffe59b2e89ed4fd3f90b3dbc5d67760d74 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:09 +0000 Subject: [PATCH 23/44] t3200: mark assertion with SHA1 prerequisite One of the test assertions in this test checks that git branch -m works even without a .git/config file. However, if the repository requires configuration extensions, such as because it uses a non-SHA-1 algorithm, this assertion will fail. Mark the assertion as requiring SHA-1. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t3200-branch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t3200-branch.sh b/t/t3200-branch.sh index 411a70b0ce..2a3fedc6b0 100755 --- a/t/t3200-branch.sh +++ b/t/t3200-branch.sh @@ -402,7 +402,7 @@ EOF mv .git/config .git/config-saved -test_expect_success 'git branch -m q q2 without config should succeed' ' +test_expect_success SHA1 'git branch -m q q2 without config should succeed' ' git branch -m q q2 && git branch -m q2 q ' From 629dffc461f3631bb7acfe905d805caa38b49dfa Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:10 +0000 Subject: [PATCH 24/44] packfile: compute and use the index CRC offset Both v2 pack index files and the v3 format specified as part of the NewHash work have similar data starting at the CRC table. Much of the existing code wants to read either this table or the offset entries following it, and in doing so computes the offset each time. In order to share as much code between v2 and v3, compute the offset of the CRC table and store it when the pack is opened. Use this value to compute offsets to not only the CRC table, but to the offset entries beyond it. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 6 +----- object-store.h | 1 + packfile.c | 1 + 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f176dd28c8..7bea1fba52 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1555,13 +1555,9 @@ static void read_v2_anomalous_offsets(struct packed_git *p, { const uint32_t *idx1, *idx2; uint32_t i; - const uint32_t hashwords = the_hash_algo->rawsz / sizeof(uint32_t); /* The address of the 4-byte offset table */ - idx1 = (((const uint32_t *)p->index_data) - + 2 /* 8-byte header */ - + 256 /* fan out */ - + hashwords * p->num_objects /* object ID table */ + idx1 = (((const uint32_t *)((const uint8_t *)p->index_data + p->crc_offset)) + p->num_objects /* CRC32 table */ ); diff --git a/object-store.h b/object-store.h index d1e490f203..f439d47af8 100644 --- a/object-store.h +++ b/object-store.h @@ -70,6 +70,7 @@ struct packed_git { size_t index_size; uint32_t num_objects; uint32_t num_bad_objects; + uint32_t crc_offset; unsigned char *bad_object_sha1; int index_version; time_t mtime; diff --git a/packfile.c b/packfile.c index f4e752996d..6ab5233613 100644 --- a/packfile.c +++ b/packfile.c @@ -178,6 +178,7 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map, */ (sizeof(off_t) <= 4)) return error("pack too large for current definition of off_t in %s", path); + p->crc_offset = 8 + 4 * 256 + nr * hashsz; } p->index_version = version; From 1610dda8ae5190438b3de205c2a0f87dfe878ca3 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:11 +0000 Subject: [PATCH 25/44] t5302: modernize test formatting Our style these days is to place the description and the opening quote of the body on the same line as test_expect_success (if it fits), to place the trailing quote on a line by itself after the body, and to use tabs. Since we're going to be making several significant changes to this test, modernize the style to aid in readability of the subsequent patches. This patch should have no functional change. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t5302-pack-index.sh | 360 +++++++++++++++++++++--------------------- 1 file changed, 184 insertions(+), 176 deletions(-) diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index ad07f2f7fc..8981c9b90e 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -7,65 +7,65 @@ test_description='pack index with 64-bit offsets and object CRC' . ./test-lib.sh test_expect_success 'setup' ' - test_oid_init && - rawsz=$(test_oid rawsz) && - rm -rf .git && - git init && - git config pack.threads 1 && - i=1 && - while test $i -le 100 - do - iii=$(printf '%03i' $i) - test-tool genrandom "bar" 200 > wide_delta_$iii && - test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && - test-tool genrandom "foo"$i 100 > deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && - echo $iii >file_$iii && - test-tool genrandom "$iii" 8192 >>file_$iii && - git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && - i=$(expr $i + 1) || return 1 - done && - { echo 101 && test-tool genrandom 100 8192; } >file_101 && - git update-index --add file_101 && - tree=$(git write-tree) && - commit=$(git commit-tree $tree obj-list && - git update-ref HEAD $commit + test_oid_init && + rawsz=$(test_oid rawsz) && + rm -rf .git && + git init && + git config pack.threads 1 && + i=1 && + while test $i -le 100 + do + iii=$(printf '%03i' $i) + test-tool genrandom "bar" 200 > wide_delta_$iii && + test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && + test-tool genrandom "foo"$i 100 > deep_delta_$iii && + test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && + test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && + echo $iii >file_$iii && + test-tool genrandom "$iii" 8192 >>file_$iii && + git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && + i=$(expr $i + 1) || return 1 + done && + { echo 101 && test-tool genrandom 100 8192; } >file_101 && + git update-index --add file_101 && + tree=$(git write-tree) && + commit=$(git commit-tree $tree obj-list && + git update-ref HEAD $commit ' -test_expect_success \ - 'pack-objects with index version 1' \ - 'pack1=$(git pack-objects --index-version=1 test-1 &1) || ! (echo "$msg" | grep "pack too large .* off_t") @@ -91,21 +91,21 @@ else say "# skipping tests concerning 64-bit offsets" fi -test_expect_success OFF64_T \ - 'index v2: verify a pack with some 64-bit offsets' \ - 'git verify-pack -v "test-3-${pack3}.pack"' +test_expect_success OFF64_T 'index v2: verify a pack with some 64-bit offsets' ' + git verify-pack -v "test-3-${pack3}.pack" +' -test_expect_success OFF64_T \ - '64-bit offsets: should be different from previous index v2 results' \ - '! cmp "test-2-${pack2}.idx" "test-3-${pack3}.idx"' +test_expect_success OFF64_T '64-bit offsets: should be different from previous index v2 results' ' + ! cmp "test-2-${pack2}.idx" "test-3-${pack3}.idx" +' -test_expect_success OFF64_T \ - 'index v2: force some 64-bit offsets with index-pack' \ - 'git index-pack --index-version=2,0x40000 -o 3.idx "test-1-${pack1}.pack"' +test_expect_success OFF64_T 'index v2: force some 64-bit offsets with index-pack' ' + git index-pack --index-version=2,0x40000 -o 3.idx "test-1-${pack1}.pack" +' -test_expect_success OFF64_T \ - '64-bit offsets: index-pack result should match pack-objects one' \ - 'cmp "test-3-${pack3}.idx" "3.idx"' +test_expect_success OFF64_T '64-bit offsets: index-pack result should match pack-objects one' ' + cmp "test-3-${pack3}.idx" "3.idx" +' test_expect_success OFF64_T 'index-pack --verify on 64-bit offset v2 (cheat)' ' # This cheats by knowing which lower offset should still be encoded @@ -120,135 +120,143 @@ test_expect_success OFF64_T 'index-pack --verify on 64-bit offset v2' ' # returns the object number for given object in given pack index index_obj_nr() { - idx_file=$1 - object_sha1=$2 - nr=0 - git show-index < $idx_file | - while read offs sha1 extra - do - nr=$(($nr + 1)) - test "$sha1" = "$object_sha1" || continue - echo "$(($nr - 1))" - break - done + idx_file=$1 + object_sha1=$2 + nr=0 + git show-index < $idx_file | + while read offs sha1 extra + do + nr=$(($nr + 1)) + test "$sha1" = "$object_sha1" || continue + echo "$(($nr - 1))" + break + done } # returns the pack offset for given object as found in given pack index index_obj_offset() { - idx_file=$1 - object_sha1=$2 - git show-index < $idx_file | grep $object_sha1 | - ( read offs extra && echo "$offs" ) + idx_file=$1 + object_sha1=$2 + git show-index < $idx_file | grep $object_sha1 | + ( read offs extra && echo "$offs" ) } -test_expect_success \ - '[index v1] 1) stream pack to repository' \ - 'git index-pack --index-version=1 --stdin < "test-1-${pack1}.pack" && - git prune-packed && - git count-objects | ( read nr rest && test "$nr" -eq 1 ) && - cmp "test-1-${pack1}.pack" ".git/objects/pack/pack-${pack1}.pack" && - cmp "test-1-${pack1}.idx" ".git/objects/pack/pack-${pack1}.idx"' +test_expect_success '[index v1] 1) stream pack to repository' ' + git index-pack --index-version=1 --stdin < "test-1-${pack1}.pack" && + git prune-packed && + git count-objects | ( read nr rest && test "$nr" -eq 1 ) && + cmp "test-1-${pack1}.pack" ".git/objects/pack/pack-${pack1}.pack" && + cmp "test-1-${pack1}.idx" ".git/objects/pack/pack-${pack1}.idx" +' test_expect_success \ - '[index v1] 2) create a stealth corruption in a delta base reference' \ - '# This test assumes file_101 is a delta smaller than 16 bytes. - # It should be against file_100 but we substitute its base for file_099 - sha1_101=$(git hash-object file_101) && - sha1_099=$(git hash-object file_099) && - offs_101=$(index_obj_offset 1.idx $sha1_101) && - nr_099=$(index_obj_nr 1.idx $sha1_099) && - chmod +w ".git/objects/pack/pack-${pack1}.pack" && - recordsz=$((rawsz + 4)) && - dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ - if=".git/objects/pack/pack-${pack1}.idx" \ - skip=$((4 + 256 * 4 + $nr_099 * recordsz)) \ - bs=1 count=$rawsz conv=notrunc && - git cat-file blob $sha1_101 > file_101_foo1' + '[index v1] 2) create a stealth corruption in a delta base reference' ' + # This test assumes file_101 is a delta smaller than 16 bytes. + # It should be against file_100 but we substitute its base for file_099 + sha1_101=$(git hash-object file_101) && + sha1_099=$(git hash-object file_099) && + offs_101=$(index_obj_offset 1.idx $sha1_101) && + nr_099=$(index_obj_nr 1.idx $sha1_099) && + chmod +w ".git/objects/pack/pack-${pack1}.pack" && + recordsz=$((rawsz + 4)) && + dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ + if=".git/objects/pack/pack-${pack1}.idx" \ + skip=$((4 + 256 * 4 + $nr_099 * recordsz)) \ + bs=1 count=$rawsz conv=notrunc && + git cat-file blob $sha1_101 > file_101_foo1 +' test_expect_success \ - '[index v1] 3) corrupted delta happily returned wrong data' \ - 'test -f file_101_foo1 && ! cmp file_101 file_101_foo1' + '[index v1] 3) corrupted delta happily returned wrong data' ' + test -f file_101_foo1 && ! cmp file_101 file_101_foo1 +' test_expect_success \ - '[index v1] 4) confirm that the pack is actually corrupted' \ - 'test_must_fail git fsck --full $commit' + '[index v1] 4) confirm that the pack is actually corrupted' ' + test_must_fail git fsck --full $commit +' test_expect_success \ - '[index v1] 5) pack-objects happily reuses corrupted data' \ - 'pack4=$(git pack-objects test-4 file_101_foo2 +' test_expect_success \ - '[index v2] 1) stream pack to repository' \ - 'rm -f .git/objects/pack/* && - git index-pack --index-version=2 --stdin < "test-1-${pack1}.pack" && - git prune-packed && - git count-objects | ( read nr rest && test "$nr" -eq 1 ) && - cmp "test-1-${pack1}.pack" ".git/objects/pack/pack-${pack1}.pack" && - cmp "test-2-${pack1}.idx" ".git/objects/pack/pack-${pack1}.idx"' + '[index v2] 3) corrupted delta happily returned wrong data' ' + test -f file_101_foo2 && ! cmp file_101 file_101_foo2 +' test_expect_success \ - '[index v2] 2) create a stealth corruption in a delta base reference' \ - '# This test assumes file_101 is a delta smaller than 16 bytes. - # It should be against file_100 but we substitute its base for file_099 - sha1_101=$(git hash-object file_101) && - sha1_099=$(git hash-object file_099) && - offs_101=$(index_obj_offset 1.idx $sha1_101) && - nr_099=$(index_obj_nr 1.idx $sha1_099) && - chmod +w ".git/objects/pack/pack-${pack1}.pack" && - dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ - if=".git/objects/pack/pack-${pack1}.idx" \ - skip=$((8 + 256 * 4 + $nr_099 * rawsz)) \ - bs=1 count=$rawsz conv=notrunc && - git cat-file blob $sha1_101 > file_101_foo2' + '[index v2] 4) confirm that the pack is actually corrupted' ' + test_must_fail git fsck --full $commit +' test_expect_success \ - '[index v2] 3) corrupted delta happily returned wrong data' \ - 'test -f file_101_foo2 && ! cmp file_101 file_101_foo2' + '[index v2] 5) pack-objects refuses to reuse corrupted data' ' + test_must_fail git pack-objects test-5 /dev/null || exit 1 - done /dev/null || exit 1 + done wrong-tag <wrong-tag <err && - grep "^warning:.* expected .tagger. line" err + tag=$(git hash-object -t tag -w --stdin err && + grep "^warning:.* expected .tagger. line" err ' test_expect_success 'index-pack --fsck-objects also warns upon missing tagger in tag' ' - git index-pack --fsck-objects tag-test-${pack1}.pack 2>err && - grep "^warning:.* expected .tagger. line" err + git index-pack --fsck-objects tag-test-${pack1}.pack 2>err && + grep "^warning:.* expected .tagger. line" err ' test_done From 88a09a557c7571760b3ded75764bf3b1fd0e8bf0 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:12 +0000 Subject: [PATCH 26/44] builtin/show-index: provide options to determine hash algo show-index is capable of reading any possible index file whether or not the index is inside a repository. However, because our index files lack metadata about the hash algorithm in use, it's not possible to autodetect the algorithm that a particular index file is using. In order to allow us to read index files of any algorithm, let's set up the .git directory gently so that we default to the algorithm for the current repository, and add an --object-format option to allow users to override this setting and continue to run show-index outside of a repository altogether. Let's also document this new option so that people can find it and use it. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- Documentation/git-show-index.txt | 11 ++++++++++- builtin/show-index.c | 29 ++++++++++++++++++++++++----- git.c | 2 +- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/Documentation/git-show-index.txt b/Documentation/git-show-index.txt index 424e4ba84c..39b1d8eaa1 100644 --- a/Documentation/git-show-index.txt +++ b/Documentation/git-show-index.txt @@ -9,7 +9,7 @@ git-show-index - Show packed archive index SYNOPSIS -------- [verse] -'git show-index' +'git show-index' [--object-format=] DESCRIPTION @@ -36,6 +36,15 @@ Note that you can get more information on a packfile by calling linkgit:git-verify-pack[1]. However, as this command considers only the index file itself, it's both faster and more flexible. +OPTIONS +------- + +--object-format=:: + Specify the given object format (hash algorithm) for the index file. The + valid values are 'sha1' and (if enabled) 'sha256'. The default is the + algorithm for the current repository (set by `extensions.objectFormat`), or + 'sha1' if no value is set or outside a repository.. + GIT --- Part of the linkgit:git[1] suite diff --git a/builtin/show-index.c b/builtin/show-index.c index 0826f6a5a2..8106b03a6b 100644 --- a/builtin/show-index.c +++ b/builtin/show-index.c @@ -1,9 +1,12 @@ #include "builtin.h" #include "cache.h" #include "pack.h" +#include "parse-options.h" -static const char show_index_usage[] = -"git show-index"; +static const char *const show_index_usage[] = { + "git show-index [--object-format=]", + NULL +}; int cmd_show_index(int argc, const char **argv, const char *prefix) { @@ -11,10 +14,26 @@ int cmd_show_index(int argc, const char **argv, const char *prefix) unsigned nr; unsigned int version; static unsigned int top_index[256]; - const unsigned hashsz = the_hash_algo->rawsz; + unsigned hashsz; + const char *hash_name = NULL; + int hash_algo; + const struct option show_index_options[] = { + OPT_STRING(0, "object-format", &hash_name, N_("hash-algorithm"), + N_("specify the hash algorithm to use")), + OPT_END() + }; + + argc = parse_options(argc, argv, prefix, show_index_options, show_index_usage, 0); + + if (hash_name) { + hash_algo = hash_algo_by_name(hash_name); + if (hash_algo == GIT_HASH_UNKNOWN) + die(_("Unknown hash algorithm")); + repo_set_hash_algo(the_repository, hash_algo); + } + + hashsz = the_hash_algo->rawsz; - if (argc != 1) - usage(show_index_usage); if (fread(top_index, 2 * 4, 1, stdin) != 1) die("unable to read header"); if (top_index[0] == htonl(PACK_IDX_SIGNATURE)) { diff --git a/git.c b/git.c index 2e4efb4ff0..e53e8159a2 100644 --- a/git.c +++ b/git.c @@ -573,7 +573,7 @@ static struct cmd_struct commands[] = { { "shortlog", cmd_shortlog, RUN_SETUP_GENTLY | USE_PAGER }, { "show", cmd_show, RUN_SETUP }, { "show-branch", cmd_show_branch, RUN_SETUP }, - { "show-index", cmd_show_index }, + { "show-index", cmd_show_index, RUN_SETUP_GENTLY }, { "show-ref", cmd_show_ref, RUN_SETUP }, { "sparse-checkout", cmd_sparse_checkout, RUN_SETUP | NEED_WORK_TREE }, { "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE }, From 059d8066f843a09c615be2b7c7e42fd393852ed6 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:13 +0000 Subject: [PATCH 27/44] t1302: expect repo format version 1 for SHA-256 When using SHA-256, we need to take advantage of the extensions section in the config file, so we need to use repository format version 1. Update the test to look for the correct value. Note that test_oid produces a value without a trailing newline, so use echo to ensure we print a trailing newline to compare it correctly against the actual results. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t1302-repo-version.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/t/t1302-repo-version.sh b/t/t1302-repo-version.sh index ce4cff13bb..d60c042ce8 100755 --- a/t/t1302-repo-version.sh +++ b/t/t1302-repo-version.sh @@ -8,6 +8,10 @@ test_description='Test repository version check' . ./test-lib.sh test_expect_success 'setup' ' + test_oid_cache <<-\EOF && + version sha1:0 + version sha256:1 + EOF cat >test.patch <<-\EOF && diff --git a/test.txt b/test.txt new file mode 100644 @@ -23,7 +27,7 @@ test_expect_success 'setup' ' ' test_expect_success 'gitdir selection on normal repos' ' - echo 0 >expect && + echo $(test_oid version) >expect && git config core.repositoryformatversion >actual && git -C test config core.repositoryformatversion >actual2 && test_cmp expect actual && From 7f46e7ead1d82538866265228f9d743f0e93e17a Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:14 +0000 Subject: [PATCH 28/44] Documentation/technical: document object-format for protocol v2 Document the object-format extension for protocol v2. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- Documentation/technical/protocol-v2.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt index 7e3766cafb..107e421fb7 100644 --- a/Documentation/technical/protocol-v2.txt +++ b/Documentation/technical/protocol-v2.txt @@ -453,3 +453,12 @@ included in a request. This is done by sending each option as a a request. The provided options must not contain a NUL or LF character. + + object-format +~~~~~~~~~~~~~~~ + +The server can advertise the `object-format` capability with a value `X` (in the +form `object-format=X`) to notify the client that the server is able to deal +with objects using hash algorithm X. If not specified, the server is assumed to +only handle SHA-1. If the client would like to use a hash algorithm other than +SHA-1, it should specify its object-format string. From 67e9a70741c577b6e49bd60556779c7ab32ae4f8 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:15 +0000 Subject: [PATCH 29/44] connect: pass full packet reader when parsing v2 refs When we're parsing refs, we need to know not only what the line we're parsing is, but also the hash algorithm we should use to parse it, which is stored in the reader object. Pass the packet reader object through to the protocol v2 ref parsing function. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- connect.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/connect.c b/connect.c index 915f1736a0..1d05bc56ed 100644 --- a/connect.c +++ b/connect.c @@ -374,7 +374,7 @@ struct ref **get_remote_heads(struct packet_reader *reader, } /* Returns 1 when a valid ref has been added to `list`, 0 otherwise */ -static int process_ref_v2(const char *line, struct ref ***list) +static int process_ref_v2(struct packet_reader *reader, struct ref ***list) { int ret = 1; int i = 0; @@ -382,6 +382,7 @@ static int process_ref_v2(const char *line, struct ref ***list) struct ref *ref; struct string_list line_sections = STRING_LIST_INIT_DUP; const char *end; + const char *line = reader->line; /* * Ref lines have a number of fields which are space deliminated. The @@ -469,7 +470,7 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, /* Process response from server */ while (packet_reader_read(reader) == PACKET_READ_NORMAL) { - if (!process_ref_v2(reader->line, &list)) + if (!process_ref_v2(reader, &list)) die(_("invalid ls-refs response: %s"), reader->line); } From ab67235bc4900d8203a1a6b6f33cf8afa845e43e Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:16 +0000 Subject: [PATCH 30/44] connect: parse v2 refs with correct hash algorithm When using protocol v2, we need to know what hash algorithm is used by the remote end. See if the server has sent us an object-format capability, and if so, use it to determine the hash algorithm in use and set that value in the packet reader. Parse the refs using this algorithm. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- connect.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/connect.c b/connect.c index 1d05bc56ed..66650ff2d3 100644 --- a/connect.c +++ b/connect.c @@ -283,7 +283,7 @@ static int process_ref(const struct packet_reader *reader, int len, die(_("protocol error: unexpected capabilities^{}")); } else if (check_ref(name, flags)) { struct ref *ref = alloc_ref(name); - memcpy(ref->old_oid.hash, old_oid.hash, reader->hash_algo->rawsz); + oidcpy(&ref->old_oid, &old_oid); **list = ref; *list = &ref->next; } @@ -395,7 +395,7 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list) goto out; } - if (parse_oid_hex(line_sections.items[i++].string, &old_oid, &end) || + if (parse_oid_hex_algop(line_sections.items[i++].string, &old_oid, &end, reader->hash_algo) || *end) { ret = 0; goto out; @@ -403,7 +403,7 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list) ref = alloc_ref(line_sections.items[i++].string); - oidcpy(&ref->old_oid, &old_oid); + memcpy(ref->old_oid.hash, old_oid.hash, reader->hash_algo->rawsz); **list = ref; *list = &ref->next; @@ -416,7 +416,8 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list) struct object_id peeled_oid; char *peeled_name; struct ref *peeled; - if (parse_oid_hex(arg, &peeled_oid, &end) || *end) { + if (parse_oid_hex_algop(arg, &peeled_oid, &end, + reader->hash_algo) || *end) { ret = 0; goto out; } @@ -424,7 +425,8 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list) peeled_name = xstrfmt("%s^{}", ref->name); peeled = alloc_ref(peeled_name); - oidcpy(&peeled->old_oid, &peeled_oid); + memcpy(peeled->old_oid.hash, peeled_oid.hash, + reader->hash_algo->rawsz); **list = peeled; *list = &peeled->next; @@ -443,6 +445,7 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, const struct string_list *server_options) { int i; + const char *hash_name; *list = NULL; if (server_supports_v2("ls-refs", 1)) @@ -451,6 +454,14 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, if (server_supports_v2("agent", 0)) packet_write_fmt(fd_out, "agent=%s", git_user_agent_sanitized()); + if (server_feature_v2("object-format", &hash_name)) { + int hash_algo = hash_algo_by_name(hash_name); + if (hash_algo == GIT_HASH_UNKNOWN) + die(_("unknown object format '%s' specified by server"), hash_name); + reader->hash_algo = &hash_algos[hash_algo]; + packet_write_fmt(fd_out, "object-format=%s", reader->hash_algo->name); + } + if (server_options && server_options->nr && server_supports_v2("server-option", 1)) for (i = 0; i < server_options->nr; i++) From 9de0dd361c9ea2ca6eca14a7dd43fe11d170a253 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:17 +0000 Subject: [PATCH 31/44] serve: advertise object-format capability for protocol v2 In order to communicate the protocol supported by the server side, add support for advertising the object-format capability. We check that the client side sends us an identical algorithm if it sends us its own object-format capability, and assume it speaks SHA-1 if not. In the test, when we're using an algorithm other than SHA-1, we need to specify the algorithm in use so we don't get a failure with an "unknown format" message. Add a test that we handle a mismatched algorithm. Remove the test_oid_init call since it's no longer necessary. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- connect.c | 2 ++ serve.c | 27 +++++++++++++++++++++++++++ t/t5701-git-serve.sh | 25 +++++++++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/connect.c b/connect.c index 66650ff2d3..2ada5b5161 100644 --- a/connect.c +++ b/connect.c @@ -460,6 +460,8 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, die(_("unknown object format '%s' specified by server"), hash_name); reader->hash_algo = &hash_algos[hash_algo]; packet_write_fmt(fd_out, "object-format=%s", reader->hash_algo->name); + } else { + reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; } if (server_options && server_options->nr && diff --git a/serve.c b/serve.c index 317256c1a4..7ab7807fef 100644 --- a/serve.c +++ b/serve.c @@ -22,6 +22,14 @@ static int agent_advertise(struct repository *r, return 1; } +static int object_format_advertise(struct repository *r, + struct strbuf *value) +{ + if (value) + strbuf_addstr(value, r->hash_algo->name); + return 1; +} + struct protocol_capability { /* * The name of the capability. The server uses this name when @@ -57,6 +65,7 @@ static struct protocol_capability capabilities[] = { { "ls-refs", always_advertise, ls_refs }, { "fetch", upload_pack_advertise, upload_pack_v2 }, { "server-option", always_advertise, NULL }, + { "object-format", object_format_advertise, NULL }, }; static void advertise_capabilities(void) @@ -153,6 +162,22 @@ int has_capability(const struct argv_array *keys, const char *capability, return 0; } +static void check_algorithm(struct repository *r, struct argv_array *keys) +{ + int client = GIT_HASH_SHA1, server = hash_algo_by_ptr(r->hash_algo); + const char *algo_name; + + if (has_capability(keys, "object-format", &algo_name)) { + client = hash_algo_by_name(algo_name); + if (client == GIT_HASH_UNKNOWN) + die("unknown object format '%s'", algo_name); + } + + if (client != server) + die("mismatched object format: server %s; client %s\n", + r->hash_algo->name, hash_algos[client].name); +} + enum request_state { PROCESS_REQUEST_KEYS, PROCESS_REQUEST_DONE, @@ -223,6 +248,8 @@ static int process_request(void) if (!command) die("no command requested"); + check_algorithm(the_repository, &keys); + command->command(the_repository, &keys, &reader); argv_array_clear(&keys); diff --git a/t/t5701-git-serve.sh b/t/t5701-git-serve.sh index ffb9613885..a1f5fdc9fd 100755 --- a/t/t5701-git-serve.sh +++ b/t/t5701-git-serve.sh @@ -5,12 +5,17 @@ test_description='test protocol v2 server commands' . ./test-lib.sh test_expect_success 'test capability advertisement' ' + test_oid_cache <<-EOF && + wrong_algo sha1:sha256 + wrong_algo sha256:sha1 + EOF cat >expect <<-EOF && version 2 agent=git/$(git version | cut -d" " -f3) ls-refs fetch=shallow server-option + object-format=$(test_oid algo) 0000 EOF @@ -45,6 +50,7 @@ test_expect_success 'request invalid capability' ' test_expect_success 'request with no command' ' test-tool pkt-line pack >in <<-EOF && agent=git/test + object-format=$(test_oid algo) 0000 EOF test_must_fail test-tool serve-v2 --stateless-rpc 2>err in <<-EOF && command=foo + object-format=$(test_oid algo) agent=git/test 0000 EOF @@ -61,6 +68,17 @@ test_expect_success 'request invalid command' ' test_i18ngrep "invalid command" err ' +test_expect_success 'wrong object-format' ' + test-tool pkt-line pack >in <<-EOF && + command=fetch + agent=git/test + object-format=$(test_oid wrong_algo) + 0000 + EOF + test_must_fail test-tool serve-v2 --stateless-rpc 2>err in <<-EOF && command=ls-refs + object-format=$(test_oid algo) 0000 EOF @@ -96,6 +115,7 @@ test_expect_success 'basics of ls-refs' ' test_expect_success 'basic ref-prefixes' ' test-tool pkt-line pack >in <<-EOF && command=ls-refs + object-format=$(test_oid algo) 0001 ref-prefix refs/heads/master ref-prefix refs/tags/one @@ -116,6 +136,7 @@ test_expect_success 'basic ref-prefixes' ' test_expect_success 'refs/heads prefix' ' test-tool pkt-line pack >in <<-EOF && command=ls-refs + object-format=$(test_oid algo) 0001 ref-prefix refs/heads/ 0000 @@ -136,6 +157,7 @@ test_expect_success 'refs/heads prefix' ' test_expect_success 'peel parameter' ' test-tool pkt-line pack >in <<-EOF && command=ls-refs + object-format=$(test_oid algo) 0001 peel ref-prefix refs/tags/ @@ -157,6 +179,7 @@ test_expect_success 'peel parameter' ' test_expect_success 'symrefs parameter' ' test-tool pkt-line pack >in <<-EOF && command=ls-refs + object-format=$(test_oid algo) 0001 symrefs ref-prefix refs/heads/ @@ -178,6 +201,7 @@ test_expect_success 'symrefs parameter' ' test_expect_success 'sending server-options' ' test-tool pkt-line pack >in <<-EOF && command=ls-refs + object-format=$(test_oid algo) server-option=hello server-option=world 0001 @@ -200,6 +224,7 @@ test_expect_success 'unexpected lines are not allowed in fetch request' ' test-tool pkt-line pack >in <<-EOF && command=fetch + object-format=$(test_oid algo) 0001 this-is-not-a-command 0000 From f0af95f42455b03e3dd83f27f24cb6203eef99ae Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:18 +0000 Subject: [PATCH 32/44] t5500: make hash independent This test has hard-coded pkt-lines with object IDs. The pkt-line lengths necessarily differ between hash algorithms, so generate these lines with the packetize helper so they're always the right size. In addition, we will require an object-format capability for SHA-256, so pass that capability on to the upload-pack process. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t5500-fetch-pack.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh index 52dd1a688c..8fee99ecfb 100755 --- a/t/t5500-fetch-pack.sh +++ b/t/t5500-fetch-pack.sh @@ -871,9 +871,10 @@ test_expect_success 'shallow since with commit graph and already-seen commit' ' GIT_PROTOCOL=version=2 git upload-pack . <<-EOF >/dev/null 0012command=fetch + $(echo "object-format=$(test_oid algo)" | packetize) 00010013deepen-since 1 - 0032want $(git rev-parse other) - 0032have $(git rev-parse master) + $(echo "want $(git rev-parse other)" | packetize) + $(echo "have $(git rev-parse master)" | packetize) 0000 EOF ) From d96dab868e63cd5a55b50e1d8a23b640e17413bb Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:19 +0000 Subject: [PATCH 33/44] builtin/ls-remote: initialize repository based on fetch ls-remote may or may not operate within a repository, and as such will not have been initialized with the repository's hash algorithm. Even if it were, the remote side could be using a different algorithm and we would still want to display those refs properly. Find the hash algorithm used by the remote side by querying the transport object and set our hash algorithm accordingly. Without this change, if the remote side is using SHA-256, we truncate the refs to 40 hex characters, since that's the length of the default hash algorithm (SHA-1). Note that technically this is not a correct setting of the repository hash algorithm since, if we are in a repository, it might be one of a different hash algorithm from the remote side. However, our current code paths don't handle multiple algorithms and won't for some time, so this is the best we can do. We rely on the fact that ls-remote never modifies the current repository, which is a reasonable assumption to make. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/ls-remote.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index 6ef519514b..3a4dd12903 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -118,6 +118,10 @@ int cmd_ls_remote(int argc, const char **argv, const char *prefix) transport->server_options = &server_options; ref = transport_get_remote_refs(transport, &ref_prefixes); + if (ref) { + int hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport)); + repo_set_hash_algo(the_repository, hash_algo); + } if (transport_disconnect(transport)) { UNLEAK(sorting); return 1; From ac093d0790cf9073e6cb03744b1d0fbc1e07d3f7 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 19 Jun 2020 17:55:51 +0000 Subject: [PATCH 34/44] remote-curl: detect algorithm for dumb HTTP by size When reading the info/refs file for a repository, we have no explicit way to detect which hash algorithm is in use because the file doesn't provide one. Detect the hash algorithm in use by the size of the first object ID. If we have an empty repository, we don't know what the hash algorithm is on the remote side, so default to whatever the local side has configured. Without doing this, we cannot clone an empty repository since we don't know its hash algorithm. Test this case appropriately, since we currently have no tests for cloning an empty repository with the dumb HTTP protocol. We anonymize the URL like elsewhere in the function in case the user has decided to include a secret in the URL. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- remote-curl.c | 23 +++++++++++++++++++++-- t/t5550-http-fetch-dumb.sh | 18 ++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/remote-curl.c b/remote-curl.c index 3ed0dfec1b..774a1ca71f 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -252,6 +252,19 @@ static struct ref *parse_git_refs(struct discovery *heads, int for_push) return list; } +static const struct git_hash_algo *detect_hash_algo(struct discovery *heads) +{ + const char *p = memchr(heads->buf, '\t', heads->len); + int algo; + if (!p) + return the_hash_algo; + + algo = hash_algo_by_length((p - heads->buf) / 2); + if (algo == GIT_HASH_UNKNOWN) + return NULL; + return &hash_algos[algo]; +} + static struct ref *parse_info_refs(struct discovery *heads) { char *data, *start, *mid; @@ -262,6 +275,12 @@ static struct ref *parse_info_refs(struct discovery *heads) struct ref *ref = NULL; struct ref *last_ref = NULL; + options.hash_algo = detect_hash_algo(heads); + if (!options.hash_algo) + die("%sinfo/refs not valid: could not determine hash algorithm; " + "is this a git repository?", + transport_anonymize_url(url.buf)); + data = heads->buf; start = NULL; mid = data; @@ -272,13 +291,13 @@ static struct ref *parse_info_refs(struct discovery *heads) if (data[i] == '\t') mid = &data[i]; if (data[i] == '\n') { - if (mid - start != the_hash_algo->hexsz) + if (mid - start != options.hash_algo->hexsz) die(_("%sinfo/refs not valid: is this a git repository?"), transport_anonymize_url(url.buf)); data[i] = 0; ref_name = mid + 1; ref = alloc_ref(ref_name); - get_oid_hex(start, &ref->old_oid); + get_oid_hex_algop(start, &ref->old_oid, options.hash_algo); if (!refs) refs = ref; if (last_ref) diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index 50485300eb..e57716bacd 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -50,6 +50,24 @@ test_expect_success 'create password-protected repository' ' "$HTTPD_DOCUMENT_ROOT_PATH/auth/dumb/repo.git" ' +test_expect_success 'create empty remote repository' ' + git init --bare "$HTTPD_DOCUMENT_ROOT_PATH/empty.git" && + (cd "$HTTPD_DOCUMENT_ROOT_PATH/empty.git" && + mkdir -p hooks && + write_script "hooks/post-update" <<-\EOF && + exec git update-server-info + EOF + hooks/post-update + ) +' + +test_expect_success 'empty dumb HTTP repository has default hash algorithm' ' + test_when_finished "rm -fr clone-empty" && + git clone $HTTPD_URL/dumb/empty.git clone-empty && + git -C clone-empty rev-parse --show-object-format >empty-format && + test "$(cat empty-format)" = "$(test_oid algo)" +' + setup_askpass_helper test_expect_success 'cloning password-protected repository can fail' ' From 586740aa6e70013a837e0d9f3e0ea04c7f180fbd Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 19 Jun 2020 17:55:52 +0000 Subject: [PATCH 35/44] builtin/index-pack: add option to specify hash algorithm git index-pack is usually run in a repository, but need not be. Since packs don't contains information on the algorithm in use, instead relying on context, add an option to index-pack to tell it which one we're using in case someone runs it outside of a repository. Since using --stdin necessarily implies a repository, don't allow specifying an object format if it's provided to prevent users from passing an option that won't work. Add documentation for this option. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- Documentation/git-index-pack.txt | 8 ++++++++ builtin/index-pack.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt index d5b7560bfe..9316d9a80b 100644 --- a/Documentation/git-index-pack.txt +++ b/Documentation/git-index-pack.txt @@ -93,6 +93,14 @@ OPTIONS --max-input-size=:: Die, if the pack is larger than . +--object-format=:: + Specify the given object format (hash algorithm) for the pack. The valid + values are 'sha1' and (if enabled) 'sha256'. The default is the algorithm for + the current repository (set by `extensions.objectFormat`), or 'sha1' if no + value is set or outside a repository. ++ +This option cannot be used with --stdin. + NOTES ----- diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 7bea1fba52..f865666db9 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1667,6 +1667,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) unsigned char pack_hash[GIT_MAX_RAWSZ]; unsigned foreign_nr = 1; /* zero is a "good" value, assume bad */ int report_end_of_input = 0; + int hash_algo = 0; /* * index-pack never needs to fetch missing objects except when @@ -1760,6 +1761,11 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) die(_("bad %s"), arg); } else if (skip_prefix(arg, "--max-input-size=", &arg)) { max_input_size = strtoumax(arg, NULL, 10); + } else if (skip_prefix(arg, "--object-format=", &arg)) { + hash_algo = hash_algo_by_name(arg); + if (hash_algo == GIT_HASH_UNKNOWN) + die(_("unknown hash algorithm '%s'"), arg); + repo_set_hash_algo(the_repository, hash_algo); } else usage(index_pack_usage); continue; @@ -1776,6 +1782,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) die(_("--fix-thin cannot be used without --stdin")); if (from_stdin && !startup_info->have_repository) die(_("--stdin requires a git repository")); + if (from_stdin && hash_algo) + die(_("--object-format cannot be used with --stdin")); if (!index_name && pack_name) index_name = derive_filename(pack_name, "idx", &index_name_buf); From 793731f742ab1df28deb289714e61244e3de3b37 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 19 Jun 2020 17:55:53 +0000 Subject: [PATCH 36/44] t1050: pass algorithm to index-pack when outside repo When outside a repository, git index-pack is unable to guess the hash algorithm in use for a pack, since packs don't contain any information on the algorithm in use. Pass an option to index-pack to help it out in this test. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t1050-large.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 7f88ea07c2..6a56d1ca24 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -12,6 +12,7 @@ file_size () { } test_expect_success setup ' + test_oid_init && # clone does not allow us to pass core.bigfilethreshold to # new repos, so set core.bigfilethreshold globally git config --global core.bigfilethreshold 200k && @@ -177,7 +178,8 @@ test_expect_success 'git-show a large file' ' test_expect_success 'index-pack' ' git clone file://"$(pwd)"/.git foo && - GIT_DIR=non-existent git index-pack --strict --verify foo/.git/objects/pack/*.pack + GIT_DIR=non-existent git index-pack --object-format=$(test_oid algo) \ + --strict --verify foo/.git/objects/pack/*.pack ' test_expect_success 'repack' ' From 97997e6ad2790e58c727c180548a3d0073c4c1d2 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 19 Jun 2020 17:55:54 +0000 Subject: [PATCH 37/44] remote-curl: avoid truncating refs with ls-remote Normally, the remote-curl transport helper is aware of the hash algorithm we're using because we're in a repo with the appropriate hash algorithm set. However, when using git ls-remote outside of a repository, we won't have initialized the hash algorithm properly, so use hash_to_hex_algop to print the ref corresponding to the algorithm we've detected. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- remote-curl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/remote-curl.c b/remote-curl.c index 774a1ca71f..0090e68f4a 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -548,7 +548,9 @@ static void output_refs(struct ref *refs) if (posn->symref) printf("@%s %s\n", posn->symref, posn->name); else - printf("%s %s\n", oid_to_hex(&posn->old_oid), posn->name); + printf("%s %s\n", hash_to_hex_algop(posn->old_oid.hash, + options.hash_algo), + posn->name); } printf("\n"); fflush(stdout); From 54cbbe4c6e4f82a765e70bcd28172e7d56542991 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 19 Jun 2020 17:55:55 +0000 Subject: [PATCH 38/44] t/helper: initialize the repository for test-sha1-array test-sha1-array uses the_hash_algo under the hood. Since t0064 wants to use the value that is correct for the hash algorithm that we're testing, make sure the test helper initializes the repository to set the_hash_algo correctly. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/helper/test-oid-array.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/t/helper/test-oid-array.c b/t/helper/test-oid-array.c index ce9fd5f091..b16cd0b11b 100644 --- a/t/helper/test-oid-array.c +++ b/t/helper/test-oid-array.c @@ -12,6 +12,9 @@ int cmd__oid_array(int argc, const char **argv) { struct oid_array array = OID_ARRAY_INIT; struct strbuf line = STRBUF_INIT; + int nongit_ok; + + setup_git_directory_gently(&nongit_ok); while (strbuf_getline(&line, stdin) != EOF) { const char *arg; From 8fc700354045ccf1eb3c10fe0f7d6cb2c6427628 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 19 Jun 2020 17:55:56 +0000 Subject: [PATCH 39/44] t5702: offer an object-format capability in the test In order to make this test work with SHA-256, offer an object-format capability so that both sides use the same algorithm. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t5702-protocol-v2.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index 5039e66dc4..116358b9ac 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -13,6 +13,7 @@ start_git_daemon --export-all --enable=receive-pack daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent test_expect_success 'create repo to be served by git-daemon' ' + test_oid_init && git init "$daemon_parent" && test_commit -C "$daemon_parent" one ' @@ -394,6 +395,7 @@ test_expect_success 'even with handcrafted request, filter does not work if not # Custom request that tries to filter even though it is not advertised. test-tool pkt-line pack >in <<-EOF && command=fetch + object-format=$(test_oid algo) 0001 want $(git -C server rev-parse master) filter blob:none From f7c6a3bf080aae499e0aa0c69aa0d86423bd24f8 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 19 Jun 2020 17:55:57 +0000 Subject: [PATCH 40/44] t5703: use object-format serve option When we're using an algorithm other than SHA-1, we need to specify the algorithm in use so we don't get a failure with an "unknown format" message. Add a wrapper function that specifies this header if required. Skip specifying this header for SHA-1 to test that it works both with an without this header. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t5703-upload-pack-ref-in-want.sh | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/t/t5703-upload-pack-ref-in-want.sh b/t/t5703-upload-pack-ref-in-want.sh index a34460f7d8..afe7f7f919 100755 --- a/t/t5703-upload-pack-ref-in-want.sh +++ b/t/t5703-upload-pack-ref-in-want.sh @@ -27,6 +27,15 @@ check_output () { test_cmp sorted_commits actual_commits } +write_command () { + echo "command=$1" + + if test "$(test_oid algo)" != sha1 + then + echo "object-format=$(test_oid algo)" + fi +} + # c(o/foo) d(o/bar) # \ / # b e(baz) f(master) @@ -62,7 +71,7 @@ test_expect_success 'config controls ref-in-want advertisement' ' test_expect_success 'invalid want-ref line' ' test-tool pkt-line pack >in <<-EOF && - command=fetch + $(write_command fetch) 0001 no-progress want-ref refs/heads/non-existent @@ -83,7 +92,7 @@ test_expect_success 'basic want-ref' ' oid=$(git rev-parse a) && test-tool pkt-line pack >in <<-EOF && - command=fetch + $(write_command fetch) 0001 no-progress want-ref refs/heads/master @@ -107,7 +116,7 @@ test_expect_success 'multiple want-ref lines' ' oid=$(git rev-parse b) && test-tool pkt-line pack >in <<-EOF && - command=fetch + $(write_command fetch) 0001 no-progress want-ref refs/heads/o/foo @@ -129,7 +138,7 @@ test_expect_success 'mix want and want-ref' ' git rev-parse e f >expected_commits && test-tool pkt-line pack >in <<-EOF && - command=fetch + $(write_command fetch) 0001 no-progress want-ref refs/heads/master @@ -152,7 +161,7 @@ test_expect_success 'want-ref with ref we already have commit for' ' oid=$(git rev-parse c) && test-tool pkt-line pack >in <<-EOF && - command=fetch + $(write_command fetch) 0001 no-progress want-ref refs/heads/o/foo From 4ddd3f506390af217fb86508691cf07f3279e6d2 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 19 Jun 2020 17:55:58 +0000 Subject: [PATCH 41/44] t5704: send object-format capability with SHA-256 When we speak protocol v2 in this test, we must pass the object-format header if the algorithm is not SHA-1. Otherwise, git upload-pack fails because the hash algorithm doesn't match and not because we've failed to speak the protocol correctly. Pass the header so that our assertions test what we're really interested in. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t5704-protocol-violations.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/t/t5704-protocol-violations.sh b/t/t5704-protocol-violations.sh index 950cfb21fe..5c941949b9 100755 --- a/t/t5704-protocol-violations.sh +++ b/t/t5704-protocol-violations.sh @@ -9,6 +9,7 @@ making sure that we do not segfault or otherwise behave badly.' test_expect_success 'extra delim packet in v2 ls-refs args' ' { packetize command=ls-refs && + packetize "object-format=$(test_oid algo)" && printf 0001 && # protocol expects 0000 flush here printf 0001 @@ -21,6 +22,7 @@ test_expect_success 'extra delim packet in v2 ls-refs args' ' test_expect_success 'extra delim packet in v2 fetch args' ' { packetize command=fetch && + packetize "object-format=$(test_oid algo)" && printf 0001 && # protocol expects 0000 flush here printf 0001 From 371c4079f4d14bd9412fc62478407b319f13e3e1 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 19 Jun 2020 17:55:59 +0000 Subject: [PATCH 42/44] t5300: pass --object-format to git index-pack git index-pack by default reads the repository to determine the object format. However, when outside of a repository, it's necessary to specify the hash algorithm in use so that the pack can be properly indexed. Add an --object-format argument when invoking git index-pack outside of a repository. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t5300-pack-object.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index 410a09b0dd..746cdb626e 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -12,7 +12,8 @@ TRASH=$(pwd) test_expect_success \ 'setup' \ - 'rm -f .git/index* && + 'test_oid_init && + rm -f .git/index* && perl -e "print \"a\" x 4096;" > a && perl -e "print \"b\" x 4096;" > b && perl -e "print \"c\" x 4096;" > c && @@ -412,18 +413,18 @@ test_expect_success 'set up pack for non-repo tests' ' ' test_expect_success 'index-pack --stdin complains of non-repo' ' - nongit test_must_fail git index-pack --stdin works in non-repo' ' - nongit git index-pack ../foo.pack && + nongit git index-pack --object-format=$(test_oid algo) ../foo.pack && test_path_is_file foo.idx ' test_expect_success 'index-pack --strict works in non-repo' ' rm -f foo.idx && - nongit git index-pack --strict ../foo.pack && + nongit git index-pack --strict --object-format=$(test_oid algo) ../foo.pack && test_path_is_file foo.idx ' From 6161ce7bbeeb128dd1a176d8355e2ce18168b16a Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 19 Jun 2020 17:56:00 +0000 Subject: [PATCH 43/44] bundle: detect hash algorithm when reading refs Much like with the dumb HTTP transport, there isn't a way to explicitly specify the hash algorithm when dealing with a bundle, so detect the algorithm based on the length of the object IDs in the prerequisites and ref advertisements. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- bundle.c | 22 +++++++++++++++++++++- bundle.h | 1 + transport.c | 10 ++++++++-- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/bundle.c b/bundle.c index 99439e07a1..2a0d744d3f 100644 --- a/bundle.c +++ b/bundle.c @@ -23,6 +23,17 @@ static void add_to_ref_list(const struct object_id *oid, const char *name, list->nr++; } +static const struct git_hash_algo *detect_hash_algo(struct strbuf *buf) +{ + size_t len = strcspn(buf->buf, " \n"); + int algo; + + algo = hash_algo_by_length(len / 2); + if (algo == GIT_HASH_UNKNOWN) + return NULL; + return &hash_algos[algo]; +} + static int parse_bundle_header(int fd, struct bundle_header *header, const char *report_path) { @@ -52,12 +63,21 @@ static int parse_bundle_header(int fd, struct bundle_header *header, } strbuf_rtrim(&buf); + if (!header->hash_algo) { + header->hash_algo = detect_hash_algo(&buf); + if (!header->hash_algo) { + error(_("unknown hash algorithm length")); + status = -1; + break; + } + } + /* * Tip lines have object name, SP, and refname. * Prerequisites have object name that is optionally * followed by SP and subject line. */ - if (parse_oid_hex(buf.buf, &oid, &p) || + if (parse_oid_hex_algop(buf.buf, &oid, &p, header->hash_algo) || (*p && !isspace(*p)) || (!is_prereq && !*p)) { if (report_path) diff --git a/bundle.h b/bundle.h index ceab0c7475..2dc9442024 100644 --- a/bundle.h +++ b/bundle.h @@ -15,6 +15,7 @@ struct ref_list { struct bundle_header { struct ref_list prerequisites; struct ref_list references; + const struct git_hash_algo *hash_algo; }; int is_bundle(const char *path, int quiet); diff --git a/transport.c b/transport.c index b43d985f90..38a432be69 100644 --- a/transport.c +++ b/transport.c @@ -143,6 +143,9 @@ static struct ref *get_refs_from_bundle(struct transport *transport, data->fd = read_bundle_header(transport->url, &data->header); if (data->fd < 0) die(_("could not read bundle '%s'"), transport->url); + + transport->hash_algo = data->header.hash_algo; + for (i = 0; i < data->header.references.nr; i++) { struct ref_list_entry *e = data->header.references.list + i; struct ref *ref = alloc_ref(e->name); @@ -157,11 +160,14 @@ static int fetch_refs_from_bundle(struct transport *transport, int nr_heads, struct ref **to_fetch) { struct bundle_transport_data *data = transport->data; + int ret; if (!data->get_refs_from_bundle_called) get_refs_from_bundle(transport, 0, NULL); - return unbundle(the_repository, &data->header, data->fd, - transport->progress ? BUNDLE_VERBOSE : 0); + ret = unbundle(the_repository, &data->header, data->fd, + transport->progress ? BUNDLE_VERBOSE : 0); + transport->hash_algo = data->header.hash_algo; + return ret; } static int close_bundle(struct transport *transport) From 3716d50dd5c8ee7e5ccaa89fcbfff2cd1b82ad1d Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 19 Jun 2020 17:56:01 +0000 Subject: [PATCH 44/44] remote-testgit: adapt for object-format When using an algorithm other than SHA-1, we need the remote helper to advertise support for the object-format extension and provide information back to us so that we can properly parse refs and return data. Ensure that the test remote helper understands these extensions. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t5801/git-remote-testgit | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/t/t5801/git-remote-testgit b/t/t5801/git-remote-testgit index 6b9f0b5dc7..1544d6dc6b 100755 --- a/t/t5801/git-remote-testgit +++ b/t/t5801/git-remote-testgit @@ -52,9 +52,11 @@ do test -n "$GIT_REMOTE_TESTGIT_SIGNED_TAGS" && echo "signed-tags" test -n "$GIT_REMOTE_TESTGIT_NO_PRIVATE_UPDATE" && echo "no-private-update" echo 'option' + echo 'object-format' echo ;; list) + echo ":object-format $(git rev-parse --show-object-format=storage)" git for-each-ref --format='? %(refname)' 'refs/heads/' 'refs/tags/' head=$(git symbolic-ref HEAD) echo "@$head HEAD" @@ -139,6 +141,10 @@ do test $val = "true" && force="true" || force= echo "ok" ;; + object-format) + test $val = "true" && object_format="true" || object_format= + echo "ok" + ;; *) echo "unsupported" ;;