fast-import: let importers retrieve blobs

New objects written by fast-import are not available immediately.
Until a checkpoint has been started and finishes writing the pack
index, any new blobs will not be accessible using standard git tools.

So introduce a new way to access them: a "cat-blob" command in the
command stream requests for fast-import to print a blob to stdout or a
file descriptor specified by the argument to --cat-blob-fd.  The value
for cat-blob-fd cannot be specified in the stream because that would
be a layering violation: the decision of where to direct a stream has
to be made when fast-import is started anyway, so we might as well
make the stream format is independent of that detail.

Output uses the same format as "git cat-file --batch".

Thanks to Sverre Rabbelier and Sam Vilain for guidance in designing
the protocol.

Based-on-patch-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
Acked-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
David Barr 2010-11-28 13:45:01 -06:00 committed by Junio C Hamano
parent 4980fffb2c
commit 85c62395b1
3 changed files with 329 additions and 2 deletions

View File

@ -92,6 +92,11 @@ OPTIONS
--(no-)-relative-marks= with the --(import|export)-marks= --(no-)-relative-marks= with the --(import|export)-marks=
options. options.
--cat-blob-fd=<fd>::
Specify the file descriptor that will be written to
when the `cat-blob` command is encountered in the stream.
The default behaviour is to write to `stdout`.
--export-pack-edges=<file>:: --export-pack-edges=<file>::
After creating a packfile, print a line of data to After creating a packfile, print a line of data to
<file> listing the filename of the packfile and the last <file> listing the filename of the packfile and the last
@ -320,6 +325,11 @@ and control the current import process. More detailed discussion
standard output. This command is optional and is not needed standard output. This command is optional and is not needed
to perform an import. to perform an import.
`cat-blob`::
Causes fast-import to print a blob in 'cat-file --batch'
format to the file descriptor set with `--cat-blob-fd` or
`stdout` if unspecified.
`feature`:: `feature`::
Require that fast-import supports the specified feature, or Require that fast-import supports the specified feature, or
abort if it does not. abort if it does not.
@ -879,6 +889,29 @@ Placing a `progress` command immediately after a `checkpoint` will
inform the reader when the `checkpoint` has been completed and it inform the reader when the `checkpoint` has been completed and it
can safely access the refs that fast-import updated. can safely access the refs that fast-import updated.
`cat-blob`
~~~~~~~~~~
Causes fast-import to print a blob to a file descriptor previously
arranged with the `--cat-blob-fd` argument. The command otherwise
has no impact on the current import; its main purpose is to
retrieve blobs that may be in fast-import's memory but not
accessible from the target repository.
....
'cat-blob' SP <dataref> LF
....
The `<dataref>` can be either a mark reference (`:<idnum>`)
set previously or a full 40-byte SHA-1 of a Git blob, preexisting or
ready to be written.
output uses the same format as `git cat-file --batch`:
====
<sha1> SP 'blob' SP <size> LF
<contents> LF
====
`feature` `feature`
~~~~~~~~~ ~~~~~~~~~
Require that fast-import supports the specified feature, or abort if Require that fast-import supports the specified feature, or abort if
@ -905,6 +938,13 @@ import-marks::
second, an --import-marks= command-line option overrides second, an --import-marks= command-line option overrides
any "feature import-marks" command in the stream. any "feature import-marks" command in the stream.
cat-blob::
Ignored. Versions of fast-import not supporting the
"cat-blob" command will exit with a message indicating so.
This lets the import error out early with a clear message,
rather than wasting time on the early part of an import
before the unsupported command is detected.
`option` `option`
~~~~~~~~ ~~~~~~~~
Processes the specified option so that git fast-import behaves in a Processes the specified option so that git fast-import behaves in a
@ -930,6 +970,7 @@ not be passed as option:
* date-format * date-format
* import-marks * import-marks
* export-marks * export-marks
* cat-blob-fd
* force * force
Crash Reports Crash Reports

View File

@ -55,6 +55,8 @@ Format of STDIN stream:
('from' sp committish lf)? ('from' sp committish lf)?
lf?; lf?;
cat_blob ::= 'cat-blob' sp (hexsha1 | idnum) lf;
checkpoint ::= 'checkpoint' lf checkpoint ::= 'checkpoint' lf
lf?; lf?;
@ -361,6 +363,9 @@ static uintmax_t next_mark;
static struct strbuf new_data = STRBUF_INIT; static struct strbuf new_data = STRBUF_INIT;
static int seen_data_command; static int seen_data_command;
/* Where to write output of cat-blob commands */
static int cat_blob_fd = STDOUT_FILENO;
static void parse_argv(void); static void parse_argv(void);
static void write_branch_report(FILE *rpt, struct branch *b) static void write_branch_report(FILE *rpt, struct branch *b)
@ -2689,6 +2694,81 @@ static void parse_reset_branch(void)
unread_command_buf = 1; unread_command_buf = 1;
} }
static void cat_blob_write(const char *buf, unsigned long size)
{
if (write_in_full(cat_blob_fd, buf, size) != size)
die_errno("Write to frontend failed");
}
static void cat_blob(struct object_entry *oe, unsigned char sha1[20])
{
struct strbuf line = STRBUF_INIT;
unsigned long size;
enum object_type type = 0;
char *buf;
if (!oe || oe->pack_id == MAX_PACK_ID) {
buf = read_sha1_file(sha1, &type, &size);
} else {
type = oe->type;
buf = gfi_unpack_entry(oe, &size);
}
/*
* Output based on batch_one_object() from cat-file.c.
*/
if (type <= 0) {
strbuf_reset(&line);
strbuf_addf(&line, "%s missing\n", sha1_to_hex(sha1));
cat_blob_write(line.buf, line.len);
strbuf_release(&line);
free(buf);
return;
}
if (!buf)
die("Can't read object %s", sha1_to_hex(sha1));
if (type != OBJ_BLOB)
die("Object %s is a %s but a blob was expected.",
sha1_to_hex(sha1), typename(type));
strbuf_reset(&line);
strbuf_addf(&line, "%s %s %lu\n", sha1_to_hex(sha1),
typename(type), size);
cat_blob_write(line.buf, line.len);
strbuf_release(&line);
cat_blob_write(buf, size);
cat_blob_write("\n", 1);
free(buf);
}
static void parse_cat_blob(void)
{
const char *p;
struct object_entry *oe = oe;
unsigned char sha1[20];
/* cat-blob SP <object> LF */
p = command_buf.buf + strlen("cat-blob ");
if (*p == ':') {
char *x;
oe = find_mark(strtoumax(p + 1, &x, 10));
if (x == p + 1)
die("Invalid mark: %s", command_buf.buf);
if (!oe)
die("Unknown mark: %s", command_buf.buf);
if (*x)
die("Garbage after mark: %s", command_buf.buf);
hashcpy(sha1, oe->idx.sha1);
} else {
if (get_sha1_hex(p, sha1))
die("Invalid SHA1: %s", command_buf.buf);
if (p[40])
die("Garbage after SHA1: %s", command_buf.buf);
oe = find_object(sha1);
}
cat_blob(oe, sha1);
}
static void parse_checkpoint(void) static void parse_checkpoint(void)
{ {
if (object_count) { if (object_count) {
@ -2773,6 +2853,14 @@ static void option_export_marks(const char *marks)
safe_create_leading_directories_const(export_marks_file); safe_create_leading_directories_const(export_marks_file);
} }
static void option_cat_blob_fd(const char *fd)
{
unsigned long n = ulong_arg("--cat-blob-fd", fd);
if (n > (unsigned long) INT_MAX)
die("--cat-blob-fd cannot exceed %d", INT_MAX);
cat_blob_fd = (int) n;
}
static void option_export_pack_edges(const char *edges) static void option_export_pack_edges(const char *edges)
{ {
if (pack_edges) if (pack_edges)
@ -2826,6 +2914,8 @@ static int parse_one_feature(const char *feature, int from_stream)
option_import_marks(feature + 13, from_stream); option_import_marks(feature + 13, from_stream);
} else if (!prefixcmp(feature, "export-marks=")) { } else if (!prefixcmp(feature, "export-marks=")) {
option_export_marks(feature + 13); option_export_marks(feature + 13);
} else if (!strcmp(feature, "cat-blob")) {
; /* Don't die - this feature is supported */
} else if (!prefixcmp(feature, "relative-marks")) { } else if (!prefixcmp(feature, "relative-marks")) {
relative_marks_paths = 1; relative_marks_paths = 1;
} else if (!prefixcmp(feature, "no-relative-marks")) { } else if (!prefixcmp(feature, "no-relative-marks")) {
@ -2920,6 +3010,11 @@ static void parse_argv(void)
if (parse_one_feature(a + 2, 0)) if (parse_one_feature(a + 2, 0))
continue; continue;
if (!prefixcmp(a + 2, "cat-blob-fd=")) {
option_cat_blob_fd(a + 2 + strlen("cat-blob-fd="));
continue;
}
die("unknown option %s", a); die("unknown option %s", a);
} }
if (i != global_argc) if (i != global_argc)
@ -2971,6 +3066,8 @@ int main(int argc, const char **argv)
parse_new_tag(); parse_new_tag();
else if (!prefixcmp(command_buf.buf, "reset ")) else if (!prefixcmp(command_buf.buf, "reset "))
parse_reset_branch(); parse_reset_branch();
else if (!prefixcmp(command_buf.buf, "cat-blob "))
parse_cat_blob();
else if (!strcmp("checkpoint", command_buf.buf)) else if (!strcmp("checkpoint", command_buf.buf))
parse_checkpoint(); parse_checkpoint();
else if (!prefixcmp(command_buf.buf, "progress ")) else if (!prefixcmp(command_buf.buf, "progress "))

View File

@ -23,11 +23,18 @@ file5_data='an inline file.
file6_data='#!/bin/sh file6_data='#!/bin/sh
echo "$@"' echo "$@"'
>empty
### ###
### series A ### series A
### ###
test_tick test_tick
test_expect_success 'empty stream succeeds' '
git fast-import </dev/null
'
cat >input <<INPUT_END cat >input <<INPUT_END
blob blob
mark :2 mark :2
@ -1632,6 +1639,190 @@ test_expect_success 'R: feature no-relative-marks should be honoured' '
test_cmp marks.new non-relative.out test_cmp marks.new non-relative.out
' '
test_expect_success 'R: feature cat-blob supported' '
echo "feature cat-blob" |
git fast-import
'
test_expect_success 'R: cat-blob-fd must be a nonnegative integer' '
test_must_fail git fast-import --cat-blob-fd=-1 </dev/null
'
test_expect_success 'R: print old blob' '
blob=$(echo "yes it can" | git hash-object -w --stdin) &&
cat >expect <<-EOF &&
${blob} blob 11
yes it can
EOF
echo "cat-blob $blob" |
git fast-import --cat-blob-fd=6 6>actual &&
test_cmp expect actual
'
test_expect_success 'R: in-stream cat-blob-fd not respected' '
echo hello >greeting &&
blob=$(git hash-object -w greeting) &&
cat >expect <<-EOF &&
${blob} blob 6
hello
EOF
git fast-import --cat-blob-fd=3 3>actual.3 >actual.1 <<-EOF &&
cat-blob $blob
EOF
test_cmp expect actual.3 &&
test_cmp empty actual.1 &&
git fast-import 3>actual.3 >actual.1 <<-EOF &&
option cat-blob-fd=3
cat-blob $blob
EOF
test_cmp empty actual.3 &&
test_cmp expect actual.1
'
test_expect_success 'R: print new blob' '
blob=$(echo "yep yep yep" | git hash-object --stdin) &&
cat >expect <<-EOF &&
${blob} blob 12
yep yep yep
EOF
git fast-import --cat-blob-fd=6 6>actual <<-\EOF &&
blob
mark :1
data <<BLOB_END
yep yep yep
BLOB_END
cat-blob :1
EOF
test_cmp expect actual
'
test_expect_success 'R: print new blob by sha1' '
blob=$(echo "a new blob named by sha1" | git hash-object --stdin) &&
cat >expect <<-EOF &&
${blob} blob 25
a new blob named by sha1
EOF
git fast-import --cat-blob-fd=6 6>actual <<-EOF &&
blob
data <<BLOB_END
a new blob named by sha1
BLOB_END
cat-blob $blob
EOF
test_cmp expect actual
'
test_expect_success 'setup: big file' '
(
echo "the quick brown fox jumps over the lazy dog" >big &&
for i in 1 2 3
do
cat big big big big >bigger &&
cat bigger bigger bigger bigger >big ||
exit
done
)
'
test_expect_success 'R: print two blobs to stdout' '
blob1=$(git hash-object big) &&
blob1_len=$(wc -c <big) &&
blob2=$(echo hello | git hash-object --stdin) &&
{
echo ${blob1} blob $blob1_len &&
cat big &&
cat <<-EOF
${blob2} blob 6
hello
EOF
} >expect &&
{
cat <<-\END_PART1 &&
blob
mark :1
data <<data_end
END_PART1
cat big &&
cat <<-\EOF
data_end
blob
mark :2
data <<data_end
hello
data_end
cat-blob :1
cat-blob :2
EOF
} |
git fast-import >actual &&
test_cmp expect actual
'
test_expect_success 'setup: have pipes?' '
rm -f frob &&
if mkfifo frob
then
test_set_prereq PIPE
fi
'
test_expect_success PIPE 'R: copy using cat-file' '
expect_id=$(git hash-object big) &&
expect_len=$(wc -c <big) &&
echo $expect_id blob $expect_len >expect.response &&
rm -f blobs &&
cat >frontend <<-\FRONTEND_END &&
#!/bin/sh
cat <<EOF &&
feature cat-blob
blob
mark :1
data <<BLOB
EOF
cat big
cat <<EOF
BLOB
cat-blob :1
EOF
read blob_id type size <&3 &&
echo "$blob_id $type $size" >response &&
dd if=/dev/stdin of=blob bs=$size count=1 <&3 &&
read newline <&3 &&
cat <<EOF &&
commit refs/heads/copied
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
data <<COMMIT
copy big file as file3
COMMIT
M 644 inline file3
data <<BLOB
EOF
cat blob &&
cat <<EOF
BLOB
EOF
FRONTEND_END
mkfifo blobs &&
(
export GIT_COMMITTER_NAME GIT_COMMITTER_EMAIL GIT_COMMITTER_DATE &&
sh frontend 3<blobs |
git fast-import --cat-blob-fd=3 3>blobs
) &&
git show copied:file3 >actual &&
test_cmp expect.response response &&
test_cmp big actual
'
cat >input << EOF cat >input << EOF
option git quiet option git quiet
blob blob
@ -1640,8 +1831,6 @@ hi
EOF EOF
touch empty
test_expect_success 'R: quiet option results in no stats being output' ' test_expect_success 'R: quiet option results in no stats being output' '
cat input | git fast-import 2> output && cat input | git fast-import 2> output &&
test_cmp empty output test_cmp empty output