Merge branch 'nd/stream-to-archive'

Stream large blobs directly out to archive files without slurping
everything in memory first.

By René Scharfe (6) and Nguyễn Thái Ngọc Duy (4)
* nd/stream-to-archive:
  t5000: rationalize unzip tests
  archive-zip: streaming for deflated files
  archive-zip: streaming for stored files
  archive-zip: factor out helpers for writing sizes and CRC
  archive-zip: remove uncompressed_size
  archive-tar: stream large blobs to tar file
  archive: delegate blob reading to backend
  archive-tar: unindent write_tar_entry by one level
  archive-tar: turn write_tar_entry into blob-writing only
  streaming: void pointer instead of char pointer
This commit is contained in:
Junio C Hamano 2012-05-10 10:49:13 -07:00
commit d41355fc31
8 changed files with 405 additions and 136 deletions

View File

@ -4,6 +4,7 @@
#include "cache.h" #include "cache.h"
#include "tar.h" #include "tar.h"
#include "archive.h" #include "archive.h"
#include "streaming.h"
#include "run-command.h" #include "run-command.h"
#define RECORDSIZE (512) #define RECORDSIZE (512)
@ -30,10 +31,9 @@ static void write_if_needed(void)
* queues up writes, so that all our write(2) calls write exactly one * queues up writes, so that all our write(2) calls write exactly one
* full block; pads writes to RECORDSIZE * full block; pads writes to RECORDSIZE
*/ */
static void write_blocked(const void *data, unsigned long size) static void do_write_blocked(const void *data, unsigned long size)
{ {
const char *buf = data; const char *buf = data;
unsigned long tail;
if (offset) { if (offset) {
unsigned long chunk = BLOCKSIZE - offset; unsigned long chunk = BLOCKSIZE - offset;
@ -54,6 +54,11 @@ static void write_blocked(const void *data, unsigned long size)
memcpy(block + offset, buf, size); memcpy(block + offset, buf, size);
offset += size; offset += size;
} }
}
static void finish_record(void)
{
unsigned long tail;
tail = offset % RECORDSIZE; tail = offset % RECORDSIZE;
if (tail) { if (tail) {
memset(block + offset, 0, RECORDSIZE - tail); memset(block + offset, 0, RECORDSIZE - tail);
@ -62,6 +67,12 @@ static void write_blocked(const void *data, unsigned long size)
write_if_needed(); write_if_needed();
} }
/*
 * Queue `size` bytes from `data` through do_write_blocked() and then
 * call finish_record(), which zero-fills the buffered output up to the
 * next RECORDSIZE boundary.  Use this for a complete entry part (a
 * header or a whole in-memory blob); callers that feed data in several
 * pieces call do_write_blocked() repeatedly and finish_record() once.
 */
static void write_blocked(const void *data, unsigned long size)
{
do_write_blocked(data, size);
finish_record();
}
/* /*
* The end of tar archives is marked by 2*512 nul bytes and after that * The end of tar archives is marked by 2*512 nul bytes and after that
* follows the rest of the block (if any). * follows the rest of the block (if any).
@ -77,6 +88,33 @@ static void write_trailer(void)
} }
} }
/*
 * Copy the object named by `sha1` into the tar output without loading
 * it into memory as a whole: the object is opened as an istream, read
 * in chunks of at most BLOCKSIZE bytes, and each chunk is queued with
 * do_write_blocked().
 *
 * finish_record() is called only when the stream ended cleanly (the
 * last read returned 0), so a failed read leaves the record unpadded.
 *
 * Returns the result of the last read_istream() call: 0 on success, or
 * the negative value it reported on failure.  If the stream cannot be
 * opened, the return value of error() is passed back instead.
 */
static int stream_blocked(const unsigned char *sha1)
{
	char chunk[BLOCKSIZE];
	enum object_type type;
	unsigned long sz;
	ssize_t nread;
	struct git_istream *st = open_istream(sha1, &type, &sz, NULL);

	if (!st)
		return error("cannot stream blob %s", sha1_to_hex(sha1));

	while ((nread = read_istream(st, chunk, sizeof(chunk))) > 0)
		do_write_blocked(chunk, nread);
	close_istream(st);

	if (nread == 0)
		finish_record();
	return nread;
}
/* /*
* pax extended header records have the format "%u %s=%s\n". %u contains * pax extended header records have the format "%u %s=%s\n". %u contains
* the size of the whole string (including the %u), the first %s is the * the size of the whole string (including the %u), the first %s is the
@ -123,56 +161,101 @@ static size_t get_path_prefix(const char *path, size_t pathlen, size_t maxlen)
return i; return i;
} }
/*
 * Fill in the ustar header fields that are written the same way for
 * every kind of entry: permission bits, size, mtime, owner/group,
 * device numbers, magic/version and finally the checksum.
 *
 * The caller is expected to have zeroed `header` and set the
 * entry-specific fields (typeflag, name, and where applicable prefix
 * or linkname) beforehand, because the checksum is computed here.
 *
 *   args   - supplies the timestamp (args->time) stored as mtime
 *   header - header to complete in place
 *   mode   - file mode; only the low 07777 bits are stored, and its
 *            file type decides whether `size` is recorded
 *   size   - payload size in bytes; written only for regular files
 *
 * NOTE(review): the sprintf() calls are unbounded and the field widths
 * live in struct ustar_header, which is not visible here.  Confirm
 * that no size or time needing more than 11 octal digits can reach
 * this function.
 */
static void prepare_header(struct archiver_args *args,
struct ustar_header *header,
unsigned int mode, unsigned long size)
{
sprintf(header->mode, "%07o", mode & 07777);
/* anything that is not a regular file is recorded with size 0 */
sprintf(header->size, "%011lo", S_ISREG(mode) ? size : 0);
sprintf(header->mtime, "%011lo", (unsigned long) args->time);
/* ownership is fixed: uid/gid 0, user and group name "root" */
sprintf(header->uid, "%07o", 0);
sprintf(header->gid, "%07o", 0);
strlcpy(header->uname, "root", sizeof(header->uname));
strlcpy(header->gname, "root", sizeof(header->gname));
sprintf(header->devmajor, "%07o", 0);
sprintf(header->devminor, "%07o", 0);
/* 6 bytes: "ustar" plus its terminating NUL */
memcpy(header->magic, "ustar", 6);
/* 2 bytes: the version digits are copied without a terminating NUL */
memcpy(header->version, "00", 2);
/* must stay last: computed from the header as filled in above */
sprintf(header->chksum, "%07o", ustar_header_chksum(header));
}
static int write_extended_header(struct archiver_args *args,
const unsigned char *sha1,
const void *buffer, unsigned long size)
{
struct ustar_header header;
unsigned int mode;
memset(&header, 0, sizeof(header));
*header.typeflag = TYPEFLAG_EXT_HEADER;
mode = 0100666;
sprintf(header.name, "%s.paxheader", sha1_to_hex(sha1));
prepare_header(args, &header, mode, size);
write_blocked(&header, sizeof(header));
write_blocked(buffer, size);
return 0;
}
static int write_tar_entry(struct archiver_args *args, static int write_tar_entry(struct archiver_args *args,
const unsigned char *sha1, const char *path, size_t pathlen, const unsigned char *sha1,
unsigned int mode, void *buffer, unsigned long size) const char *path, size_t pathlen,
unsigned int mode)
{ {
struct ustar_header header; struct ustar_header header;
struct strbuf ext_header = STRBUF_INIT; struct strbuf ext_header = STRBUF_INIT;
unsigned int old_mode = mode;
unsigned long size;
void *buffer;
int err = 0; int err = 0;
memset(&header, 0, sizeof(header)); memset(&header, 0, sizeof(header));
if (!sha1) { if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
*header.typeflag = TYPEFLAG_GLOBAL_HEADER; *header.typeflag = TYPEFLAG_DIR;
mode = 0100666; mode = (mode | 0777) & ~tar_umask;
strcpy(header.name, "pax_global_header"); } else if (S_ISLNK(mode)) {
} else if (!path) { *header.typeflag = TYPEFLAG_LNK;
*header.typeflag = TYPEFLAG_EXT_HEADER; mode |= 0777;
mode = 0100666; } else if (S_ISREG(mode)) {
sprintf(header.name, "%s.paxheader", sha1_to_hex(sha1)); *header.typeflag = TYPEFLAG_REG;
mode = (mode | ((mode & 0100) ? 0777 : 0666)) & ~tar_umask;
} else { } else {
if (S_ISDIR(mode) || S_ISGITLINK(mode)) { return error("unsupported file mode: 0%o (SHA1: %s)",
*header.typeflag = TYPEFLAG_DIR; mode, sha1_to_hex(sha1));
mode = (mode | 0777) & ~tar_umask; }
} else if (S_ISLNK(mode)) { if (pathlen > sizeof(header.name)) {
*header.typeflag = TYPEFLAG_LNK; size_t plen = get_path_prefix(path, pathlen,
mode |= 0777; sizeof(header.prefix));
} else if (S_ISREG(mode)) { size_t rest = pathlen - plen - 1;
*header.typeflag = TYPEFLAG_REG; if (plen > 0 && rest <= sizeof(header.name)) {
mode = (mode | ((mode & 0100) ? 0777 : 0666)) & ~tar_umask; memcpy(header.prefix, path, plen);
} else {
return error("unsupported file mode: 0%o (SHA1: %s)",
mode, sha1_to_hex(sha1));
}
if (pathlen > sizeof(header.name)) {
size_t plen = get_path_prefix(path, pathlen,
sizeof(header.prefix));
size_t rest = pathlen - plen - 1;
if (plen > 0 && rest <= sizeof(header.name)) {
memcpy(header.prefix, path, plen);
memcpy(header.name, path + plen + 1, rest); memcpy(header.name, path + plen + 1, rest);
} else { } else {
sprintf(header.name, "%s.data", sprintf(header.name, "%s.data",
sha1_to_hex(sha1)); sha1_to_hex(sha1));
strbuf_append_ext_header(&ext_header, "path", strbuf_append_ext_header(&ext_header, "path",
path, pathlen); path, pathlen);
} }
} else } else
memcpy(header.name, path, pathlen); memcpy(header.name, path, pathlen);
if (S_ISREG(mode) && !args->convert &&
sha1_object_info(sha1, &size) == OBJ_BLOB &&
size > big_file_threshold)
buffer = NULL;
else if (S_ISLNK(mode) || S_ISREG(mode)) {
enum object_type type;
buffer = sha1_file_to_archive(args, path, sha1, old_mode, &type, &size);
if (!buffer)
return error("cannot read %s", sha1_to_hex(sha1));
} else {
buffer = NULL;
size = 0;
} }
if (S_ISLNK(mode) && buffer) { if (S_ISLNK(mode)) {
if (size > sizeof(header.linkname)) { if (size > sizeof(header.linkname)) {
sprintf(header.linkname, "see %s.paxheader", sprintf(header.linkname, "see %s.paxheader",
sha1_to_hex(sha1)); sha1_to_hex(sha1));
@ -182,32 +265,25 @@ static int write_tar_entry(struct archiver_args *args,
memcpy(header.linkname, buffer, size); memcpy(header.linkname, buffer, size);
} }
sprintf(header.mode, "%07o", mode & 07777); prepare_header(args, &header, mode, size);
sprintf(header.size, "%011lo", S_ISREG(mode) ? size : 0);
sprintf(header.mtime, "%011lo", (unsigned long) args->time);
sprintf(header.uid, "%07o", 0);
sprintf(header.gid, "%07o", 0);
strlcpy(header.uname, "root", sizeof(header.uname));
strlcpy(header.gname, "root", sizeof(header.gname));
sprintf(header.devmajor, "%07o", 0);
sprintf(header.devminor, "%07o", 0);
memcpy(header.magic, "ustar", 6);
memcpy(header.version, "00", 2);
sprintf(header.chksum, "%07o", ustar_header_chksum(&header));
if (ext_header.len > 0) { if (ext_header.len > 0) {
err = write_tar_entry(args, sha1, NULL, 0, 0, ext_header.buf, err = write_extended_header(args, sha1, ext_header.buf,
ext_header.len); ext_header.len);
if (err) if (err) {
free(buffer);
return err; return err;
}
} }
strbuf_release(&ext_header); strbuf_release(&ext_header);
write_blocked(&header, sizeof(header)); write_blocked(&header, sizeof(header));
if (S_ISREG(mode) && buffer && size > 0) if (S_ISREG(mode) && size > 0) {
write_blocked(buffer, size); if (buffer)
write_blocked(buffer, size);
else
err = stream_blocked(sha1);
}
free(buffer);
return err; return err;
} }
@ -215,11 +291,18 @@ static int write_global_extended_header(struct archiver_args *args)
{ {
const unsigned char *sha1 = args->commit_sha1; const unsigned char *sha1 = args->commit_sha1;
struct strbuf ext_header = STRBUF_INIT; struct strbuf ext_header = STRBUF_INIT;
int err; struct ustar_header header;
unsigned int mode;
int err = 0;
strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40); strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40);
err = write_tar_entry(args, NULL, NULL, 0, 0, ext_header.buf, memset(&header, 0, sizeof(header));
ext_header.len); *header.typeflag = TYPEFLAG_GLOBAL_HEADER;
mode = 0100666;
strcpy(header.name, "pax_global_header");
prepare_header(args, &header, mode, ext_header.len);
write_blocked(&header, sizeof(header));
write_blocked(ext_header.buf, ext_header.len);
strbuf_release(&ext_header); strbuf_release(&ext_header);
return err; return err;
} }

View File

@ -3,6 +3,7 @@
*/ */
#include "cache.h" #include "cache.h"
#include "archive.h" #include "archive.h"
#include "streaming.h"
static int zip_date; static int zip_date;
static int zip_time; static int zip_time;
@ -15,6 +16,7 @@ static unsigned int zip_dir_offset;
static unsigned int zip_dir_entries; static unsigned int zip_dir_entries;
#define ZIP_DIRECTORY_MIN_SIZE (1024 * 1024) #define ZIP_DIRECTORY_MIN_SIZE (1024 * 1024)
#define ZIP_STREAM (8)
struct zip_local_header { struct zip_local_header {
unsigned char magic[4]; unsigned char magic[4];
@ -31,6 +33,14 @@ struct zip_local_header {
unsigned char _end[1]; unsigned char _end[1];
}; };
struct zip_data_desc {
unsigned char magic[4];
unsigned char crc32[4];
unsigned char compressed_size[4];
unsigned char size[4];
unsigned char _end[1];
};
struct zip_dir_header { struct zip_dir_header {
unsigned char magic[4]; unsigned char magic[4];
unsigned char creator_version[2]; unsigned char creator_version[2];
@ -70,6 +80,7 @@ struct zip_dir_trailer {
* we're interested in. * we're interested in.
*/ */
#define ZIP_LOCAL_HEADER_SIZE offsetof(struct zip_local_header, _end) #define ZIP_LOCAL_HEADER_SIZE offsetof(struct zip_local_header, _end)
#define ZIP_DATA_DESC_SIZE offsetof(struct zip_data_desc, _end)
#define ZIP_DIR_HEADER_SIZE offsetof(struct zip_dir_header, _end) #define ZIP_DIR_HEADER_SIZE offsetof(struct zip_dir_header, _end)
#define ZIP_DIR_TRAILER_SIZE offsetof(struct zip_dir_trailer, _end) #define ZIP_DIR_TRAILER_SIZE offsetof(struct zip_dir_trailer, _end)
@ -120,20 +131,59 @@ static void *zlib_deflate(void *data, unsigned long size,
return buffer; return buffer;
} }
static void write_zip_data_desc(unsigned long size,
unsigned long compressed_size,
unsigned long crc)
{
struct zip_data_desc trailer;
copy_le32(trailer.magic, 0x08074b50);
copy_le32(trailer.crc32, crc);
copy_le32(trailer.compressed_size, compressed_size);
copy_le32(trailer.size, size);
write_or_die(1, &trailer, ZIP_DATA_DESC_SIZE);
}
/*
 * Store the uncompressed size, compressed size and CRC of an entry in
 * its central directory header.  Only these three fields are written;
 * every other field of `header` is left as it was.
 */
static void set_zip_dir_data_desc(struct zip_dir_header *header,
				  unsigned long size,
				  unsigned long compressed_size,
				  unsigned long crc)
{
	/* the three fields are independent, so fill them in parameter order */
	copy_le32(header->size, size);
	copy_le32(header->compressed_size, compressed_size);
	copy_le32(header->crc32, crc);
}
/*
 * Store the uncompressed size, compressed size and CRC of an entry in
 * its local file header.  Only these three fields are written; every
 * other field of `header` is left as it was.
 */
static void set_zip_header_data_desc(struct zip_local_header *header,
				     unsigned long size,
				     unsigned long compressed_size,
				     unsigned long crc)
{
	/* the three fields are independent, so fill them in parameter order */
	copy_le32(header->size, size);
	copy_le32(header->compressed_size, compressed_size);
	copy_le32(header->crc32, crc);
}
#define STREAM_BUFFER_SIZE (1024 * 16)
static int write_zip_entry(struct archiver_args *args, static int write_zip_entry(struct archiver_args *args,
const unsigned char *sha1, const char *path, size_t pathlen, const unsigned char *sha1,
unsigned int mode, void *buffer, unsigned long size) const char *path, size_t pathlen,
unsigned int mode)
{ {
struct zip_local_header header; struct zip_local_header header;
struct zip_dir_header dirent; struct zip_dir_header dirent;
unsigned long attr2; unsigned long attr2;
unsigned long compressed_size; unsigned long compressed_size;
unsigned long uncompressed_size;
unsigned long crc; unsigned long crc;
unsigned long direntsize; unsigned long direntsize;
int method; int method;
unsigned char *out; unsigned char *out;
void *deflated = NULL; void *deflated = NULL;
void *buffer;
struct git_istream *stream = NULL;
unsigned long flags = 0;
unsigned long size;
crc = crc32(0, NULL, 0); crc = crc32(0, NULL, 0);
@ -146,24 +196,43 @@ static int write_zip_entry(struct archiver_args *args,
method = 0; method = 0;
attr2 = 16; attr2 = 16;
out = NULL; out = NULL;
uncompressed_size = 0; size = 0;
compressed_size = 0; compressed_size = 0;
buffer = NULL;
size = 0;
} else if (S_ISREG(mode) || S_ISLNK(mode)) { } else if (S_ISREG(mode) || S_ISLNK(mode)) {
enum object_type type = sha1_object_info(sha1, &size);
method = 0; method = 0;
attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) :
(mode & 0111) ? ((mode) << 16) : 0; (mode & 0111) ? ((mode) << 16) : 0;
if (S_ISREG(mode) && args->compression_level != 0) if (S_ISREG(mode) && args->compression_level != 0 && size > 0)
method = 8; method = 8;
crc = crc32(crc, buffer, size);
out = buffer;
uncompressed_size = size;
compressed_size = size; compressed_size = size;
if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert &&
size > big_file_threshold) {
stream = open_istream(sha1, &type, &size, NULL);
if (!stream)
return error("cannot stream blob %s",
sha1_to_hex(sha1));
flags |= ZIP_STREAM;
out = buffer = NULL;
} else {
buffer = sha1_file_to_archive(args, path, sha1, mode,
&type, &size);
if (!buffer)
return error("cannot read %s",
sha1_to_hex(sha1));
crc = crc32(crc, buffer, size);
out = buffer;
}
} else { } else {
return error("unsupported file mode: 0%o (SHA1: %s)", mode, return error("unsupported file mode: 0%o (SHA1: %s)", mode,
sha1_to_hex(sha1)); sha1_to_hex(sha1));
} }
if (method == 8) { if (buffer && method == 8) {
deflated = zlib_deflate(buffer, size, args->compression_level, deflated = zlib_deflate(buffer, size, args->compression_level,
&compressed_size); &compressed_size);
if (deflated && compressed_size - 6 < size) { if (deflated && compressed_size - 6 < size) {
@ -188,13 +257,11 @@ static int write_zip_entry(struct archiver_args *args,
copy_le16(dirent.creator_version, copy_le16(dirent.creator_version,
S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 0x0317 : 0); S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 0x0317 : 0);
copy_le16(dirent.version, 10); copy_le16(dirent.version, 10);
copy_le16(dirent.flags, 0); copy_le16(dirent.flags, flags);
copy_le16(dirent.compression_method, method); copy_le16(dirent.compression_method, method);
copy_le16(dirent.mtime, zip_time); copy_le16(dirent.mtime, zip_time);
copy_le16(dirent.mdate, zip_date); copy_le16(dirent.mdate, zip_date);
copy_le32(dirent.crc32, crc); set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
copy_le32(dirent.compressed_size, compressed_size);
copy_le32(dirent.size, uncompressed_size);
copy_le16(dirent.filename_length, pathlen); copy_le16(dirent.filename_length, pathlen);
copy_le16(dirent.extra_length, 0); copy_le16(dirent.extra_length, 0);
copy_le16(dirent.comment_length, 0); copy_le16(dirent.comment_length, 0);
@ -202,33 +269,120 @@ static int write_zip_entry(struct archiver_args *args,
copy_le16(dirent.attr1, 0); copy_le16(dirent.attr1, 0);
copy_le32(dirent.attr2, attr2); copy_le32(dirent.attr2, attr2);
copy_le32(dirent.offset, zip_offset); copy_le32(dirent.offset, zip_offset);
memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE);
zip_dir_offset += ZIP_DIR_HEADER_SIZE;
memcpy(zip_dir + zip_dir_offset, path, pathlen);
zip_dir_offset += pathlen;
zip_dir_entries++;
copy_le32(header.magic, 0x04034b50); copy_le32(header.magic, 0x04034b50);
copy_le16(header.version, 10); copy_le16(header.version, 10);
copy_le16(header.flags, 0); copy_le16(header.flags, flags);
copy_le16(header.compression_method, method); copy_le16(header.compression_method, method);
copy_le16(header.mtime, zip_time); copy_le16(header.mtime, zip_time);
copy_le16(header.mdate, zip_date); copy_le16(header.mdate, zip_date);
copy_le32(header.crc32, crc); if (flags & ZIP_STREAM)
copy_le32(header.compressed_size, compressed_size); set_zip_header_data_desc(&header, 0, 0, 0);
copy_le32(header.size, uncompressed_size); else
set_zip_header_data_desc(&header, size, compressed_size, crc);
copy_le16(header.filename_length, pathlen); copy_le16(header.filename_length, pathlen);
copy_le16(header.extra_length, 0); copy_le16(header.extra_length, 0);
write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE);
zip_offset += ZIP_LOCAL_HEADER_SIZE; zip_offset += ZIP_LOCAL_HEADER_SIZE;
write_or_die(1, path, pathlen); write_or_die(1, path, pathlen);
zip_offset += pathlen; zip_offset += pathlen;
if (compressed_size > 0) { if (stream && method == 0) {
unsigned char buf[STREAM_BUFFER_SIZE];
ssize_t readlen;
for (;;) {
readlen = read_istream(stream, buf, sizeof(buf));
if (readlen <= 0)
break;
crc = crc32(crc, buf, readlen);
write_or_die(1, buf, readlen);
}
close_istream(stream);
if (readlen)
return readlen;
compressed_size = size;
zip_offset += compressed_size;
write_zip_data_desc(size, compressed_size, crc);
zip_offset += ZIP_DATA_DESC_SIZE;
set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
} else if (stream && method == 8) {
unsigned char buf[STREAM_BUFFER_SIZE];
ssize_t readlen;
git_zstream zstream;
int result;
size_t out_len;
unsigned char compressed[STREAM_BUFFER_SIZE * 2];
memset(&zstream, 0, sizeof(zstream));
git_deflate_init(&zstream, args->compression_level);
compressed_size = 0;
zstream.next_out = compressed;
zstream.avail_out = sizeof(compressed);
for (;;) {
readlen = read_istream(stream, buf, sizeof(buf));
if (readlen <= 0)
break;
crc = crc32(crc, buf, readlen);
zstream.next_in = buf;
zstream.avail_in = readlen;
result = git_deflate(&zstream, 0);
if (result != Z_OK)
die("deflate error (%d)", result);
out = compressed;
if (!compressed_size)
out += 2;
out_len = zstream.next_out - out;
if (out_len > 0) {
write_or_die(1, out, out_len);
compressed_size += out_len;
zstream.next_out = compressed;
zstream.avail_out = sizeof(compressed);
}
}
close_istream(stream);
if (readlen)
return readlen;
zstream.next_in = buf;
zstream.avail_in = 0;
result = git_deflate(&zstream, Z_FINISH);
if (result != Z_STREAM_END)
die("deflate error (%d)", result);
git_deflate_end(&zstream);
out = compressed;
if (!compressed_size)
out += 2;
out_len = zstream.next_out - out - 4;
write_or_die(1, out, out_len);
compressed_size += out_len;
zip_offset += compressed_size;
write_zip_data_desc(size, compressed_size, crc);
zip_offset += ZIP_DATA_DESC_SIZE;
set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
} else if (compressed_size > 0) {
write_or_die(1, out, compressed_size); write_or_die(1, out, compressed_size);
zip_offset += compressed_size; zip_offset += compressed_size;
} }
free(deflated); free(deflated);
free(buffer);
memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE);
zip_dir_offset += ZIP_DIR_HEADER_SIZE;
memcpy(zip_dir + zip_dir_offset, path, pathlen);
zip_dir_offset += pathlen;
zip_dir_entries++;
return 0; return 0;
} }

View File

@ -59,12 +59,15 @@ static void format_subst(const struct commit *commit,
free(to_free); free(to_free);
} }
static void *sha1_file_to_archive(const char *path, const unsigned char *sha1, void *sha1_file_to_archive(const struct archiver_args *args,
unsigned int mode, enum object_type *type, const char *path, const unsigned char *sha1,
unsigned long *sizep, const struct commit *commit) unsigned int mode, enum object_type *type,
unsigned long *sizep)
{ {
void *buffer; void *buffer;
const struct commit *commit = args->convert ? args->commit : NULL;
path += args->baselen;
buffer = read_sha1_file(sha1, type, sizep); buffer = read_sha1_file(sha1, type, sizep);
if (buffer && S_ISREG(mode)) { if (buffer && S_ISREG(mode)) {
struct strbuf buf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT;
@ -109,12 +112,9 @@ static int write_archive_entry(const unsigned char *sha1, const char *base,
write_archive_entry_fn_t write_entry = c->write_entry; write_archive_entry_fn_t write_entry = c->write_entry;
struct git_attr_check check[2]; struct git_attr_check check[2];
const char *path_without_prefix; const char *path_without_prefix;
int convert = 0;
int err; int err;
enum object_type type;
unsigned long size;
void *buffer;
args->convert = 0;
strbuf_reset(&path); strbuf_reset(&path);
strbuf_grow(&path, PATH_MAX); strbuf_grow(&path, PATH_MAX);
strbuf_add(&path, args->base, args->baselen); strbuf_add(&path, args->base, args->baselen);
@ -126,28 +126,22 @@ static int write_archive_entry(const unsigned char *sha1, const char *base,
if (!git_check_attr(path_without_prefix, ARRAY_SIZE(check), check)) { if (!git_check_attr(path_without_prefix, ARRAY_SIZE(check), check)) {
if (ATTR_TRUE(check[0].value)) if (ATTR_TRUE(check[0].value))
return 0; return 0;
convert = ATTR_TRUE(check[1].value); args->convert = ATTR_TRUE(check[1].value);
} }
if (S_ISDIR(mode) || S_ISGITLINK(mode)) { if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
strbuf_addch(&path, '/'); strbuf_addch(&path, '/');
if (args->verbose) if (args->verbose)
fprintf(stderr, "%.*s\n", (int)path.len, path.buf); fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
err = write_entry(args, sha1, path.buf, path.len, mode, NULL, 0); err = write_entry(args, sha1, path.buf, path.len, mode);
if (err) if (err)
return err; return err;
return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0); return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
} }
buffer = sha1_file_to_archive(path_without_prefix, sha1, mode,
&type, &size, convert ? args->commit : NULL);
if (!buffer)
return error("cannot read %s", sha1_to_hex(sha1));
if (args->verbose) if (args->verbose)
fprintf(stderr, "%.*s\n", (int)path.len, path.buf); fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
err = write_entry(args, sha1, path.buf, path.len, mode, buffer, size); return write_entry(args, sha1, path.buf, path.len, mode);
free(buffer);
return err;
} }
int write_archive_entries(struct archiver_args *args, int write_archive_entries(struct archiver_args *args,
@ -167,7 +161,7 @@ int write_archive_entries(struct archiver_args *args,
if (args->verbose) if (args->verbose)
fprintf(stderr, "%.*s\n", (int)len, args->base); fprintf(stderr, "%.*s\n", (int)len, args->base);
err = write_entry(args, args->tree->object.sha1, args->base, err = write_entry(args, args->tree->object.sha1, args->base,
len, 040777, NULL, 0); len, 040777);
if (err) if (err)
return err; return err;
} }

View File

@ -11,6 +11,7 @@ struct archiver_args {
const char **pathspec; const char **pathspec;
unsigned int verbose : 1; unsigned int verbose : 1;
unsigned int worktree_attributes : 1; unsigned int worktree_attributes : 1;
unsigned int convert : 1;
int compression_level; int compression_level;
}; };
@ -27,11 +28,18 @@ extern void register_archiver(struct archiver *);
extern void init_tar_archiver(void); extern void init_tar_archiver(void);
extern void init_zip_archiver(void); extern void init_zip_archiver(void);
typedef int (*write_archive_entry_fn_t)(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, unsigned int mode, void *buffer, unsigned long size); typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
const unsigned char *sha1,
const char *path, size_t pathlen,
unsigned int mode);
extern int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry); extern int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
extern int write_archive(int argc, const char **argv, const char *prefix, int setup_prefix, const char *name_hint, int remote); extern int write_archive(int argc, const char **argv, const char *prefix, int setup_prefix, const char *name_hint, int remote);
const char *archive_format_from_filename(const char *filename); const char *archive_format_from_filename(const char *filename);
extern void *sha1_file_to_archive(const struct archiver_args *args,
const char *path, const unsigned char *sha1,
unsigned int mode, enum object_type *type,
unsigned long *sizep);
#endif /* ARCHIVE_H */ #endif /* ARCHIVE_H */

View File

@ -99,7 +99,7 @@ int close_istream(struct git_istream *st)
return r; return r;
} }
ssize_t read_istream(struct git_istream *st, char *buf, size_t sz) ssize_t read_istream(struct git_istream *st, void *buf, size_t sz)
{ {
return st->vtbl->read(st, buf, sz); return st->vtbl->read(st, buf, sz);
} }

View File

@ -10,7 +10,7 @@ struct git_istream;
extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *, struct stream_filter *); extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *, struct stream_filter *);
extern int close_istream(struct git_istream *); extern int close_istream(struct git_istream *);
extern ssize_t read_istream(struct git_istream *, char *, size_t); extern ssize_t read_istream(struct git_istream *, void *, size_t);
extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, int can_seek); extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, int can_seek);

View File

@ -134,4 +134,16 @@ test_expect_success 'repack' '
git repack -ad git repack -ad
' '
# Smoke tests: each archive format must run to completion on HEAD.
# The archive itself is discarded; only the exit status is checked.
test_expect_success 'tar archiving' '
	git archive --format=tar HEAD >/dev/null
'

test_expect_success 'zip archiving, store only' '
	git archive --format=zip -0 HEAD >/dev/null
'

test_expect_success 'zip archiving, deflate' '
	git archive --format=zip HEAD >/dev/null
'
test_done test_done

View File

@ -31,6 +31,26 @@ GUNZIP=${GUNZIP:-gzip -d}
SUBSTFORMAT=%H%n SUBSTFORMAT=%H%n
# check_zip <name> [<prefix>]
#
# Registers three tests, each tagged with the UNZIP prerequisite, that
# verify the archive <name>.zip:
#   1. extract it with $UNZIP into a new directory <name>;
#   2. list the files under <name>/<prefix>a, sort the list into
#      <name>.lst and compare it with a.lst;
#   3. recursively diff directory a against <name>/<prefix>a.
#
# The test bodies are double-quoted, so $dir, $zipfile, $listfile and
# ${dir_with_prefix} are expanded when check_zip is called, not when
# the tests run.
check_zip() {
zipfile=$1.zip
listfile=$1.lst
dir=$1
# $2 is appended verbatim and followed directly by "a" below, so a
# non-empty prefix must carry its own trailing slash
dir_with_prefix=$dir/$2
test_expect_success UNZIP " extract ZIP archive" "
(mkdir $dir && cd $dir && $UNZIP ../$zipfile)
"
test_expect_success UNZIP " validate filenames" "
(cd ${dir_with_prefix}a && find .) | sort >$listfile &&
test_cmp a.lst $listfile
"
test_expect_success UNZIP " validate file contents" "
diff -r a ${dir_with_prefix}a
"
}
test_expect_success \ test_expect_success \
'populate workdir' \ 'populate workdir' \
'mkdir a b c && 'mkdir a b c &&
@ -84,6 +104,12 @@ test_expect_success \
'git archive vs. git tar-tree' \ 'git archive vs. git tar-tree' \
'test_cmp b.tar b2.tar' 'test_cmp b.tar b2.tar'
test_expect_success 'git archive on large files' '
test_config core.bigfilethreshold 1 &&
git archive HEAD >b3.tar &&
test_cmp b.tar b3.tar
'
test_expect_success \ test_expect_success \
'git archive in a bare repo' \ 'git archive in a bare repo' \
'(cd bare.git && git archive HEAD) >b3.tar' '(cd bare.git && git archive HEAD) >b3.tar'
@ -175,10 +201,19 @@ test_expect_success \
test_cmp a/substfile2 g/prefix/a/substfile2 test_cmp a/substfile2 g/prefix/a/substfile2
' '
$UNZIP -v >/dev/null 2>&1
if [ $? -eq 127 ]; then
say "Skipping ZIP tests, because unzip was not found"
else
test_set_prereq UNZIP
fi
test_expect_success \ test_expect_success \
'git archive --format=zip' \ 'git archive --format=zip' \
'git archive --format=zip HEAD >d.zip' 'git archive --format=zip HEAD >d.zip'
check_zip d
test_expect_success \ test_expect_success \
'git archive --format=zip in a bare repo' \ 'git archive --format=zip in a bare repo' \
'(cd bare.git && git archive --format=zip HEAD) >d1.zip' '(cd bare.git && git archive --format=zip HEAD) >d1.zip'
@ -201,42 +236,25 @@ test_expect_success 'git archive with --output, override inferred format' '
test_cmp b.tar d4.zip test_cmp b.tar d4.zip
' '
$UNZIP -v >/dev/null 2>&1
if [ $? -eq 127 ]; then
say "Skipping ZIP tests, because unzip was not found"
else
test_set_prereq UNZIP
fi
test_expect_success UNZIP \
'extract ZIP archive' \
'(mkdir d && cd d && $UNZIP ../d.zip)'
test_expect_success UNZIP \
'validate filenames' \
'(cd d/a && find .) | sort >d.lst &&
test_cmp a.lst d.lst'
test_expect_success UNZIP \
'validate file contents' \
'diff -r a d/a'
test_expect_success \ test_expect_success \
'git archive --format=zip with prefix' \ 'git archive --format=zip with prefix' \
'git archive --format=zip --prefix=prefix/ HEAD >e.zip' 'git archive --format=zip --prefix=prefix/ HEAD >e.zip'
test_expect_success UNZIP \ check_zip e prefix/
'extract ZIP archive with prefix' \
'(mkdir e && cd e && $UNZIP ../e.zip)'
test_expect_success UNZIP \ test_expect_success 'git archive -0 --format=zip on large files' '
'validate filenames with prefix' \ test_config core.bigfilethreshold 1 &&
'(cd e/prefix/a && find .) | sort >e.lst && git archive -0 --format=zip HEAD >large.zip
test_cmp a.lst e.lst' '
test_expect_success UNZIP \ check_zip large
'validate file contents with prefix' \
'diff -r a e/prefix/a' test_expect_success 'git archive --format=zip on large files' '
test_config core.bigfilethreshold 1 &&
git archive --format=zip HEAD >large-compressed.zip
'
check_zip large-compressed
test_expect_success \ test_expect_success \
'git archive --list outside of a git repo' \ 'git archive --list outside of a git repo' \