2011-05-12 04:30:25 +02:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2011, Google Inc.
|
|
|
|
*/
|
|
|
|
#include "cache.h"
|
|
|
|
#include "streaming.h"
|
2018-03-23 18:21:14 +01:00
|
|
|
#include "repository.h"
|
|
|
|
#include "object-store.h"
|
2018-04-12 02:21:06 +02:00
|
|
|
#include "replace-object.h"
|
2017-08-19 00:20:22 +02:00
|
|
|
#include "packfile.h"
|
2011-05-12 04:30:25 +02:00
|
|
|
|
|
|
|
/*
 * Callback types for the per-backend operations of a stream.  Each
 * backend's "open" function in this file installs the matching "close"
 * and "read" callbacks on the stream it is given.
 *
 * open:  prepare the stream for the given object; returns 0 on success
 *        and non-zero on failure.
 * close: release backend resources held by the stream.
 * read:  copy up to the requested number of bytes into the buffer;
 *        returns the number of bytes produced, or -1 on error.
 */
typedef int (*open_istream_fn)(struct git_istream *,
			       struct repository *,
			       const struct object_id *,
			       enum object_type *);
typedef int (*close_istream_fn)(struct git_istream *);
typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t);
|
|
|
|
|
2011-05-20 23:33:31 +02:00
|
|
|
/* Size, in bytes, of each staging buffer used by a filtered stream. */
#define FILTER_BUFFER (1024*16)

/*
 * Backend state of a stream that runs the output of another stream
 * ("upstream") through a stream_filter before handing it to the reader.
 */
struct filtered_istream {
	struct git_istream *upstream;	/* stream the unfiltered bytes are read from */
	struct stream_filter *filter;	/* filter applied to those bytes */
	char ibuf[FILTER_BUFFER];	/* unfiltered bytes read from upstream */
	char obuf[FILTER_BUFFER];	/* filtered bytes not yet given to the reader */
	int i_end, i_ptr;		/* ibuf: end of valid data / next byte to feed */
	int o_end, o_ptr;		/* obuf: end of valid data / next byte to hand out */
	int input_finished;		/* non-zero once upstream returned no more data */
};
|
|
|
|
|
2011-05-12 04:30:25 +02:00
|
|
|
struct git_istream {
|
2021-05-05 14:33:32 +02:00
|
|
|
open_istream_fn open;
|
|
|
|
close_istream_fn close;
|
|
|
|
read_istream_fn read;
|
|
|
|
|
2011-05-12 04:30:25 +02:00
|
|
|
unsigned long size; /* inflated size of full object */
|
2011-07-19 18:33:03 +02:00
|
|
|
git_zstream z;
|
2011-05-14 00:34:58 +02:00
|
|
|
enum { z_unused, z_used, z_done, z_error } z_state;
|
2011-05-12 04:30:25 +02:00
|
|
|
|
|
|
|
union {
|
|
|
|
struct {
|
2023-01-07 14:49:15 +01:00
|
|
|
char *buf; /* from oid_object_info_extended() */
|
2011-05-12 04:30:25 +02:00
|
|
|
unsigned long read_ptr;
|
|
|
|
} incore;
|
|
|
|
|
|
|
|
struct {
|
2011-05-15 04:17:10 +02:00
|
|
|
void *mapped;
|
|
|
|
unsigned long mapsize;
|
|
|
|
char hdr[32];
|
|
|
|
int hdr_avail;
|
|
|
|
int hdr_used;
|
2011-05-12 04:30:25 +02:00
|
|
|
} loose;
|
|
|
|
|
|
|
|
struct {
|
2011-05-14 00:34:58 +02:00
|
|
|
struct packed_git *pack;
|
|
|
|
off_t pos;
|
2011-05-12 04:30:25 +02:00
|
|
|
} in_pack;
|
2011-05-20 23:33:31 +02:00
|
|
|
|
|
|
|
struct filtered_istream filtered;
|
2011-05-12 04:30:25 +02:00
|
|
|
} u;
|
|
|
|
};
|
|
|
|
|
2011-05-14 00:34:58 +02:00
|
|
|
/*****************************************************************
|
|
|
|
*
|
|
|
|
* Common helpers
|
|
|
|
*
|
|
|
|
*****************************************************************/
|
|
|
|
|
|
|
|
static void close_deflated_stream(struct git_istream *st)
|
|
|
|
{
|
|
|
|
if (st->z_state == z_used)
|
|
|
|
git_inflate_end(&st->z);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-05-20 23:33:31 +02:00
|
|
|
/*****************************************************************
|
|
|
|
*
|
|
|
|
* Filtered stream
|
|
|
|
*
|
|
|
|
*****************************************************************/
|
|
|
|
|
2021-05-05 14:33:30 +02:00
|
|
|
static int close_istream_filtered(struct git_istream *st)
|
2011-05-20 23:33:31 +02:00
|
|
|
{
|
|
|
|
free_stream_filter(st->u.filtered.filter);
|
|
|
|
return close_istream(st->u.filtered.upstream);
|
|
|
|
}
|
|
|
|
|
2021-05-05 14:33:30 +02:00
|
|
|
static ssize_t read_istream_filtered(struct git_istream *st, char *buf,
|
|
|
|
size_t sz)
|
2011-05-20 23:33:31 +02:00
|
|
|
{
|
|
|
|
struct filtered_istream *fs = &(st->u.filtered);
|
|
|
|
size_t filled = 0;
|
|
|
|
|
|
|
|
while (sz) {
|
|
|
|
/* do we already have filtered output? */
|
|
|
|
if (fs->o_ptr < fs->o_end) {
|
|
|
|
size_t to_move = fs->o_end - fs->o_ptr;
|
|
|
|
if (sz < to_move)
|
|
|
|
to_move = sz;
|
|
|
|
memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move);
|
|
|
|
fs->o_ptr += to_move;
|
|
|
|
sz -= to_move;
|
|
|
|
filled += to_move;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
fs->o_end = fs->o_ptr = 0;
|
|
|
|
|
|
|
|
/* do we have anything to feed the filter with? */
|
|
|
|
if (fs->i_ptr < fs->i_end) {
|
|
|
|
size_t to_feed = fs->i_end - fs->i_ptr;
|
|
|
|
size_t to_receive = FILTER_BUFFER;
|
|
|
|
if (stream_filter(fs->filter,
|
|
|
|
fs->ibuf + fs->i_ptr, &to_feed,
|
|
|
|
fs->obuf, &to_receive))
|
|
|
|
return -1;
|
|
|
|
fs->i_ptr = fs->i_end - to_feed;
|
|
|
|
fs->o_end = FILTER_BUFFER - to_receive;
|
|
|
|
continue;
|
|
|
|
}
|
2011-05-21 23:05:51 +02:00
|
|
|
|
|
|
|
/* tell the filter to drain upon no more input */
|
|
|
|
if (fs->input_finished) {
|
|
|
|
size_t to_receive = FILTER_BUFFER;
|
|
|
|
if (stream_filter(fs->filter,
|
|
|
|
NULL, NULL,
|
|
|
|
fs->obuf, &to_receive))
|
|
|
|
return -1;
|
|
|
|
fs->o_end = FILTER_BUFFER - to_receive;
|
|
|
|
if (!fs->o_end)
|
|
|
|
break;
|
|
|
|
continue;
|
|
|
|
}
|
2011-05-20 23:33:31 +02:00
|
|
|
fs->i_end = fs->i_ptr = 0;
|
|
|
|
|
|
|
|
/* refill the input from the upstream */
|
2011-05-21 23:05:51 +02:00
|
|
|
if (!fs->input_finished) {
|
|
|
|
fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER);
|
|
|
|
if (fs->i_end < 0)
|
2013-03-25 21:18:16 +01:00
|
|
|
return -1;
|
2011-05-21 23:05:51 +02:00
|
|
|
if (fs->i_end)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
fs->input_finished = 1;
|
2011-05-20 23:33:31 +02:00
|
|
|
}
|
|
|
|
return filled;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct git_istream *attach_stream_filter(struct git_istream *st,
|
|
|
|
struct stream_filter *filter)
|
|
|
|
{
|
|
|
|
struct git_istream *ifs = xmalloc(sizeof(*ifs));
|
|
|
|
struct filtered_istream *fs = &(ifs->u.filtered);
|
|
|
|
|
2021-05-05 14:33:32 +02:00
|
|
|
ifs->close = close_istream_filtered;
|
|
|
|
ifs->read = read_istream_filtered;
|
2011-05-20 23:33:31 +02:00
|
|
|
fs->upstream = st;
|
|
|
|
fs->filter = filter;
|
|
|
|
fs->i_end = fs->i_ptr = 0;
|
|
|
|
fs->o_end = fs->o_ptr = 0;
|
2011-05-21 23:05:51 +02:00
|
|
|
fs->input_finished = 0;
|
2011-05-20 23:33:31 +02:00
|
|
|
ifs->size = -1; /* unknown */
|
|
|
|
return ifs;
|
|
|
|
}
|
|
|
|
|
2011-05-12 04:30:25 +02:00
|
|
|
/*****************************************************************
|
|
|
|
*
|
|
|
|
* Loose object stream
|
|
|
|
*
|
|
|
|
*****************************************************************/
|
|
|
|
|
2021-05-05 14:33:30 +02:00
|
|
|
static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz)
|
2011-05-15 04:17:10 +02:00
|
|
|
{
|
|
|
|
size_t total_read = 0;
|
|
|
|
|
|
|
|
switch (st->z_state) {
|
|
|
|
case z_done:
|
|
|
|
return 0;
|
|
|
|
case z_error:
|
|
|
|
return -1;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (st->u.loose.hdr_used < st->u.loose.hdr_avail) {
|
|
|
|
size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used;
|
|
|
|
if (sz < to_copy)
|
|
|
|
to_copy = sz;
|
|
|
|
memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy);
|
|
|
|
st->u.loose.hdr_used += to_copy;
|
|
|
|
total_read += to_copy;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (total_read < sz) {
|
|
|
|
int status;
|
|
|
|
|
|
|
|
st->z.next_out = (unsigned char *)buf + total_read;
|
|
|
|
st->z.avail_out = sz - total_read;
|
|
|
|
status = git_inflate(&st->z, Z_FINISH);
|
|
|
|
|
|
|
|
total_read = st->z.next_out - (unsigned char *)buf;
|
|
|
|
|
|
|
|
if (status == Z_STREAM_END) {
|
|
|
|
git_inflate_end(&st->z);
|
|
|
|
st->z_state = z_done;
|
|
|
|
break;
|
|
|
|
}
|
avoid infinite loop in read_istream_loose
The read_istream_loose function loops on inflating a chunk of data
from an mmap'd loose object. We end the loop when we run out
of space in our output buffer, or if we see a zlib error.
We need to treat Z_BUF_ERROR specially, though, as it is not
fatal; it is just zlib's way of telling us that we need to
either feed it more input or give it more output space. It
is perfectly normal for us to hit this when we are at the
end of our buffer.
However, we may also get Z_BUF_ERROR because we have run out
of input. In a well-formed object, this should not happen,
because we have fed the whole mmap'd contents to zlib. But
if the object is truncated or corrupt, we will loop forever,
never giving zlib any more data, but continuing to ask it to
inflate.
We can fix this by considering it an error when zlib returns
Z_BUF_ERROR but we still have output space left (which means
it must want more input, which we know is a truncation
error). It would not be sufficient to just check whether
zlib had consumed all the input at the start of the loop, as
it might still want to generate output from what is in its
internal state.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-25 21:21:14 +01:00
|
|
|
if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
|
2011-05-15 04:17:10 +02:00
|
|
|
git_inflate_end(&st->z);
|
|
|
|
st->z_state = z_error;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return total_read;
|
|
|
|
}
|
|
|
|
|
2021-05-05 14:33:30 +02:00
|
|
|
static int close_istream_loose(struct git_istream *st)
|
2011-05-15 04:17:10 +02:00
|
|
|
{
|
|
|
|
close_deflated_stream(st);
|
|
|
|
munmap(st->u.loose.mapped, st->u.loose.mapsize);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-05-05 14:33:30 +02:00
|
|
|
static int open_istream_loose(struct git_istream *st, struct repository *r,
|
|
|
|
const struct object_id *oid,
|
|
|
|
enum object_type *type)
|
2011-05-12 04:30:25 +02:00
|
|
|
{
|
2021-10-01 11:16:47 +02:00
|
|
|
struct object_info oi = OBJECT_INFO_INIT;
|
|
|
|
oi.sizep = &st->size;
|
object-file.c: stop dying in parse_loose_header()
Make parse_loose_header() return error codes and data instead of
invoking die() by itself.
For now we'll move the relevant die() call to loose_object_info() and
read_loose_object() to keep this change smaller. In a subsequent
commit we'll make read_loose_object() return an error code instead of
dying. We should also address the "allow_unknown" case (should be
moved to builtin/cat-file.c), but for now I'll be leaving it.
For making parse_loose_header() not die() change its prototype to
accept a "struct object_info *" instead of the "unsigned long *sizep"
it accepted before. Its callers can now check the populated populated
"oi->typep".
Because of this we don't need to pass in the "unsigned int flags"
which we used for OBJECT_INFO_ALLOW_UNKNOWN_TYPE, we can instead do
that check in loose_object_info().
This also refactors some confusing control flow around the "status"
variable. In some cases we set it to the return value of "error()",
i.e. -1, and later checked if "status < 0" was true.
Since 93cff9a978e (sha1_loose_object_info: return error for corrupted
objects, 2017-04-01) the return value of loose_object_info() (then
named sha1_loose_object_info()) had been a "status" variable that be
any negative value, as we were expecting to return the "enum
object_type".
The only negative type happens to be OBJ_BAD, but the code still
assumed that more might be added. This was then used later in
e.g. c84a1f3ed4d (sha1_file: refactor read_object, 2017-06-21). Now
that parse_loose_header() will return 0 on success instead of the
type (which it'll stick into the "struct object_info") we don't need
to conflate these two cases in its callers.
Since parse_loose_header() doesn't need to return an arbitrary
"status" we only need to treat its "ret < 0" specially, but can
idiomatically overwrite it with our own error() return. This along
with having made unpack_loose_header() return an "enum
unpack_loose_header_result" in an earlier commit means that we can
move the previously nested if/else cases mostly into the "ULHR_OK"
branch of the "switch" statement.
We should be less silent if we reach that "status = -1" branch, which
happens if we've got trailing garbage in loose objects, see
f6371f92104 (sha1_file: add read_loose_object() function, 2017-01-13)
for a better way to handle it. For now let's punt on it, a subsequent
commit will address that edge case.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-01 11:16:51 +02:00
|
|
|
oi.typep = type;
|
2021-10-01 11:16:47 +02:00
|
|
|
|
2020-01-30 21:32:20 +01:00
|
|
|
st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize);
|
2011-05-15 04:17:10 +02:00
|
|
|
if (!st->u.loose.mapped)
|
|
|
|
return -1;
|
2021-10-01 11:16:49 +02:00
|
|
|
switch (unpack_loose_header(&st->z, st->u.loose.mapped,
|
|
|
|
st->u.loose.mapsize, st->u.loose.hdr,
|
|
|
|
sizeof(st->u.loose.hdr), NULL)) {
|
|
|
|
case ULHR_OK:
|
|
|
|
break;
|
|
|
|
case ULHR_BAD:
|
2021-10-01 11:16:50 +02:00
|
|
|
case ULHR_TOO_LONG:
|
2021-10-01 11:16:49 +02:00
|
|
|
goto error;
|
2011-05-15 04:17:10 +02:00
|
|
|
}
|
object-file.c: stop dying in parse_loose_header()
Make parse_loose_header() return error codes and data instead of
invoking die() by itself.
For now we'll move the relevant die() call to loose_object_info() and
read_loose_object() to keep this change smaller. In a subsequent
commit we'll make read_loose_object() return an error code instead of
dying. We should also address the "allow_unknown" case (should be
moved to builtin/cat-file.c), but for now I'll be leaving it.
For making parse_loose_header() not die() change its prototype to
accept a "struct object_info *" instead of the "unsigned long *sizep"
it accepted before. Its callers can now check the populated populated
"oi->typep".
Because of this we don't need to pass in the "unsigned int flags"
which we used for OBJECT_INFO_ALLOW_UNKNOWN_TYPE, we can instead do
that check in loose_object_info().
This also refactors some confusing control flow around the "status"
variable. In some cases we set it to the return value of "error()",
i.e. -1, and later checked if "status < 0" was true.
Since 93cff9a978e (sha1_loose_object_info: return error for corrupted
objects, 2017-04-01) the return value of loose_object_info() (then
named sha1_loose_object_info()) had been a "status" variable that be
any negative value, as we were expecting to return the "enum
object_type".
The only negative type happens to be OBJ_BAD, but the code still
assumed that more might be added. This was then used later in
e.g. c84a1f3ed4d (sha1_file: refactor read_object, 2017-06-21). Now
that parse_loose_header() will return 0 on success instead of the
type (which it'll stick into the "struct object_info") we don't need
to conflate these two cases in its callers.
Since parse_loose_header() doesn't need to return an arbitrary
"status" we only need to treat its "ret < 0" specially, but can
idiomatically overwrite it with our own error() return. This along
with having made unpack_loose_header() return an "enum
unpack_loose_header_result" in an earlier commit means that we can
move the previously nested if/else cases mostly into the "ULHR_OK"
branch of the "switch" statement.
We should be less silent if we reach that "status = -1" branch, which
happens if we've got trailing garbage in loose objects, see
f6371f92104 (sha1_file: add read_loose_object() function, 2017-01-13)
for a better way to handle it. For now let's punt on it, a subsequent
commit will address that edge case.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-01 11:16:51 +02:00
|
|
|
if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0)
|
2021-10-01 11:16:49 +02:00
|
|
|
goto error;
|
2011-05-15 04:17:10 +02:00
|
|
|
|
|
|
|
st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1;
|
|
|
|
st->u.loose.hdr_avail = st->z.total_out;
|
|
|
|
st->z_state = z_used;
|
2021-05-05 14:33:32 +02:00
|
|
|
st->close = close_istream_loose;
|
|
|
|
st->read = read_istream_loose;
|
2011-05-15 04:17:10 +02:00
|
|
|
|
|
|
|
return 0;
|
2021-10-01 11:16:49 +02:00
|
|
|
error:
|
|
|
|
git_inflate_end(&st->z);
|
|
|
|
munmap(st->u.loose.mapped, st->u.loose.mapsize);
|
|
|
|
return -1;
|
2011-05-12 04:30:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*****************************************************************
|
|
|
|
*
|
|
|
|
* Non-delta packed object stream
|
|
|
|
*
|
|
|
|
*****************************************************************/
|
|
|
|
|
2021-05-05 14:33:30 +02:00
|
|
|
static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf,
|
|
|
|
size_t sz)
|
2011-05-14 00:34:58 +02:00
|
|
|
{
|
|
|
|
size_t total_read = 0;
|
|
|
|
|
|
|
|
switch (st->z_state) {
|
|
|
|
case z_unused:
|
|
|
|
memset(&st->z, 0, sizeof(st->z));
|
|
|
|
git_inflate_init(&st->z);
|
|
|
|
st->z_state = z_used;
|
|
|
|
break;
|
|
|
|
case z_done:
|
|
|
|
return 0;
|
|
|
|
case z_error:
|
|
|
|
return -1;
|
|
|
|
case z_used:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (total_read < sz) {
|
|
|
|
int status;
|
|
|
|
struct pack_window *window = NULL;
|
|
|
|
unsigned char *mapped;
|
|
|
|
|
|
|
|
mapped = use_pack(st->u.in_pack.pack, &window,
|
|
|
|
st->u.in_pack.pos, &st->z.avail_in);
|
|
|
|
|
|
|
|
st->z.next_out = (unsigned char *)buf + total_read;
|
|
|
|
st->z.avail_out = sz - total_read;
|
|
|
|
st->z.next_in = mapped;
|
|
|
|
status = git_inflate(&st->z, Z_FINISH);
|
|
|
|
|
|
|
|
st->u.in_pack.pos += st->z.next_in - mapped;
|
|
|
|
total_read = st->z.next_out - (unsigned char *)buf;
|
|
|
|
unuse_pack(&window);
|
|
|
|
|
|
|
|
if (status == Z_STREAM_END) {
|
|
|
|
git_inflate_end(&st->z);
|
|
|
|
st->z_state = z_done;
|
|
|
|
break;
|
|
|
|
}
|
2018-10-31 06:13:16 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Unlike the loose object case, we do not have to worry here
|
|
|
|
* about running out of input bytes and spinning infinitely. If
|
|
|
|
* we get Z_BUF_ERROR due to too few input bytes, then we'll
|
|
|
|
* replenish them in the next use_pack() call when we loop. If
|
|
|
|
* we truly hit the end of the pack (i.e., because it's corrupt
|
|
|
|
* or truncated), then use_pack() catches that and will die().
|
|
|
|
*/
|
2011-05-14 00:34:58 +02:00
|
|
|
if (status != Z_OK && status != Z_BUF_ERROR) {
|
|
|
|
git_inflate_end(&st->z);
|
|
|
|
st->z_state = z_error;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return total_read;
|
|
|
|
}
|
|
|
|
|
2021-05-05 14:33:30 +02:00
|
|
|
/*
 * Close callback for a non-delta packed object: end any inflate still
 * in progress.  Always returns 0.
 */
static int close_istream_pack_non_delta(struct git_istream *st)
{
	int status = 0;

	close_deflated_stream(st);
	return status;
}
|
|
|
|
|
2021-05-05 14:33:30 +02:00
|
|
|
static int open_istream_pack_non_delta(struct git_istream *st,
|
2022-08-25 19:09:48 +02:00
|
|
|
struct repository *r UNUSED,
|
|
|
|
const struct object_id *oid UNUSED,
|
|
|
|
enum object_type *type UNUSED)
|
2011-05-12 04:30:25 +02:00
|
|
|
{
|
2011-05-14 00:34:58 +02:00
|
|
|
struct pack_window *window;
|
|
|
|
enum object_type in_pack_type;
|
|
|
|
|
|
|
|
window = NULL;
|
|
|
|
|
|
|
|
in_pack_type = unpack_object_header(st->u.in_pack.pack,
|
|
|
|
&window,
|
|
|
|
&st->u.in_pack.pos,
|
|
|
|
&st->size);
|
|
|
|
unuse_pack(&window);
|
|
|
|
switch (in_pack_type) {
|
|
|
|
default:
|
|
|
|
return -1; /* we do not do deltas for now */
|
|
|
|
case OBJ_COMMIT:
|
|
|
|
case OBJ_TREE:
|
|
|
|
case OBJ_BLOB:
|
|
|
|
case OBJ_TAG:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
st->z_state = z_unused;
|
2021-05-05 14:33:32 +02:00
|
|
|
st->close = close_istream_pack_non_delta;
|
|
|
|
st->read = read_istream_pack_non_delta;
|
|
|
|
|
2011-05-14 00:34:58 +02:00
|
|
|
return 0;
|
2011-05-12 04:30:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*****************************************************************
|
|
|
|
*
|
|
|
|
* In-core stream
|
|
|
|
*
|
|
|
|
*****************************************************************/
|
|
|
|
|
2021-05-05 14:33:30 +02:00
|
|
|
static int close_istream_incore(struct git_istream *st)
|
2011-05-12 04:30:25 +02:00
|
|
|
{
|
|
|
|
free(st->u.incore.buf);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-05-05 14:33:30 +02:00
|
|
|
static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz)
|
2011-05-12 04:30:25 +02:00
|
|
|
{
|
|
|
|
size_t read_size = sz;
|
|
|
|
size_t remainder = st->size - st->u.incore.read_ptr;
|
|
|
|
|
|
|
|
if (remainder <= read_size)
|
|
|
|
read_size = remainder;
|
|
|
|
if (read_size) {
|
|
|
|
memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size);
|
|
|
|
st->u.incore.read_ptr += read_size;
|
|
|
|
}
|
|
|
|
return read_size;
|
|
|
|
}
|
|
|
|
|
2021-05-05 14:33:30 +02:00
|
|
|
static int open_istream_incore(struct git_istream *st, struct repository *r,
|
2021-05-05 14:33:31 +02:00
|
|
|
const struct object_id *oid, enum object_type *type)
|
2011-05-12 04:30:25 +02:00
|
|
|
{
|
2023-01-07 14:49:15 +01:00
|
|
|
struct object_info oi = OBJECT_INFO_INIT;
|
|
|
|
|
2011-05-12 04:30:25 +02:00
|
|
|
st->u.incore.read_ptr = 0;
|
2021-05-05 14:33:32 +02:00
|
|
|
st->close = close_istream_incore;
|
|
|
|
st->read = read_istream_incore;
|
2011-05-12 04:30:25 +02:00
|
|
|
|
2023-01-07 14:49:15 +01:00
|
|
|
oi.typep = type;
|
|
|
|
oi.sizep = &st->size;
|
|
|
|
oi.contentp = (void **)&st->u.incore.buf;
|
|
|
|
return oid_object_info_extended(r, oid, &oi,
|
|
|
|
OBJECT_INFO_DIE_IF_CORRUPT);
|
2011-05-12 04:30:25 +02:00
|
|
|
}
|
2012-03-07 11:54:15 +01:00
|
|
|
|
2021-05-05 14:33:28 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
* static helper variables and functions for users of the streaming interface
|
|
|
|
*****************************************************************************/
|
|
|
|
|
2021-05-05 14:33:32 +02:00
|
|
|
static int istream_source(struct git_istream *st,
|
|
|
|
struct repository *r,
|
|
|
|
const struct object_id *oid,
|
|
|
|
enum object_type *type)
|
2021-05-05 14:33:28 +02:00
|
|
|
{
|
|
|
|
unsigned long size;
|
|
|
|
int status;
|
2021-05-05 14:33:31 +02:00
|
|
|
struct object_info oi = OBJECT_INFO_INIT;
|
2021-05-05 14:33:28 +02:00
|
|
|
|
2021-05-05 14:33:31 +02:00
|
|
|
oi.typep = type;
|
|
|
|
oi.sizep = &size;
|
|
|
|
status = oid_object_info_extended(r, oid, &oi, 0);
|
2021-05-05 14:33:28 +02:00
|
|
|
if (status < 0)
|
2021-05-05 14:33:32 +02:00
|
|
|
return status;
|
2021-05-05 14:33:28 +02:00
|
|
|
|
2021-05-05 14:33:31 +02:00
|
|
|
switch (oi.whence) {
|
2021-05-05 14:33:28 +02:00
|
|
|
case OI_LOOSE:
|
2021-05-05 14:33:32 +02:00
|
|
|
st->open = open_istream_loose;
|
|
|
|
return 0;
|
2021-05-05 14:33:28 +02:00
|
|
|
case OI_PACKED:
|
2021-05-05 14:33:31 +02:00
|
|
|
if (!oi.u.packed.is_delta && big_file_threshold < size) {
|
|
|
|
st->u.in_pack.pack = oi.u.packed.pack;
|
|
|
|
st->u.in_pack.pos = oi.u.packed.offset;
|
2021-05-05 14:33:32 +02:00
|
|
|
st->open = open_istream_pack_non_delta;
|
|
|
|
return 0;
|
2021-05-05 14:33:31 +02:00
|
|
|
}
|
2021-05-05 14:33:28 +02:00
|
|
|
/* fallthru */
|
|
|
|
default:
|
2021-05-05 14:33:32 +02:00
|
|
|
st->open = open_istream_incore;
|
|
|
|
return 0;
|
2021-05-05 14:33:28 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-03-07 11:54:15 +01:00
|
|
|
/****************************************************************
|
|
|
|
* Users of streaming interface
|
|
|
|
****************************************************************/
|
|
|
|
|
2021-05-05 14:33:28 +02:00
|
|
|
int close_istream(struct git_istream *st)
|
|
|
|
{
|
2021-05-05 14:33:32 +02:00
|
|
|
int r = st->close(st);
|
2021-05-05 14:33:28 +02:00
|
|
|
free(st);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
ssize_t read_istream(struct git_istream *st, void *buf, size_t sz)
|
|
|
|
{
|
2021-05-05 14:33:32 +02:00
|
|
|
return st->read(st, buf, sz);
|
2021-05-05 14:33:28 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
struct git_istream *open_istream(struct repository *r,
|
|
|
|
const struct object_id *oid,
|
|
|
|
enum object_type *type,
|
|
|
|
unsigned long *size,
|
|
|
|
struct stream_filter *filter)
|
|
|
|
{
|
2021-05-05 14:33:31 +02:00
|
|
|
struct git_istream *st = xmalloc(sizeof(*st));
|
2021-05-05 14:33:28 +02:00
|
|
|
const struct object_id *real = lookup_replace_object(r, oid);
|
2021-05-05 14:33:32 +02:00
|
|
|
int ret = istream_source(st, r, real, type);
|
2021-05-05 14:33:28 +02:00
|
|
|
|
2021-05-05 14:33:32 +02:00
|
|
|
if (ret) {
|
2021-05-05 14:33:31 +02:00
|
|
|
free(st);
|
2021-05-05 14:33:28 +02:00
|
|
|
return NULL;
|
2021-05-05 14:33:31 +02:00
|
|
|
}
|
2021-05-05 14:33:28 +02:00
|
|
|
|
2021-05-05 14:33:32 +02:00
|
|
|
if (st->open(st, r, real, type)) {
|
2021-05-05 14:33:31 +02:00
|
|
|
if (open_istream_incore(st, r, real, type)) {
|
2021-05-05 14:33:28 +02:00
|
|
|
free(st);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (filter) {
|
|
|
|
/* Add "&& !is_null_stream_filter(filter)" for performance */
|
|
|
|
struct git_istream *nst = attach_stream_filter(st, filter);
|
|
|
|
if (!nst) {
|
|
|
|
close_istream(st);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
st = nst;
|
|
|
|
}
|
|
|
|
|
|
|
|
*size = st->size;
|
|
|
|
return st;
|
|
|
|
}
|
|
|
|
|
2016-09-05 22:07:59 +02:00
|
|
|
int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter,
|
2012-03-07 11:54:15 +01:00
|
|
|
int can_seek)
|
|
|
|
{
|
|
|
|
struct git_istream *st;
|
|
|
|
enum object_type type;
|
|
|
|
unsigned long sz;
|
|
|
|
ssize_t kept = 0;
|
|
|
|
int result = -1;
|
|
|
|
|
2020-01-30 21:32:20 +01:00
|
|
|
st = open_istream(the_repository, oid, &type, &sz, filter);
|
2015-03-31 03:22:11 +02:00
|
|
|
if (!st) {
|
|
|
|
if (filter)
|
|
|
|
free_stream_filter(filter);
|
2012-03-07 11:54:15 +01:00
|
|
|
return result;
|
2015-03-31 03:22:11 +02:00
|
|
|
}
|
2012-03-07 11:54:15 +01:00
|
|
|
if (type != OBJ_BLOB)
|
|
|
|
goto close_and_exit;
|
|
|
|
for (;;) {
|
|
|
|
char buf[1024 * 16];
|
|
|
|
ssize_t wrote, holeto;
|
|
|
|
ssize_t readlen = read_istream(st, buf, sizeof(buf));
|
|
|
|
|
2013-03-25 21:16:50 +01:00
|
|
|
if (readlen < 0)
|
|
|
|
goto close_and_exit;
|
2012-03-07 11:54:15 +01:00
|
|
|
if (!readlen)
|
|
|
|
break;
|
|
|
|
if (can_seek && sizeof(buf) == readlen) {
|
|
|
|
for (holeto = 0; holeto < readlen; holeto++)
|
|
|
|
if (buf[holeto])
|
|
|
|
break;
|
|
|
|
if (readlen == holeto) {
|
|
|
|
kept += holeto;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
|
|
|
|
goto close_and_exit;
|
|
|
|
else
|
|
|
|
kept = 0;
|
|
|
|
wrote = write_in_full(fd, buf, readlen);
|
|
|
|
|
2017-09-13 19:16:28 +02:00
|
|
|
if (wrote < 0)
|
2012-03-07 11:54:15 +01:00
|
|
|
goto close_and_exit;
|
|
|
|
}
|
|
|
|
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
|
2014-01-17 15:17:09 +01:00
|
|
|
xwrite(fd, "", 1) != 1))
|
2012-03-07 11:54:15 +01:00
|
|
|
goto close_and_exit;
|
|
|
|
result = 0;
|
|
|
|
|
|
|
|
close_and_exit:
|
|
|
|
close_istream(st);
|
|
|
|
return result;
|
|
|
|
}
|