git-commit-vandalism/convert.h

119 lines
3.9 KiB
C
Raw Normal View History

/*
* Copyright (c) 2011, Google Inc.
*/
#ifndef CONVERT_H
#define CONVERT_H
#include "string-list.h"
struct index_state;
#define CONV_EOL_RNDTRP_DIE (1<<0) /* Die if CRLF to LF to CRLF is different */
#define CONV_EOL_RNDTRP_WARN (1<<1) /* Warn if CRLF to LF to CRLF is different */
#define CONV_EOL_RENORMALIZE (1<<2) /* Convert CRLF to LF */
#define CONV_EOL_KEEP_CRLF (1<<3) /* Keep CRLF line endings as is */
#define CONV_WRITE_OBJECT (1<<4) /* Content is written to the index */
extern int global_conv_flags_eol;
enum auto_crlf {
AUTO_CRLF_FALSE = 0,
AUTO_CRLF_TRUE = 1,
AUTO_CRLF_INPUT = -1
};
extern enum auto_crlf auto_crlf;
enum eol {
EOL_UNSET,
EOL_CRLF,
EOL_LF,
#ifdef NATIVE_CRLF
EOL_NATIVE = EOL_CRLF
#else
EOL_NATIVE = EOL_LF
#endif
};
enum ce_delay_state {
CE_NO_DELAY = 0,
CE_CAN_DELAY = 1,
CE_RETRY = 2
};
struct delayed_checkout {
/*
* State of the currently processed cache entry. If the state is
* CE_CAN_DELAY, then the filter can delay the current cache entry.
* If the state is CE_RETRY, then this signals the filter that the
* cache entry was requested before.
*/
enum ce_delay_state state;
/* List of filter drivers that signaled delayed blobs. */
struct string_list filters;
/* List of delayed blobs identified by their path. */
struct string_list paths;
};
extern enum eol core_eol;
extern char *check_roundtrip_encoding;
extern const char *get_cached_convert_stats_ascii(const struct index_state *istate,
const char *path);
extern const char *get_wt_convert_stats_ascii(const char *path);
extern const char *get_convert_attr_ascii(const char *path);
/* returns 1 if *dst was used */
extern int convert_to_git(const struct index_state *istate,
const char *path, const char *src, size_t len,
struct strbuf *dst, int conv_flags);
extern int convert_to_working_tree(const char *path, const char *src,
size_t len, struct strbuf *dst);
extern int async_convert_to_working_tree(const char *path, const char *src,
size_t len, struct strbuf *dst,
void *dco);
extern int async_query_available_blobs(const char *cmd, struct string_list *available_paths);
extern int renormalize_buffer(const struct index_state *istate,
const char *path, const char *src, size_t len,
struct strbuf *dst);
static inline int would_convert_to_git(const struct index_state *istate,
const char *path)
{
return convert_to_git(istate, path, NULL, 0, NULL, 0);
}
convert: stream from fd to required clean filter to reduce used address space The data is streamed to the filter process anyway. Better avoid mapping the file if possible. This is especially useful if a clean filter reduces the size, for example if it computes a sha1 for binary data, like git media. The file size that the previous implementation could handle was limited by the available address space; large files for example could not be handled with (32-bit) msysgit. The new implementation can filter files of any size as long as the filter output is small enough. The new code path is only taken if the filter is required. The filter consumes data directly from the fd. If it fails, the original data is not immediately available. The condition can easily be handled as a fatal error, which is expected for a required filter anyway. If the filter was not required, the condition would need to be handled in a different way, like seeking to 0 and reading the data. But this would require more restructuring of the code and is probably not worth it. The obvious approach of falling back to reading all data would not help achieving the main purpose of this patch, which is to handle large files with limited address space. If reading all data is an option, we can simply take the old code path right away and mmap the entire file. The environment variable GIT_MMAP_LIMIT, which has been introduced in a previous commit is used to test that the expected code path is taken. A related test that exercises required filters is modified to verify that the data actually has been modified on its way from the file system to the object store. Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-08-26 17:23:25 +02:00
/* Precondition: would_convert_to_git_filter_fd(path) == true */
extern void convert_to_git_filter_fd(const struct index_state *istate,
const char *path, int fd,
convert: stream from fd to required clean filter to reduce used address space The data is streamed to the filter process anyway. Better avoid mapping the file if possible. This is especially useful if a clean filter reduces the size, for example if it computes a sha1 for binary data, like git media. The file size that the previous implementation could handle was limited by the available address space; large files for example could not be handled with (32-bit) msysgit. The new implementation can filter files of any size as long as the filter output is small enough. The new code path is only taken if the filter is required. The filter consumes data directly from the fd. If it fails, the original data is not immediately available. The condition can easily be handled as a fatal error, which is expected for a required filter anyway. If the filter was not required, the condition would need to be handled in a different way, like seeking to 0 and reading the data. But this would require more restructuring of the code and is probably not worth it. The obvious approach of falling back to reading all data would not help achieving the main purpose of this patch, which is to handle large files with limited address space. If reading all data is an option, we can simply take the old code path right away and mmap the entire file. The environment variable GIT_MMAP_LIMIT, which has been introduced in a previous commit is used to test that the expected code path is taken. A related test that exercises required filters is modified to verify that the data actually has been modified on its way from the file system to the object store. Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-08-26 17:23:25 +02:00
struct strbuf *dst,
int conv_flags);
convert: stream from fd to required clean filter to reduce used address space The data is streamed to the filter process anyway. Better avoid mapping the file if possible. This is especially useful if a clean filter reduces the size, for example if it computes a sha1 for binary data, like git media. The file size that the previous implementation could handle was limited by the available address space; large files for example could not be handled with (32-bit) msysgit. The new implementation can filter files of any size as long as the filter output is small enough. The new code path is only taken if the filter is required. The filter consumes data directly from the fd. If it fails, the original data is not immediately available. The condition can easily be handled as a fatal error, which is expected for a required filter anyway. If the filter was not required, the condition would need to be handled in a different way, like seeking to 0 and reading the data. But this would require more restructuring of the code and is probably not worth it. The obvious approach of falling back to reading all data would not help achieving the main purpose of this patch, which is to handle large files with limited address space. If reading all data is an option, we can simply take the old code path right away and mmap the entire file. The environment variable GIT_MMAP_LIMIT, which has been introduced in a previous commit is used to test that the expected code path is taken. A related test that exercises required filters is modified to verify that the data actually has been modified on its way from the file system to the object store. Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-08-26 17:23:25 +02:00
extern int would_convert_to_git_filter_fd(const char *path);
/*****************************************************************
*
* Streaming conversion support
*
*****************************************************************/
struct stream_filter; /* opaque */
extern struct stream_filter *get_stream_filter(const char *path, const struct object_id *);
extern void free_stream_filter(struct stream_filter *);
extern int is_null_stream_filter(struct stream_filter *);
/*
* Use as much input up to *isize_p and fill output up to *osize_p;
* update isize_p and osize_p to indicate how much buffer space was
* consumed and filled. Return 0 on success, non-zero on error.
*
* Some filters may need to buffer the input and look-ahead inside it
* to decide what to output, and they may consume more than zero bytes
* of input and still not produce any output. After feeding all the
* input, pass NULL as input and keep calling this function, to let
* such filters know there is no more input coming and it is time for
* them to produce the remaining output based on the buffered input.
*/
extern int stream_filter(struct stream_filter *,
const char *input, size_t *isize_p,
char *output, size_t *osize_p);
#endif /* CONVERT_H */