git-commit-vandalism/cache.h

572 lines
20 KiB
C
Raw Normal View History

#ifndef CACHE_H
#define CACHE_H
#include "git-compat-util.h"
#include SHA1_HEADER
#include <zlib.h>
#if ZLIB_VERNUM < 0x1200
#define deflateBound(c,s) ((s) + (((s) + 7) >> 3) + (((s) + 63) >> 6) + 11)
#endif
#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
#define DTYPE(de) ((de)->d_type)
#else
#undef DT_UNKNOWN
#undef DT_DIR
#undef DT_REG
#undef DT_LNK
#define DT_UNKNOWN 0
#define DT_DIR 1
#define DT_REG 2
#define DT_LNK 3
#define DTYPE(de) DT_UNKNOWN
#endif
/* unknown mode (impossible combination S_IFIFO|S_IFCHR) */
#define S_IFINVALID 0030000
/*
* A "directory link" is a link to another git directory.
*
* The value 0160000 is not normally a valid mode, and
* also just happens to be S_IFDIR + S_IFLNK
*
* NOTE! We *really* shouldn't depend on the S_IFxxx macros
* always having the same values everywhere. We should use
* our internal git values for these things, and then we can
* translate that to the OS-specific value. It just so
* happens that everybody shares the same bit representation
* in the UNIX world (and apparently wider too..)
*/
#define S_IFGITLINK 0160000
#define S_ISGITLINK(m) (((m) & S_IFMT) == S_IFGITLINK)
/*
* Intensive research over the course of many years has shown that
* port 9418 is totally unused by anything else. Or
*
* Your search - "port 9418" - did not match any documents.
*
* as www.google.com puts it.
*
* This port has been properly assigned for git use by IANA:
* git (Assigned-9418) [I06-050728-0001].
*
* git 9418/tcp git pack transfer service
* git 9418/udp git pack transfer service
*
* with Linus Torvalds <torvalds@osdl.org> as the point of
* contact. September 2005.
*
* See http://www.iana.org/assignments/port-numbers
*/
#define DEFAULT_GIT_PORT 9418
/*
* Basic data structures for the directory cache
*/
#define CACHE_SIGNATURE 0x44495243 /* "DIRC" */
struct cache_header {
unsigned int hdr_signature;
unsigned int hdr_version;
unsigned int hdr_entries;
};
/*
* The "cache_time" is just the low 32 bits of the
* time. It doesn't matter if it overflows - we only
* check it for equality in the 32 bits we save.
*/
struct cache_time {
unsigned int sec;
unsigned int nsec;
};
/*
* dev/ino/uid/gid/size are also just tracked to the low 32 bits
* Again - this is just a (very strong in practice) heuristic that
* the inode hasn't changed.
*
* We save the fields in big-endian order to allow using the
* index file over NFS transparently.
*/
struct cache_entry {
struct cache_time ce_ctime;
struct cache_time ce_mtime;
unsigned int ce_dev;
unsigned int ce_ino;
unsigned int ce_mode;
unsigned int ce_uid;
unsigned int ce_gid;
unsigned int ce_size;
unsigned char sha1[20];
unsigned short ce_flags;
char name[FLEX_ARRAY]; /* more */
};
#define CE_NAMEMASK (0x0fff)
#define CE_STAGEMASK (0x3000)
#define CE_UPDATE (0x4000)
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
#define CE_VALID (0x8000)
#define CE_STAGESHIFT 12
#define create_ce_flags(len, stage) htons((len) | ((stage) << CE_STAGESHIFT))
#define ce_namelen(ce) (CE_NAMEMASK & ntohs((ce)->ce_flags))
#define ce_size(ce) cache_entry_size(ce_namelen(ce))
#define ce_stage(ce) ((CE_STAGEMASK & ntohs((ce)->ce_flags)) >> CE_STAGESHIFT)
#define ce_permissions(mode) (((mode) & 0100) ? 0755 : 0644)
static inline unsigned int create_ce_mode(unsigned int mode)
{
if (S_ISLNK(mode))
return htonl(S_IFLNK);
if (S_ISDIR(mode) || S_ISGITLINK(mode))
return htonl(S_IFGITLINK);
return htonl(S_IFREG | ce_permissions(mode));
}
static inline unsigned int ce_mode_from_stat(struct cache_entry *ce, unsigned int mode)
{
extern int trust_executable_bit, has_symlinks;
if (!has_symlinks && S_ISREG(mode) &&
ce && S_ISLNK(ntohl(ce->ce_mode)))
return ce->ce_mode;
if (!trust_executable_bit && S_ISREG(mode)) {
if (ce && S_ISREG(ntohl(ce->ce_mode)))
return ce->ce_mode;
return create_ce_mode(0666);
}
return create_ce_mode(mode);
}
#define canon_mode(mode) \
(S_ISREG(mode) ? (S_IFREG | ce_permissions(mode)) : \
S_ISLNK(mode) ? S_IFLNK : S_ISDIR(mode) ? S_IFDIR : S_IFGITLINK)
#define cache_entry_size(len) ((offsetof(struct cache_entry,name) + (len) + 8) & ~7)
struct index_state {
struct cache_entry **cache;
unsigned int cache_nr, cache_alloc, cache_changed;
struct cache_tree *cache_tree;
time_t timestamp;
void *mmap;
size_t mmap_size;
};
extern struct index_state the_index;
#ifndef NO_THE_INDEX_COMPATIBILITY_MACROS
#define active_cache (the_index.cache)
#define active_nr (the_index.cache_nr)
#define active_alloc (the_index.cache_alloc)
#define active_cache_changed (the_index.cache_changed)
#define active_cache_tree (the_index.cache_tree)
#define read_cache() read_index(&the_index)
#define read_cache_from(path) read_index_from(&the_index, (path))
#define write_cache(newfd, cache, entries) write_index(&the_index, (newfd))
#define discard_cache() discard_index(&the_index)
#define cache_name_pos(name, namelen) index_name_pos(&the_index,(name),(namelen))
#define add_cache_entry(ce, option) add_index_entry(&the_index, (ce), (option))
#define remove_cache_entry_at(pos) remove_index_entry_at(&the_index, (pos))
#define remove_file_from_cache(path) remove_file_from_index(&the_index, (path))
#define add_file_to_cache(path, verbose) add_file_to_index(&the_index, (path), (verbose))
#define refresh_cache(flags) refresh_index(&the_index, flags)
#define ce_match_stat(ce, st, really) ie_match_stat(&the_index, (ce), (st), (really))
#define ce_modified(ce, st, really) ie_modified(&the_index, (ce), (st), (really))
#endif
enum object_type {
OBJ_BAD = -1,
OBJ_NONE = 0,
OBJ_COMMIT = 1,
OBJ_TREE = 2,
OBJ_BLOB = 3,
OBJ_TAG = 4,
/* 5 for future expansion */
OBJ_OFS_DELTA = 6,
OBJ_REF_DELTA = 7,
OBJ_MAX,
};
#define GIT_DIR_ENVIRONMENT "GIT_DIR"
#define DEFAULT_GIT_DIR_ENVIRONMENT ".git"
Rename environment variables. H. Peter Anvin mentioned that using SHA1_whatever as an environment variable name is not nice and we should instead use names starting with "GIT_" prefix to avoid conflicts. Here is what this patch does: * Renames the following environment variables: New name Old Name GIT_AUTHOR_DATE AUTHOR_DATE GIT_AUTHOR_EMAIL AUTHOR_EMAIL GIT_AUTHOR_NAME AUTHOR_NAME GIT_COMMITTER_EMAIL COMMIT_AUTHOR_EMAIL GIT_COMMITTER_NAME COMMIT_AUTHOR_NAME GIT_ALTERNATE_OBJECT_DIRECTORIES SHA1_FILE_DIRECTORIES GIT_OBJECT_DIRECTORY SHA1_FILE_DIRECTORY * Introduces a compatibility macro, gitenv(), which does an getenv() and if it fails calls gitenv_bc(), which in turn picks up the value from old name while giving a warning about using an old name. * Changes all users of the environment variable to fetch environment variable with the new name using gitenv(). * Updates the documentation and scripts shipped with Linus GIT distribution. The transition plan is as follows: * We will keep the backward compatibility list used by gitenv() for now, so the current scripts and user environments continue to work as before. The users will get warnings when they have old name but not new name in their environment to the stderr. * The Porcelain layers should start using new names. However, just in case it ends up calling old Plumbing layer implementation, they should also export old names, taking values from the corresponding new names, during the transition period. * After a transition period, we would drop the compatibility support and drop gitenv(). Revert the callers to directly call getenv() but keep using the new names. The last part is probably optional and the transition duration needs to be set to a reasonable value. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-05-10 02:57:56 +02:00
#define DB_ENVIRONMENT "GIT_OBJECT_DIRECTORY"
#define INDEX_ENVIRONMENT "GIT_INDEX_FILE"
#define GRAFT_ENVIRONMENT "GIT_GRAFT_FILE"
#define TEMPLATE_DIR_ENVIRONMENT "GIT_TEMPLATE_DIR"
#define CONFIG_ENVIRONMENT "GIT_CONFIG"
#define CONFIG_LOCAL_ENVIRONMENT "GIT_CONFIG_LOCAL"
#define EXEC_PATH_ENVIRONMENT "GIT_EXEC_PATH"
Add basic infrastructure to assign attributes to paths This adds the basic infrastructure to assign attributes to paths, in a way similar to what the exclusion mechanism does based on $GIT_DIR/info/exclude and .gitignore files. An attribute is just a simple string that does not contain any whitespace. They can be specified in $GIT_DIR/info/attributes file, and .gitattributes file in each directory. Each line in these files defines a pattern matching rule. Similar to the exclusion mechanism, a later match overrides an earlier match in the same file, and entries from .gitattributes file in the same directory takes precedence over the ones from parent directories. Lines in $GIT_DIR/info/attributes file are used as the lowest precedence default rules. A line is either a comment (an empty line, or a line that begins with a '#'), or a rule, which is a whitespace separated list of tokens. The first token on the line is a shell glob pattern. The rest are names of attributes, each of which can optionally be prefixed with '!'. Such a line means "if a path matches this glob, this attribute is set (or unset -- if the attribute name is prefixed with '!'). For glob matching, the same "if the pattern does not have a slash in it, the basename of the path is matched with fnmatch(3) against the pattern, otherwise, the path is matched with the pattern with FNM_PATHNAME" rule as the exclusion mechanism is used. This does not define what an attribute means. Tying an attribute to various effects it has on git operation for paths that have it will be specified separately. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-12 10:07:32 +02:00
#define GITATTRIBUTES_FILE ".gitattributes"
#define INFOATTRIBUTES_FILE "info/attributes"
attribute macro support This adds "attribute macros" (for lack of better name). So far, we have low-level attributes such as crlf and diff, which are defined in operational terms --- setting or unsetting them on a particular path directly affects what is done to the path. For example, in order to decline diffs or crlf conversions on a binary blob, no diffs on PostScript files, and treat all other files normally, you would have something like these: * diff crlf *.ps !diff proprietary.o !diff !crlf That is fine as the operation goes, but gets unwieldy rather rapidly, when we start adding more low-level attributes that are defined in operational terms. A near-term example of such an attribute would be 'merge-3way' which would control if git should attempt the usual 3-way file-level merge internally, or leave merging to a specialized external program of user's choice. When it is added, we do _not_ want to force the users to update the above to: * diff crlf merge-3way *.ps !diff proprietary.o !diff !crlf !merge-3way The way this patch solves this issue is to realize that the attributes the user is assigning to paths are not defined in terms of operations but in terms of what they are. All of the three low-level attributes usually make sense for most of the files that sane SCM users have git operate on (these files are typically called "text'). Only a few cases, such as binary blob, need exception to decline the "usual treatment given to text files" -- and people mark them as "binary". So this allows the $GIT_DIR/info/alternates and .gitattributes at the toplevel of the project to also specify attributes that assigns other attributes. The syntax is '[attr]' followed by an attribute name followed by a list of attribute names: [attr] binary !diff !crlf !merge-3way When "binary" attribute is set to a path, if the path has not got diff/crlf/merge-3way attribute set or unset by other rules, this rule unsets the three low-level attributes. It is expected that the user level .gitattributes will be expressed mostly in terms of attributes based on what the files are, and the above sample would become like this: (built-in attribute configuration) [attr] binary !diff !crlf !merge-3way * diff crlf merge-3way (project specific .gitattributes) proprietary.o binary (user preference $GIT_DIR/info/attributes) *.ps !diff There are a few caveats. * As described above, you can define these macros only in $GIT_DIR/info/attributes and toplevel .gitattributes. * There is no attempt to detect circular definition of macro attributes, and definitions are evaluated from bottom to top as usual to fill in other attributes that have not yet got values. The following would work as expected: [attr] text diff crlf [attr] ps text !diff *.ps ps while this would most likely not (I haven't tried): [attr] ps text !diff [attr] text diff crlf *.ps ps * When a macro says "[attr] A B !C", saying that a path does not have attribute A does not let you tell anything about attributes B or C. That is, given this: [attr] text diff crlf [attr] ps text !diff *.txt !ps path hello.txt, which would match "*.txt" pattern, would have "ps" attribute set to zero, but that does not make text attribute of hello.txt set to false (nor diff attribute set to true). Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-14 17:54:37 +02:00
#define ATTRIBUTE_MACRO_PREFIX "[attr]"
extern int is_bare_repository_cfg;
extern int is_bare_repository(void);
extern int is_inside_git_dir(void);
extern const char *get_git_dir(void);
extern char *get_object_directory(void);
extern char *get_refs_directory(void);
extern char *get_index_file(void);
extern char *get_graft_file(void);
#define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES"
extern const char **get_pathspec(const char *prefix, const char **pathspec);
extern const char *setup_git_directory_gently(int *);
extern const char *setup_git_directory(void);
extern const char *prefix_path(const char *prefix, int len, const char *path);
extern const char *prefix_filename(const char *prefix, int len, const char *path);
extern void verify_filename(const char *prefix, const char *name);
extern void verify_non_filename(const char *prefix, const char *name);
#define alloc_nr(x) (((x)+16)*3/2)
/*
* Realloc the buffer pointed at by variable 'x' so that it can hold
* at least 'nr' entries; the number of entries currently allocated
* is 'alloc', using the standard growing factor alloc_nr() macro.
*
* DO NOT USE any expression with side-effect for 'x' or 'alloc'.
*/
#define ALLOC_GROW(x, nr, alloc) \
do { \
if ((nr) >= alloc) { \
alloc = alloc_nr(alloc); \
x = xrealloc((x), alloc * sizeof(*(x))); \
} \
} while(0)
/* Initialize and use the cache information */
extern int read_index(struct index_state *);
extern int read_index_from(struct index_state *, const char *path);
extern int write_index(struct index_state *, int newfd);
extern int discard_index(struct index_state *);
extern int verify_path(const char *path);
extern int index_name_pos(struct index_state *, const char *name, int namelen);
#define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */
#define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */
#define ADD_CACHE_SKIP_DFCHECK 4 /* Ok to skip DF conflict checks */
extern int add_index_entry(struct index_state *, struct cache_entry *ce, int option);
extern struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really);
extern int remove_index_entry_at(struct index_state *, int pos);
extern int remove_file_from_index(struct index_state *, const char *path);
extern int add_file_to_index(struct index_state *, const char *path, int verbose);
extern int ce_same_name(struct cache_entry *a, struct cache_entry *b);
extern int ie_match_stat(struct index_state *, struct cache_entry *, struct stat *, int);
extern int ie_modified(struct index_state *, struct cache_entry *, struct stat *, int);
extern int ce_path_match(const struct cache_entry *ce, const char **pathspec);
extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path);
extern int read_pipe(int fd, char** return_buf, unsigned long* return_size);
extern int index_pipe(unsigned char *sha1, int fd, const char *type, int write_object);
extern int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object);
extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
#define REFRESH_REALLY 0x0001 /* ignore_valid */
#define REFRESH_UNMERGED 0x0002 /* allow unmerged */
#define REFRESH_QUIET 0x0004 /* be quiet about it */
#define REFRESH_IGNORE_MISSING 0x0008 /* ignore non-existent */
extern int refresh_index(struct index_state *, unsigned int flags);
struct lock_file {
struct lock_file *next;
pid_t owner;
char on_list;
char filename[PATH_MAX];
};
extern int hold_lock_file_for_update(struct lock_file *, const char *path, int);
extern int commit_lock_file(struct lock_file *);
extern int hold_locked_index(struct lock_file *, int);
extern int commit_locked_index(struct lock_file *);
extern void set_alternate_index_output(const char *);
extern void rollback_lock_file(struct lock_file *);
extern int delete_ref(const char *, const unsigned char *sha1);
/* Environment bits from configuration mechanism */
extern int trust_executable_bit;
extern int has_symlinks;
"Assume unchanged" git This adds "assume unchanged" logic, started by this message in the list discussion recently: <Pine.LNX.4.64.0601311807470.7301@g5.osdl.org> This is a workaround for filesystems that do not have lstat() that is quick enough for the index mechanism to take advantage of. On the paths marked as "assumed to be unchanged", the user needs to explicitly use update-index to register the object name to be in the next commit. You can use two new options to update-index to set and reset the CE_VALID bit: git-update-index --assume-unchanged path... git-update-index --no-assume-unchanged path... These forms manipulate only the CE_VALID bit; it does not change the object name recorded in the index file. Nor they add a new entry to the index. When the configuration variable "core.ignorestat = true" is set, the index entries are marked with CE_VALID bit automatically after: - update-index to explicitly register the current object name to the index file. - when update-index --refresh finds the path to be up-to-date. - when tools like read-tree -u and apply --index update the working tree file and register the current object name to the index file. The flag is dropped upon read-tree that does not check out the index entry. This happens regardless of the core.ignorestat settings. Index entries marked with CE_VALID bit are assumed to be unchanged most of the time. However, there are cases that CE_VALID bit is ignored for the sake of safety and usability: - while "git-read-tree -m" or git-apply need to make sure that the paths involved in the merge do not have local modifications. This sacrifices performance for safety. - when git-checkout-index -f -q -u -a tries to see if it needs to checkout the paths. Otherwise you can never check anything out ;-). - when git-update-index --really-refresh (a new flag) tries to see if the index entry is up to date. You can start with everything marked as CE_VALID and run this once to drop CE_VALID bit for paths that are modified. Most notably, "update-index --refresh" honours CE_VALID and does not actively stat, so after you modified a file in the working tree, update-index --refresh would not notice until you tell the index about it with "git-update-index path" or "git-update-index --no-assume-unchanged path". This version is not expected to be perfect. I think diff between index and/or tree and working files may need some adjustment, and there probably needs other cases we should automatically unmark paths that are marked to be CE_VALID. But the basics seem to work, and ready to be tested by people who asked for this feature. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-02-09 06:15:24 +01:00
extern int assume_unchanged;
extern int prefer_symlink_refs;
extern int log_all_ref_updates;
extern int warn_ambiguous_refs;
extern int shared_repository;
extern const char *apply_default_whitespace;
extern int zlib_compression_level;
Custom compression levels for objects and packs Add config variables pack.compression and core.loosecompression , and switch --compression=level to pack-objects. Loose objects will be compressed using core.loosecompression if set, else core.compression if set, else Z_BEST_SPEED. Packed objects will be compressed using --compression=level if seen, else pack.compression if set, else core.compression if set, else Z_DEFAULT_COMPRESSION. This is the "pack compression level". Loose objects added to a pack undeltified will be recompressed to the pack compression level if it is unequal to the current loose compression level by the preceding rules, or if the loose object was written while core.legacyheaders = true. Newly deltified loose objects are always compressed to the current pack compression level. Previously packed objects added to a pack are recompressed to the current pack compression level exactly when their deltification status changes, since the previous pack data cannot be reused. In either case, the --no-reuse-object switch from the first patch below will always force recompression to the current pack compression level, instead of assuming the pack compression level hasn't changed and pack data can be reused when possible. This applies on top of the following patches from Nicolas Pitre: [PATCH] allow for undeltified objects not to be reused [PATCH] make "repack -f" imply "pack-objects --no-reuse-object" Signed-off-by: Dana L. How <danahow@gmail.com> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-05-09 22:56:50 +02:00
extern int core_compression_level;
extern int core_compression_seen;
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
extern size_t delta_base_cache_limit;
Lazy man's auto-CRLF It currently does NOT know about file attributes, so it does its conversion purely based on content. Maybe that is more in the "git philosophy" anyway, since content is king, but I think we should try to do the file attributes to turn it off on demand. Anyway, BY DEFAULT it is off regardless, because it requires a [core] AutoCRLF = true in your config file to be enabled. We could make that the default for Windows, of course, the same way we do some other things (filemode etc). But you can actually enable it on UNIX, and it will cause: - "git update-index" will write blobs without CRLF - "git diff" will diff working tree files without CRLF - "git checkout" will write files to the working tree _with_ CRLF and things work fine. Funnily, it actually shows an odd file in git itself: git clone -n git test-crlf cd test-crlf git config core.autocrlf true git checkout git diff shows a diff for "Documentation/docbook-xsl.css". Why? Because we have actually checked in that file *with* CRLF! So when "core.autocrlf" is true, we'll always generate a *different* hash for it in the index, because the index hash will be for the content _without_ CRLF. Is this complete? I dunno. It seems to work for me. It doesn't use the filename at all right now, and that's probably a deficiency (we could certainly make the "is_binary()" heuristics also take standard filename heuristics into account). I don't pass in the filename at all for the "index_fd()" case (git-update-index), so that would need to be passed around, but this actually works fine. NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours truly. I will not guarantee that they work at all reasonable. Caveat emptor. But it _is_ simple, and it _is_ safe, since it's all off by default. The patch is pretty simple - the biggest part is the new "convert.c" file, but even that is really just basic stuff that anybody can write in "Teaching C 101" as a final project for their first class in programming. Not to say that it's bug-free, of course - but at least we're not talking about rocket surgery here. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
extern int auto_crlf;
#define GIT_REPO_VERSION 0
extern int repository_format_version;
extern int check_repository_format(void);
#define MTIME_CHANGED 0x0001
#define CTIME_CHANGED 0x0002
#define OWNER_CHANGED 0x0004
#define MODE_CHANGED 0x0008
#define INODE_CHANGED 0x0010
#define DATA_CHANGED 0x0020
#define TYPE_CHANGED 0x0040
/* Return a statically allocated filename matching the sha1 signature */
extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
extern char *git_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
extern char *sha1_file_name(const unsigned char *sha1);
extern char *sha1_pack_name(const unsigned char *sha1);
extern char *sha1_pack_index_name(const unsigned char *sha1);
extern const char *find_unique_abbrev(const unsigned char *sha1, int);
extern const unsigned char null_sha1[20];
static inline int is_null_sha1(const unsigned char *sha1)
{
return !memcmp(sha1, null_sha1, 20);
}
static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
{
return memcmp(sha1, sha2, 20);
}
static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src)
{
memcpy(sha_dst, sha_src, 20);
}
static inline void hashclr(unsigned char *hash)
{
memset(hash, 0, 20);
}
int git_mkstemp(char *path, size_t n, const char *template);
enum sharedrepo {
PERM_UMASK = 0,
PERM_GROUP,
PERM_EVERYBODY
};
int git_config_perm(const char *var, const char *value);
int adjust_shared_perm(const char *path);
int safe_create_leading_directories(char *path);
char *enter_repo(char *path, int strict);
/* Read and unpack a sha1 file into memory, write memory to a sha1 file */
extern int sha1_object_info(const unsigned char *, unsigned long *);
extern void * read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size);
extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1);
extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type);
extern int write_sha1_from_fd(const unsigned char *sha1, int fd, char *buffer,
size_t bufsize, size_t *bufposn);
extern int write_sha1_to_fd(int fd, const unsigned char *sha1);
extern int move_temp_to_file(const char *tmpfile, const char *filename);
extern int has_sha1_pack(const unsigned char *sha1, const char **ignore);
extern int has_sha1_file(const unsigned char *sha1);
extern void *map_sha1_file(const unsigned char *sha1, unsigned long *);
extern int has_pack_file(const unsigned char *sha1);
extern int has_pack_index(const unsigned char *sha1);
extern const signed char hexval_table[256];
static inline unsigned int hexval(unsigned char c)
{
return hexval_table[c];
}
/* Convert to/from hex/sha1 representation */
#define MINIMUM_ABBREV 4
#define DEFAULT_ABBREV 7
extern int get_sha1(const char *str, unsigned char *sha1);
extern int get_sha1_with_mode(const char *str, unsigned char *sha1, unsigned *mode);
extern int get_sha1_hex(const char *hex, unsigned char *sha1);
extern char *sha1_to_hex(const unsigned char *sha1); /* static buffer result! */
extern int read_ref(const char *filename, unsigned char *sha1);
extern const char *resolve_ref(const char *path, unsigned char *sha1, int, int *);
extern int dwim_ref(const char *str, int len, unsigned char *sha1, char **ref);
extern int dwim_log(const char *str, int len, unsigned char *sha1, char **ref);
extern int create_symref(const char *ref, const char *refs_heads_master, const char *logmsg);
extern int validate_headref(const char *ref);
extern int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
extern int cache_name_compare(const char *name1, int len1, const char *name2, int len2);
extern void *read_object_with_reference(const unsigned char *sha1,
const char *required_type,
unsigned long *size,
unsigned char *sha1_ret);
enum date_mode { DATE_NORMAL = 0, DATE_RELATIVE, DATE_SHORT, DATE_LOCAL };
const char *show_date(unsigned long time, int timezone, enum date_mode mode);
const char *show_rfc2822_date(unsigned long time, int timezone);
int parse_date(const char *date, char *buf, int bufsize);
void datestamp(char *buf, int bufsize);
git's rev-parse.c function show_datestring presumes gnu date Ok. This is the insane patch to do this. It really isn't very careful, and the reason I call it "approxidate()" will become obvious when you look at the code. It is very liberal in what it accepts, to the point where sometimes the results may not make a whole lot of sense. It accepts "last week" as a date string, by virtue of "last" parsing as the number 1, and it totally ignoring superfluous fluff like "ago", so "last week" ends up being exactly the same thing as "1 week ago". Fine so far. It has strange side effects: "last december" will actually parse as "Dec 1", which actually _does_ turn out right, because it will then notice that it's not December yet, so it will decide that you must be talking about a date last year. So it actually gets it right, but it's kind of for the "wrong" reasons. It also accepts the numbers 1..10 in string format ("one" .. "ten"), so you can do "ten weeks ago" or "ten hours ago" and it will do the right thing. But it will do some really strange thigns too: the string "this will last forever", will not recognize anyting but "last", which is recognized as "1", which since it doesn't understand anything else it will think is the day of the month. So if you do gitk --since="this will last forever" the date will actually parse as the first day of the current month. And it will parse the string "now" as "now", but only because it doesn't understand it at all, and it makes everything relative to "now". Similarly, it doesn't actually parse the "ago" or "from now", so "2 weeks ago" is exactly the same as "2 weeks from now". It's the current date minus 14 days. But hey, it's probably better (and certainly faster) than depending on GNU date. So now you can portably do things like gitk --since="two weeks and three days ago" git log --since="July 5" git-whatchanged --since="10 hours ago" git log --since="last october" and it will actually do exactly what you thought it would do (I think). It will count 17 days backwards, and it will do so even if you don't have GNU date installed. (I don't do "last monday" or similar yet, but I can extend it to that too if people want). It was kind of fun trying to write code that uses such totally relaxed "understanding" of dates yet tries to get it right for the trivial cases. The result should be mixed with a few strange preprocessor tricks, and be submitted for the IOCCC ;) Feel free to try it out, and see how many strange dates it gets right. Or wrong. And if you find some interesting (and valid - not "interesting" as in "strange", but "interesting" as in "I'd be interested in actually doing this) thing it gets wrong - usually by not understanding it and silently just doing some strange things - please holler. Now, as usual this certainly hasn't been getting a lot of testing. But my code always works, no? Linus Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-11-15 04:29:06 +01:00
unsigned long approxidate(const char *);
extern const char *git_author_info(int);
extern const char *git_committer_info(int);
extern const char *fmt_ident(const char *name, const char *email, const char *date_str, int);
struct checkout {
const char *base_dir;
int base_dir_len;
unsigned force:1,
quiet:1,
not_new:1,
refresh_cache:1;
};
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
Add has_symlink_leading_path() function. When we are applying a patch that creates a blob at a path, or when we are switching from a branch that does not have a blob at the path to another branch that has one, we need to make sure that there is nothing at the path in the working tree, as such a file is a local modification made by the user that would be lost by the operation. Normally, lstat() on the path and making sure ENOENT is returned is good enough for that purpose. However there is a twist. We may be creating a regular file arch/x86_64/boot/Makefile, while removing an existing symbolic link at arch/x86_64/boot that points at existing ../i386/boot directory that has Makefile in it. We always first check without touching filesystem and then perform the actual operation, so when we verify the new file, arch/x86_64/boot/Makefile, does not exist, we haven't removed the symbolic link arc/x86_64/boot symbolic link yet. lstat() on the file sees through the symbolic link and reports the file is there, which is not what we want. The function has_symlink_leading_path() function takes a path, and sees if any of the leading directory component is a symbolic link. When files in a new directory are created, we tend to process them together because both index and tree are sorted. The function takes advantage of this and allows the caller to cache and reuse which symbolic link on the filesystem caused the function to return true. The calling sequence would be: char last_symlink[PATH_MAX]; *last_symlink = '\0'; for each index entry { if (!lose) continue; if (lstat(it)) if (errno == ENOENT) ; /* happy */ else error; else if (has_symlink_leading_path(it, last_symlink)) ; /* happy */ else error; /* would lose local changes */ unlink_entry(it, last_symlink); } Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-05-12 07:11:07 +02:00
extern int has_symlink_leading_path(const char *name, char *last_symlink);
extern struct alternate_object_database {
struct alternate_object_database *next;
char *name;
char base[FLEX_ARRAY]; /* more */
} *alt_odb_list;
extern void prepare_alt_odb(void);
struct pack_window {
struct pack_window *next;
unsigned char *base;
off_t offset;
size_t len;
unsigned int last_used;
unsigned int inuse_cnt;
};
extern struct packed_git {
struct packed_git *next;
struct pack_window *windows;
off_t pack_size;
const void *index_data;
size_t index_size;
uint32_t num_objects;
int index_version;
time_t mtime;
int pack_fd;
int pack_local;
unsigned char sha1[20];
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
} *packed_git;
struct pack_entry {
off_t offset;
unsigned char sha1[20];
struct packed_git *p;
};
struct ref {
struct ref *next;
unsigned char old_sha1[20];
unsigned char new_sha1[20];
unsigned char force;
Renaming push. This allows git-send-pack to push local refs to a destination repository under different names. Here is the name mapping rules for refs. * If there is no ref mapping on the command line: - if '--all' is specified, it is equivalent to specifying <local> ":" <local> for all the existing local refs on the command line - otherwise, it is equivalent to specifying <ref> ":" <ref> for all the refs that exist on both sides. * <name> is just a shorthand for <name> ":" <name> * <src> ":" <dst> push ref that matches <src> to ref that matches <dst>. - It is an error if <src> does not match exactly one of local refs. - It is an error if <dst> matches more than one remote refs. - If <dst> does not match any remote refs, either - it has to start with "refs/"; <dst> is used as the destination literally in this case. - <src> == <dst> and the ref that matched the <src> must not exist in the set of remote refs; the ref matched <src> locally is used as the name of the destination. For example, - "git-send-pack --all <remote>" works exactly as before; - "git-send-pack <remote> master:upstream" pushes local master to remote ref that matches "upstream". If there is no such ref, it is an error. - "git-send-pack <remote> master:refs/heads/upstream" pushes local master to remote refs/heads/upstream, even when refs/heads/upstream does not exist. - "git-send-pack <remote> master" into an empty remote repository pushes the local ref/heads/master to the remote ref/heads/master. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-08-04 01:35:29 +02:00
struct ref *peer_ref; /* when renaming */
char name[FLEX_ARRAY]; /* more */
};
#define REF_NORMAL (1u << 0)
#define REF_HEADS (1u << 1)
#define REF_TAGS (1u << 2)
#define CONNECT_VERBOSE (1u << 0)
extern pid_t git_connect(int fd[2], char *url, const char *prog, int flags);
extern int finish_connect(pid_t pid);
extern int path_match(const char *path, int nr, char **match);
extern int get_ack(int fd, unsigned char *result_sha1);
extern struct ref **get_remote_heads(int in, struct ref **list, int nr_match, char **match, unsigned int flags);
extern int server_supports(const char *feature);
extern struct packed_git *parse_pack_index(unsigned char *sha1);
extern struct packed_git *parse_pack_index_file(const unsigned char *sha1,
const char *idx_path);
extern void prepare_packed_git(void);
extern void reprepare_packed_git(void);
extern void install_packed_git(struct packed_git *pack);
extern struct packed_git *find_sha1_pack(const unsigned char *sha1,
struct packed_git *packs);
extern void pack_report(void);
extern int open_pack_index(struct packed_git *);
extern unsigned char* use_pack(struct packed_git *, struct pack_window **, off_t, unsigned int *);
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
extern void unuse_pack(struct pack_window **);
extern struct packed_git *add_packed_git(const char *, int, int);
extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t);
extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
/* Dumb servers support */
extern int update_server_info(int);
typedef int (*config_fn_t)(const char *, const char *);
extern int git_default_config(const char *, const char *);
extern int git_config_from_file(config_fn_t fn, const char *);
extern int git_config(config_fn_t fn);
extern int git_config_int(const char *, const char *);
extern int git_config_bool(const char *, const char *);
extern int git_config_set(const char *, const char *);
extern int git_config_set_multivar(const char *, const char *, const char *, int);
extern int git_config_rename_section(const char *, const char *);
extern int check_repository_format_version(const char *var, const char *value);
#define MAX_GITNAME (1000)
extern char git_default_email[MAX_GITNAME];
extern char git_default_name[MAX_GITNAME];
extern const char *git_commit_encoding;
extern const char *git_log_output_encoding;
extern int copy_fd(int ifd, int ofd);
extern int read_in_full(int fd, void *buf, size_t count);
extern int write_in_full(int fd, const void *buf, size_t count);
extern void write_or_die(int fd, const void *buf, size_t count);
extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg);
/* pager.c */
extern void setup_pager(void);
extern int pager_in_use;
extern int pager_use_color;
binary patch. This adds "binary patch" to the diff output and teaches apply what to do with them. On the diff generation side, traditionally, we said "Binary files differ\n" without giving anything other than the preimage and postimage object name on the index line. This was good enough for applying a patch generated from your own repository (very useful while rebasing), because the postimage would be available in such a case. However, this was not useful when the recipient of such a patch via e-mail were to apply it, even if the preimage was available. This patch allows the diff to generate "binary" patch when operating under --full-index option. The binary patch follows the usual extended git diff headers, and looks like this: "GIT binary patch\n" <length byte><data>"\n" ... "\n" Each line is prefixed with a "length-byte", whose value is upper or lowercase alphabet that encodes number of bytes that the data on the line decodes to (1..52 -- 'A' means 1, 'B' means 2, ..., 'Z' means 26, 'a' means 27, ...). <data> is 1 or more groups of 5-byte sequence, each of which encodes up to 4 bytes in base85 encoding. Because 52 / 4 * 5 = 65 and we have the length byte, an output line is capped to 66 characters. The payload is the same diff-delta as we use in the packfiles. On the consumption side, git-apply now can decode and apply the binary patch when --allow-binary-replacement is given, the diff was generated with --full-index, and the receiving repository has the preimage blob, which is the same condition as it always required when accepting an "Binary files differ\n" patch. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-05 01:51:44 +02:00
/* base85 */
int decode_85(char *dst, const char *line, int linelen);
void encode_85(char *buf, const unsigned char *data, int bytes);
binary patch. This adds "binary patch" to the diff output and teaches apply what to do with them. On the diff generation side, traditionally, we said "Binary files differ\n" without giving anything other than the preimage and postimage object name on the index line. This was good enough for applying a patch generated from your own repository (very useful while rebasing), because the postimage would be available in such a case. However, this was not useful when the recipient of such a patch via e-mail were to apply it, even if the preimage was available. This patch allows the diff to generate "binary" patch when operating under --full-index option. The binary patch follows the usual extended git diff headers, and looks like this: "GIT binary patch\n" <length byte><data>"\n" ... "\n" Each line is prefixed with a "length-byte", whose value is upper or lowercase alphabet that encodes number of bytes that the data on the line decodes to (1..52 -- 'A' means 1, 'B' means 2, ..., 'Z' means 26, 'a' means 27, ...). <data> is 1 or more groups of 5-byte sequence, each of which encodes up to 4 bytes in base85 encoding. Because 52 / 4 * 5 = 65 and we have the length byte, an output line is capped to 66 characters. The payload is the same diff-delta as we use in the packfiles. On the consumption side, git-apply now can decode and apply the binary patch when --allow-binary-replacement is given, the diff was generated with --full-index, and the receiving repository has the preimage blob, which is the same condition as it always required when accepting an "Binary files differ\n" patch. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-05 01:51:44 +02:00
Add specialized object allocator This creates a simple specialized object allocator for basic objects. This avoids wasting space with malloc overhead (metadata and extra alignment), since the specialized allocator knows the alignment, and that objects, once allocated, are never freed. It also allows us to track some basic statistics about object allocations. For example, for the mozilla import, it shows object usage as follows: blobs: 627629 (14710 kB) trees: 1119035 (34969 kB) commits: 196423 (8440 kB) tags: 1336 (46 kB) and the simpler allocator shaves off about 2.5% off the memory footprint off a "git-rev-list --all --objects", and is a bit faster too. [ Side note: this concludes the series of "save memory in object storage". The thing is, there simply isn't much more to be saved on the objects. Doing "git-rev-list --all --objects" on the mozilla archive has a final total RSS of 131498 pages for me: that's about 513MB. Of that, the object overhead is now just 56MB, the rest is going somewhere else (put another way: the fact that this patch shaves off 2.5% of the total memory overhead, considering that objects are now not much more than 10% of the total shows how big the wasted space really was: this makes object allocations much more memory- and time-efficient). I haven't looked at where the rest is, but I suspect the bulk of it is just the pack-file loading. It may be that we should pack the tree objects separately from the blob objects: for git-rev-list --objects, we don't actually ever need to even look at the blobs, but since trees and blobs are interspersed in the pack-file, we end up not being dense in the tree accesses, so we end up looking at more pages than we strictly need to. So with a 535MB pack-file, it's entirely possible - even likely - that most of the remaining RSS is just the mmap of the pack-file itself. We don't need to map in _all_ of it, but we do end up mapping a fair amount. ] Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-19 19:44:15 +02:00
/* alloc.c */
extern void *alloc_blob_node(void);
extern void *alloc_tree_node(void);
extern void *alloc_commit_node(void);
extern void *alloc_tag_node(void);
extern void *alloc_object_node(void);
Add specialized object allocator This creates a simple specialized object allocator for basic objects. This avoids wasting space with malloc overhead (metadata and extra alignment), since the specialized allocator knows the alignment, and that objects, once allocated, are never freed. It also allows us to track some basic statistics about object allocations. For example, for the mozilla import, it shows object usage as follows: blobs: 627629 (14710 kB) trees: 1119035 (34969 kB) commits: 196423 (8440 kB) tags: 1336 (46 kB) and the simpler allocator shaves off about 2.5% off the memory footprint off a "git-rev-list --all --objects", and is a bit faster too. [ Side note: this concludes the series of "save memory in object storage". The thing is, there simply isn't much more to be saved on the objects. Doing "git-rev-list --all --objects" on the mozilla archive has a final total RSS of 131498 pages for me: that's about 513MB. Of that, the object overhead is now just 56MB, the rest is going somewhere else (put another way: the fact that this patch shaves off 2.5% of the total memory overhead, considering that objects are now not much more than 10% of the total shows how big the wasted space really was: this makes object allocations much more memory- and time-efficient). I haven't looked at where the rest is, but I suspect the bulk of it is just the pack-file loading. It may be that we should pack the tree objects separately from the blob objects: for git-rev-list --objects, we don't actually ever need to even look at the blobs, but since trees and blobs are interspersed in the pack-file, we end up not being dense in the tree accesses, so we end up looking at more pages than we strictly need to. So with a 535MB pack-file, it's entirely possible - even likely - that most of the remaining RSS is just the mmap of the pack-file itself. We don't need to map in _all_ of it, but we do end up mapping a fair amount. ] Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-19 19:44:15 +02:00
extern void alloc_report(void);
/* trace.c */
extern int nfasprintf(char **str, const char *fmt, ...);
extern int nfvasprintf(char **str, const char *fmt, va_list va);
extern void trace_printf(const char *format, ...);
extern void trace_argv_printf(const char **argv, int count, const char *format, ...);
Lazy man's auto-CRLF It currently does NOT know about file attributes, so it does its conversion purely based on content. Maybe that is more in the "git philosophy" anyway, since content is king, but I think we should try to do the file attributes to turn it off on demand. Anyway, BY DEFAULT it is off regardless, because it requires a [core] AutoCRLF = true in your config file to be enabled. We could make that the default for Windows, of course, the same way we do some other things (filemode etc). But you can actually enable it on UNIX, and it will cause: - "git update-index" will write blobs without CRLF - "git diff" will diff working tree files without CRLF - "git checkout" will write files to the working tree _with_ CRLF and things work fine. Funnily, it actually shows an odd file in git itself: git clone -n git test-crlf cd test-crlf git config core.autocrlf true git checkout git diff shows a diff for "Documentation/docbook-xsl.css". Why? Because we have actually checked in that file *with* CRLF! So when "core.autocrlf" is true, we'll always generate a *different* hash for it in the index, because the index hash will be for the content _without_ CRLF. Is this complete? I dunno. It seems to work for me. It doesn't use the filename at all right now, and that's probably a deficiency (we could certainly make the "is_binary()" heuristics also take standard filename heuristics into account). I don't pass in the filename at all for the "index_fd()" case (git-update-index), so that would need to be passed around, but this actually works fine. NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours truly. I will not guarantee that they work at all reasonable. Caveat emptor. But it _is_ simple, and it _is_ safe, since it's all off by default. The patch is pretty simple - the biggest part is the new "convert.c" file, but even that is really just basic stuff that anybody can write in "Teaching C 101" as a final project for their first class in programming. Not to say that it's bug-free, of course - but at least we're not talking about rocket surgery here. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
/* convert.c */
extern char *convert_to_git(const char *path, const char *src, unsigned long *sizep);
extern char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep);
extern void *convert_sha1_file(const char *path, const unsigned char *sha1, unsigned int mode, enum object_type *type, unsigned long *size);
Lazy man's auto-CRLF It currently does NOT know about file attributes, so it does its conversion purely based on content. Maybe that is more in the "git philosophy" anyway, since content is king, but I think we should try to do the file attributes to turn it off on demand. Anyway, BY DEFAULT it is off regardless, because it requires a [core] AutoCRLF = true in your config file to be enabled. We could make that the default for Windows, of course, the same way we do some other things (filemode etc). But you can actually enable it on UNIX, and it will cause: - "git update-index" will write blobs without CRLF - "git diff" will diff working tree files without CRLF - "git checkout" will write files to the working tree _with_ CRLF and things work fine. Funnily, it actually shows an odd file in git itself: git clone -n git test-crlf cd test-crlf git config core.autocrlf true git checkout git diff shows a diff for "Documentation/docbook-xsl.css". Why? Because we have actually checked in that file *with* CRLF! So when "core.autocrlf" is true, we'll always generate a *different* hash for it in the index, because the index hash will be for the content _without_ CRLF. Is this complete? I dunno. It seems to work for me. It doesn't use the filename at all right now, and that's probably a deficiency (we could certainly make the "is_binary()" heuristics also take standard filename heuristics into account). I don't pass in the filename at all for the "index_fd()" case (git-update-index), so that would need to be passed around, but this actually works fine. NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours truly. I will not guarantee that they work at all reasonable. Caveat emptor. But it _is_ simple, and it _is_ safe, since it's all off by default. The patch is pretty simple - the biggest part is the new "convert.c" file, but even that is really just basic stuff that anybody can write in "Teaching C 101" as a final project for their first class in programming. Not to say that it's bug-free, of course - but at least we're not talking about rocket surgery here. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
/* match-trees.c */
void shift_tree(const unsigned char *, const unsigned char *, unsigned char *, int);
#endif /* CACHE_H */