2005-04-08 00:13:13 +02:00
|
|
|
#ifndef CACHE_H
|
|
|
|
#define CACHE_H
|
|
|
|
|
2005-12-05 20:54:29 +01:00
|
|
|
#include "git-compat-util.h"
|
Rewrite convert_to_{git,working_tree} to use strbuf's.
* Now, those functions take an "out" strbuf argument, where they store their
result if any. In that case, it also returns 1, else it returns 0.
* those functions support "in place" editing, in the sense that it's OK to
call them this way:
convert_to_git(path, sb->buf, sb->len, sb);
When doable, conversions are done in place for real, else the strbuf
content is just replaced with the new one, transparently for the caller.
If you want to create a new filter working this way, being the accumulation
of filter1, filter2, ... filtern, then your meta_filter would be:
int meta_filter(..., const char *src, size_t len, struct strbuf *sb)
{
int ret = 0;
ret |= filter1(...., src, len, sb);
if (ret) {
src = sb->buf;
len = sb->len;
}
ret |= filter2(...., src, len, sb);
if (ret) {
src = sb->buf;
len = sb->len;
}
....
return ret | filtern(..., src, len, sb);
}
That's why subfilters the convert_to_* functions called were also rewritten
to work this way.
Signed-off-by: Pierre Habouzit <madcoder@debian.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-09-16 15:51:04 +02:00
|
|
|
#include "strbuf.h"
|
Create pathname-based hash-table lookup into index
This creates a hash index of every single file added to the index.
Right now that hash index isn't actually used for much: I implemented a
"cache_name_exists()" function that uses it to efficiently look up a
filename in the index without having to do the O(logn) binary search,
but quite frankly, that's not why this patch is interesting.
No, the whole and only reason to create the hash of the filenames in the
index is that by modifying the hash function, you can fairly easily do
things like making it always hash equivalent names into the same bucket.
That, in turn, means that suddenly questions like "does this name exist
in the index under an _equivalent_ name?" becomes much much cheaper.
Guiding principles behind this patch:
- it shouldn't be too costly. In fact, my primary goal here was to
actually speed up "git commit" with a fully populated kernel tree, by
being faster at checking whether a file already existed in the index. I
did succeed, but only barely:
Best before:
[torvalds@woody linux]$ time git commit > /dev/null
real 0m0.255s
user 0m0.168s
sys 0m0.088s
Best after:
[torvalds@woody linux]$ time ~/git/git commit > /dev/null
real 0m0.233s
user 0m0.144s
sys 0m0.088s
so some things are actually faster (~8%).
Caveat: that's really the best case. Other things are invariably going
to be slightly slower, since we populate that index cache, and quite
frankly, few things really use it to look things up.
That said, the cost is really quite small. The worst case is probably
doing a "git ls-files", which will do very little except populate the
index, and never actually looks anything up in it, just lists it.
Before:
[torvalds@woody linux]$ time git ls-files > /dev/null
real 0m0.016s
user 0m0.016s
sys 0m0.000s
After:
[torvalds@woody linux]$ time ~/git/git ls-files > /dev/null
real 0m0.021s
user 0m0.012s
sys 0m0.008s
and while the thing has really gotten relatively much slower, we're
still talking about something almost unmeasurable (eg 5ms). And that
really should be pretty much the worst case.
So we lose 5ms on one "benchmark", but win 22ms on another. Pick your
poison - this patch has the advantage that it will _likely_ speed up
the cases that are complex and expensive more than it slows down the
cases that are already so fast that nobody cares. But if you look at
relative speedups/slowdowns, it doesn't look so good.
- It should be simple and clean
The code may be a bit subtle (the reasons I do hash removal the way I
do etc), but it re-uses the existing hash.c files, so it really is
fairly small and straightforward apart from a few odd details.
Now, this patch on its own doesn't really do much, but I think it's worth
looking at, if only because if done correctly, the name hashing really can
make an improvement to the whole issue of "do we have a filename that
looks like this in the index already". And at least it gets real testing
by being used even by default (ie there is a real use-case for it even
without any insane filesystems).
NOTE NOTE NOTE! The current hash is a joke. I'm ashamed of it, I'm just
not ashamed of it enough to really care. I took all the numbers out of my
nether regions - I'm sure it's good enough that it works in practice, but
the whole point was that you can make a really much fancier hash that
hashes characters not directly, but by their upper-case value or something
like that, and thus you get a case-insensitive hash, while still keeping
the name and the index itself totally case sensitive.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-01-23 03:41:14 +01:00
|
|
|
#include "hash.h"
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2005-04-21 21:33:22 +02:00
|
|
|
#include SHA1_HEADER
|
2005-04-08 00:13:13 +02:00
|
|
|
#include <zlib.h>
|
|
|
|
|
2007-11-07 04:24:28 +01:00
|
|
|
#if defined(NO_DEFLATE_BOUND) || ZLIB_VERNUM < 0x1200
|
2005-04-30 18:51:03 +02:00
|
|
|
#define deflateBound(c,s) ((s) + (((s) + 7) >> 3) + (((s) + 63) >> 6) + 11)
|
|
|
|
#endif
|
|
|
|
|
2006-02-26 16:13:46 +01:00
|
|
|
#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
|
2005-04-30 18:51:03 +02:00
|
|
|
#define DTYPE(de) ((de)->d_type)
|
|
|
|
#else
|
2006-01-20 22:33:20 +01:00
|
|
|
#undef DT_UNKNOWN
|
|
|
|
#undef DT_DIR
|
|
|
|
#undef DT_REG
|
|
|
|
#undef DT_LNK
|
2005-04-30 18:51:03 +02:00
|
|
|
#define DT_UNKNOWN 0
|
|
|
|
#define DT_DIR 1
|
|
|
|
#define DT_REG 2
|
2005-05-13 02:16:04 +02:00
|
|
|
#define DT_LNK 3
|
2005-04-30 18:51:03 +02:00
|
|
|
#define DTYPE(de) DT_UNKNOWN
|
|
|
|
#endif
|
|
|
|
|
2007-04-22 18:43:56 +02:00
|
|
|
/* unknown mode (impossible combination S_IFIFO|S_IFCHR) */
|
|
|
|
#define S_IFINVALID 0030000
|
|
|
|
|
2007-04-10 06:14:58 +02:00
|
|
|
/*
|
|
|
|
* A "directory link" is a link to another git directory.
|
|
|
|
*
|
|
|
|
* The value 0160000 is not normally a valid mode, and
|
|
|
|
* also just happens to be S_IFDIR + S_IFLNK
|
|
|
|
*
|
|
|
|
* NOTE! We *really* shouldn't depend on the S_IFxxx macros
|
|
|
|
* always having the same values everywhere. We should use
|
|
|
|
* our internal git values for these things, and then we can
|
|
|
|
* translate that to the OS-specific value. It just so
|
|
|
|
* happens that everybody shares the same bit representation
|
|
|
|
* in the UNIX world (and apparently wider too..)
|
|
|
|
*/
|
2007-05-21 22:08:28 +02:00
|
|
|
#define S_IFGITLINK 0160000
|
|
|
|
#define S_ISGITLINK(m) (((m) & S_IFMT) == S_IFGITLINK)
|
2007-04-10 06:14:58 +02:00
|
|
|
|
2005-07-14 03:46:20 +02:00
|
|
|
/*
|
|
|
|
* Intensive research over the course of many years has shown that
|
|
|
|
* port 9418 is totally unused by anything else. Or
|
|
|
|
*
|
|
|
|
* Your search - "port 9418" - did not match any documents.
|
|
|
|
*
|
|
|
|
* as www.google.com puts it.
|
2005-09-12 20:23:00 +02:00
|
|
|
*
|
|
|
|
* This port has been properly assigned for git use by IANA:
|
|
|
|
* git (Assigned-9418) [I06-050728-0001].
|
|
|
|
*
|
|
|
|
* git 9418/tcp git pack transfer service
|
|
|
|
* git 9418/udp git pack transfer service
|
|
|
|
*
|
|
|
|
* with Linus Torvalds <torvalds@osdl.org> as the point of
|
|
|
|
* contact. September 2005.
|
|
|
|
*
|
|
|
|
* See http://www.iana.org/assignments/port-numbers
|
2005-07-14 03:46:20 +02:00
|
|
|
*/
|
|
|
|
#define DEFAULT_GIT_PORT 9418
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/*
|
|
|
|
* Basic data structures for the directory cache
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define CACHE_SIGNATURE 0x44495243 /* "DIRC" */
|
|
|
|
/*
 * Fixed-size header record of the directory cache: three
 * unsigned words, in this order.  The signature word is
 * presumably compared against CACHE_SIGNATURE -- confirm in
 * the reader code.
 */
struct cache_header {
	unsigned int hdr_signature;	/* signature word */
	unsigned int hdr_version;	/* format version */
	unsigned int hdr_entries;	/* entry count */
};
|
|
|
|
|
|
|
|
/*
 * A timestamp as kept in the cache.  Only the low 32 bits of
 * the time are stored; overflow is harmless because the value
 * is only ever compared for equality within those 32 bits.
 */
struct cache_time {
	unsigned int sec;	/* seconds part */
	unsigned int nsec;	/* nanoseconds part */
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* dev/ino/uid/gid/size are also just tracked to the low 32 bits
|
|
|
|
* Again - this is just a (very strong in practice) heuristic that
|
|
|
|
* the inode hasn't changed.
|
2005-04-15 19:44:27 +02:00
|
|
|
*
|
|
|
|
* We save the fields in big-endian order to allow using the
|
|
|
|
* index file over NFS transparently.
|
2005-04-08 00:13:13 +02:00
|
|
|
*/
|
2008-01-15 01:03:17 +01:00
|
|
|
struct ondisk_cache_entry {
|
|
|
|
struct cache_time ctime;
|
|
|
|
struct cache_time mtime;
|
|
|
|
unsigned int dev;
|
|
|
|
unsigned int ino;
|
|
|
|
unsigned int mode;
|
|
|
|
unsigned int uid;
|
|
|
|
unsigned int gid;
|
|
|
|
unsigned int size;
|
|
|
|
unsigned char sha1[20];
|
|
|
|
unsigned short flags;
|
|
|
|
char name[FLEX_ARRAY]; /* more */
|
|
|
|
};
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
struct cache_entry {
|
2008-01-15 01:03:17 +01:00
|
|
|
unsigned int ce_ctime;
|
|
|
|
unsigned int ce_mtime;
|
2005-04-15 19:44:27 +02:00
|
|
|
unsigned int ce_dev;
|
|
|
|
unsigned int ce_ino;
|
|
|
|
unsigned int ce_mode;
|
|
|
|
unsigned int ce_uid;
|
|
|
|
unsigned int ce_gid;
|
|
|
|
unsigned int ce_size;
|
2008-01-15 01:03:17 +01:00
|
|
|
unsigned int ce_flags;
|
2005-04-08 00:13:13 +02:00
|
|
|
unsigned char sha1[20];
|
2008-02-23 05:41:17 +01:00
|
|
|
struct cache_entry *next;
|
2006-01-07 10:33:54 +01:00
|
|
|
char name[FLEX_ARRAY]; /* more */
|
2005-04-08 00:13:13 +02:00
|
|
|
};
|
|
|
|
|
2005-04-16 07:51:44 +02:00
|
|
|
#define CE_NAMEMASK (0x0fff)
|
|
|
|
#define CE_STAGEMASK (0x3000)
|
2006-02-09 06:15:24 +01:00
|
|
|
#define CE_VALID (0x8000)
|
2005-04-16 17:33:23 +02:00
|
|
|
#define CE_STAGESHIFT 12
|
2005-04-16 07:51:44 +02:00
|
|
|
|
2008-01-15 01:03:17 +01:00
|
|
|
/* In-memory only */
|
|
|
|
#define CE_UPDATE (0x10000)
|
|
|
|
#define CE_REMOVE (0x20000)
|
2008-01-19 08:45:24 +01:00
|
|
|
#define CE_UPTODATE (0x40000)
|
Fix name re-hashing semantics
We handled the case of removing and re-inserting cache entries badly,
which is something that merging commonly needs to do (removing the
different stages, and then re-inserting one of them as the merged
state).
We even had a rather ugly special case for this failure case, where
replace_index_entry() basically turned itself into a no-op if the new
and the old entries were the same, exactly because the hash routines
didn't handle it on their own.
So what this patch does is to not just have the UNHASHED bit, but a
HASHED bit too, and when you insert an entry into the name hash, that
involves:
- clear the UNHASHED bit, because now it's valid again for lookup
(which is really all that UNHASHED meant)
- if we're being lazy, we're done here (but we still want to clear the
UNHASHED bit regardless of lazy mode, since we can become unlazy
later, and so we need the UNHASHED bit to always be set correctly,
even if we never actually insert the entry into the hash list)
- if it was already hashed, we just leave it on the list
- otherwise mark it HASHED and insert it into the list
this all means that unhashing and rehashing a name all just works
automatically. Obviously, you cannot change the name of an entry (that
would be a serious bug), but nothing can validly do that anyway (you'd
have to allocate a new struct cache_entry anyway since the name length
could change), so that's not a new limitation.
The code actually gets simpler in many ways, although the lazy hashing
does mean that there are a few odd cases (ie something can be marked
unhashed even though it was never on the hash in the first place, and
isn't actually marked hashed!).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-23 05:37:40 +01:00
|
|
|
|
|
|
|
#define CE_HASHED (0x100000)
|
|
|
|
#define CE_UNHASHED (0x200000)
|
2008-01-15 01:03:17 +01:00
|
|
|
|
2008-02-23 05:41:17 +01:00
|
|
|
/*
|
|
|
|
* Copy the sha1 and stat state of a cache entry from one to
|
|
|
|
* another. But we never change the name, or the hash state!
|
|
|
|
*/
|
|
|
|
#define CE_STATE_MASK (CE_HASHED | CE_UNHASHED)
|
|
|
|
static inline void copy_cache_entry(struct cache_entry *dst, struct cache_entry *src)
|
|
|
|
{
|
|
|
|
unsigned int state = dst->ce_flags & CE_STATE_MASK;
|
|
|
|
|
|
|
|
/* Don't copy hash chain and name */
|
|
|
|
memcpy(dst, src, offsetof(struct cache_entry, next));
|
|
|
|
|
|
|
|
/* Restore the hash state */
|
|
|
|
dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state;
|
|
|
|
}
|
|
|
|
|
2008-02-23 05:39:21 +01:00
|
|
|
/*
|
|
|
|
* We don't actually *remove* it, we can just mark it invalid so that
|
|
|
|
* we won't find it in lookups.
|
|
|
|
*
|
|
|
|
* Not only would we have to search the lists (simple enough), but
|
|
|
|
* we'd also have to rehash other hash buckets in case this makes the
|
|
|
|
* hash bucket empty (common). So it's much better to just mark
|
|
|
|
* it.
|
|
|
|
*/
|
|
|
|
static inline void remove_index_entry(struct cache_entry *ce)
|
|
|
|
{
|
|
|
|
ce->ce_flags |= CE_UNHASHED;
|
|
|
|
}
|
|
|
|
|
2008-01-19 08:42:00 +01:00
|
|
|
static inline unsigned create_ce_flags(size_t len, unsigned stage)
|
|
|
|
{
|
|
|
|
if (len >= CE_NAMEMASK)
|
|
|
|
len = CE_NAMEMASK;
|
|
|
|
return (len | (stage << CE_STAGESHIFT));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline size_t ce_namelen(const struct cache_entry *ce)
|
|
|
|
{
|
|
|
|
size_t len = ce->ce_flags & CE_NAMEMASK;
|
|
|
|
if (len < CE_NAMEMASK)
|
|
|
|
return len;
|
|
|
|
return strlen(ce->name + CE_NAMEMASK) + CE_NAMEMASK;
|
|
|
|
}
|
|
|
|
|
2005-04-16 17:33:23 +02:00
|
|
|
#define ce_size(ce) cache_entry_size(ce_namelen(ce))
|
2008-01-15 01:03:17 +01:00
|
|
|
#define ondisk_ce_size(ce) ondisk_cache_entry_size(ce_namelen(ce))
|
|
|
|
#define ce_stage(ce) ((CE_STAGEMASK & (ce)->ce_flags) >> CE_STAGESHIFT)
|
2008-01-19 08:45:24 +01:00
|
|
|
#define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE)
|
|
|
|
#define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE)
|
2005-04-16 17:33:23 +02:00
|
|
|
|
2005-04-17 07:26:31 +02:00
|
|
|
#define ce_permissions(mode) (((mode) & 0100) ? 0755 : 0644)
|
2005-05-05 14:38:25 +02:00
|
|
|
static inline unsigned int create_ce_mode(unsigned int mode)
|
|
|
|
{
|
|
|
|
if (S_ISLNK(mode))
|
2008-01-15 01:03:17 +01:00
|
|
|
return S_IFLNK;
|
2007-05-21 22:08:28 +02:00
|
|
|
if (S_ISDIR(mode) || S_ISGITLINK(mode))
|
2008-01-15 01:03:17 +01:00
|
|
|
return S_IFGITLINK;
|
|
|
|
return S_IFREG | ce_permissions(mode);
|
2005-05-05 14:38:25 +02:00
|
|
|
}
|
2007-02-17 07:43:48 +01:00
|
|
|
static inline unsigned int ce_mode_from_stat(struct cache_entry *ce, unsigned int mode)
|
|
|
|
{
|
2007-03-02 22:11:30 +01:00
|
|
|
extern int trust_executable_bit, has_symlinks;
|
|
|
|
if (!has_symlinks && S_ISREG(mode) &&
|
2008-01-15 01:03:17 +01:00
|
|
|
ce && S_ISLNK(ce->ce_mode))
|
2007-03-02 22:11:30 +01:00
|
|
|
return ce->ce_mode;
|
2007-02-17 07:43:48 +01:00
|
|
|
if (!trust_executable_bit && S_ISREG(mode)) {
|
2008-01-15 01:03:17 +01:00
|
|
|
if (ce && S_ISREG(ce->ce_mode))
|
2007-02-17 07:43:48 +01:00
|
|
|
return ce->ce_mode;
|
|
|
|
return create_ce_mode(0666);
|
|
|
|
}
|
|
|
|
return create_ce_mode(mode);
|
|
|
|
}
|
2008-01-31 10:17:48 +01:00
|
|
|
static inline int ce_to_dtype(const struct cache_entry *ce)
|
|
|
|
{
|
|
|
|
unsigned ce_mode = ntohl(ce->ce_mode);
|
|
|
|
if (S_ISREG(ce_mode))
|
|
|
|
return DT_REG;
|
|
|
|
else if (S_ISDIR(ce_mode) || S_ISGITLINK(ce_mode))
|
|
|
|
return DT_DIR;
|
|
|
|
else if (S_ISLNK(ce_mode))
|
|
|
|
return DT_LNK;
|
|
|
|
else
|
|
|
|
return DT_UNKNOWN;
|
|
|
|
}
|
2006-03-30 08:55:43 +02:00
|
|
|
/*
 * Canonical mode for "mode": regular files keep only normalized
 * permissions, symlinks and directories map to their bare S_IF*
 * type, and anything else is treated as a gitlink.
 * Note that "mode" is evaluated more than once.
 */
#define canon_mode(mode) \
	(S_ISREG(mode) ? (S_IFREG | ce_permissions(mode)) : \
	(S_ISLNK(mode) ? S_IFLNK : \
	(S_ISDIR(mode) ? S_IFDIR : S_IFGITLINK)))
|
2005-04-17 07:26:31 +02:00
|
|
|
|
2005-04-16 17:33:23 +02:00
|
|
|
#define cache_entry_size(len) ((offsetof(struct cache_entry,name) + (len) + 8) & ~7)
|
2008-01-15 01:03:17 +01:00
|
|
|
#define ondisk_cache_entry_size(len) ((offsetof(struct ondisk_cache_entry,name) + (len) + 8) & ~7)
|
2005-04-16 06:45:38 +02:00
|
|
|
|
2007-04-02 03:14:06 +02:00
|
|
|
struct index_state {
|
|
|
|
struct cache_entry **cache;
|
|
|
|
unsigned int cache_nr, cache_alloc, cache_changed;
|
|
|
|
struct cache_tree *cache_tree;
|
|
|
|
time_t timestamp;
|
2008-01-15 01:03:17 +01:00
|
|
|
void *alloc;
|
2008-01-23 08:01:13 +01:00
|
|
|
unsigned name_hash_initialized : 1;
|
Create pathname-based hash-table lookup into index
This creates a hash index of every single file added to the index.
Right now that hash index isn't actually used for much: I implemented a
"cache_name_exists()" function that uses it to efficiently look up a
filename in the index without having to do the O(logn) binary search,
but quite frankly, that's not why this patch is interesting.
No, the whole and only reason to create the hash of the filenames in the
index is that by modifying the hash function, you can fairly easily do
things like making it always hash equivalent names into the same bucket.
That, in turn, means that suddenly questions like "does this name exist
in the index under an _equivalent_ name?" becomes much much cheaper.
Guiding principles behind this patch:
- it shouldn't be too costly. In fact, my primary goal here was to
actually speed up "git commit" with a fully populated kernel tree, by
being faster at checking whether a file already existed in the index. I
did succeed, but only barely:
Best before:
[torvalds@woody linux]$ time git commit > /dev/null
real 0m0.255s
user 0m0.168s
sys 0m0.088s
Best after:
[torvalds@woody linux]$ time ~/git/git commit > /dev/null
real 0m0.233s
user 0m0.144s
sys 0m0.088s
so some things are actually faster (~8%).
Caveat: that's really the best case. Other things are invariably going
to be slightly slower, since we populate that index cache, and quite
frankly, few things really use it to look things up.
That said, the cost is really quite small. The worst case is probably
doing a "git ls-files", which will do very little except populate the
index, and never actually looks anything up in it, just lists it.
Before:
[torvalds@woody linux]$ time git ls-files > /dev/null
real 0m0.016s
user 0m0.016s
sys 0m0.000s
After:
[torvalds@woody linux]$ time ~/git/git ls-files > /dev/null
real 0m0.021s
user 0m0.012s
sys 0m0.008s
and while the thing has really gotten relatively much slower, we're
still talking about something almost unmeasurable (eg 5ms). And that
really should be pretty much the worst case.
So we lose 5ms on one "benchmark", but win 22ms on another. Pick your
poison - this patch has the advantage that it will _likely_ speed up
the cases that are complex and expensive more than it slows down the
cases that are already so fast that nobody cares. But if you look at
relative speedups/slowdowns, it doesn't look so good.
- It should be simple and clean
The code may be a bit subtle (the reasons I do hash removal the way I
do etc), but it re-uses the existing hash.c files, so it really is
fairly small and straightforward apart from a few odd details.
Now, this patch on its own doesn't really do much, but I think it's worth
looking at, if only because if done correctly, the name hashing really can
make an improvement to the whole issue of "do we have a filename that
looks like this in the index already". And at least it gets real testing
by being used even by default (ie there is a real use-case for it even
without any insane filesystems).
NOTE NOTE NOTE! The current hash is a joke. I'm ashamed of it, I'm just
not ashamed of it enough to really care. I took all the numbers out of my
nether regions - I'm sure it's good enough that it works in practice, but
the whole point was that you can make a really much fancier hash that
hashes characters not directly, but by their upper-case value or something
like that, and thus you get a case-insensitive hash, while still keeping
the name and the index itself totally case sensitive.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-01-23 03:41:14 +01:00
|
|
|
struct hash_table name_hash;
|
2007-04-02 03:14:06 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
extern struct index_state the_index;
|
|
|
|
|
2007-04-02 08:26:07 +02:00
|
|
|
#ifndef NO_THE_INDEX_COMPATIBILITY_MACROS
|
2007-04-02 03:14:06 +02:00
|
|
|
#define active_cache (the_index.cache)
|
|
|
|
#define active_nr (the_index.cache_nr)
|
|
|
|
#define active_alloc (the_index.cache_alloc)
|
|
|
|
#define active_cache_changed (the_index.cache_changed)
|
|
|
|
#define active_cache_tree (the_index.cache_tree)
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2007-04-02 08:26:07 +02:00
|
|
|
#define read_cache() read_index(&the_index)
|
|
|
|
#define read_cache_from(path) read_index_from(&the_index, (path))
|
|
|
|
#define write_cache(newfd, cache, entries) write_index(&the_index, (newfd))
|
|
|
|
#define discard_cache() discard_index(&the_index)
|
|
|
|
#define cache_name_pos(name, namelen) index_name_pos(&the_index,(name),(namelen))
|
|
|
|
#define add_cache_entry(ce, option) add_index_entry(&the_index, (ce), (option))
|
|
|
|
#define remove_cache_entry_at(pos) remove_index_entry_at(&the_index, (pos))
|
|
|
|
#define remove_file_from_cache(path) remove_file_from_index(&the_index, (path))
|
|
|
|
#define add_file_to_cache(path, verbose) add_file_to_index(&the_index, (path), (verbose))
|
2007-08-11 23:59:01 +02:00
|
|
|
#define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL)
|
2007-11-10 09:15:03 +01:00
|
|
|
#define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options))
|
|
|
|
#define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options))
|
Create pathname-based hash-table lookup into index
This creates a hash index of every single file added to the index.
Right now that hash index isn't actually used for much: I implemented a
"cache_name_exists()" function that uses it to efficiently look up a
filename in the index without having to do the O(logn) binary search,
but quite frankly, that's not why this patch is interesting.
No, the whole and only reason to create the hash of the filenames in the
index is that by modifying the hash function, you can fairly easily do
things like making it always hash equivalent names into the same bucket.
That, in turn, means that suddenly questions like "does this name exist
in the index under an _equivalent_ name?" becomes much much cheaper.
Guiding principles behind this patch:
- it shouldn't be too costly. In fact, my primary goal here was to
actually speed up "git commit" with a fully populated kernel tree, by
being faster at checking whether a file already existed in the index. I
did succeed, but only barely:
Best before:
[torvalds@woody linux]$ time git commit > /dev/null
real 0m0.255s
user 0m0.168s
sys 0m0.088s
Best after:
[torvalds@woody linux]$ time ~/git/git commit > /dev/null
real 0m0.233s
user 0m0.144s
sys 0m0.088s
so some things are actually faster (~8%).
Caveat: that's really the best case. Other things are invariably going
to be slightly slower, since we populate that index cache, and quite
frankly, few things really use it to look things up.
That said, the cost is really quite small. The worst case is probably
doing a "git ls-files", which will do very little except populate the
index, and never actually looks anything up in it, just lists it.
Before:
[torvalds@woody linux]$ time git ls-files > /dev/null
real 0m0.016s
user 0m0.016s
sys 0m0.000s
After:
[torvalds@woody linux]$ time ~/git/git ls-files > /dev/null
real 0m0.021s
user 0m0.012s
sys 0m0.008s
and while the thing has really gotten relatively much slower, we're
still talking about something almost unmeasurable (eg 5ms). And that
really should be pretty much the worst case.
So we lose 5ms on one "benchmark", but win 22ms on another. Pick your
poison - this patch has the advantage that it will _likely_ speed up
the cases that are complex and expensive more than it slows down the
cases that are already so fast that nobody cares. But if you look at
relative speedups/slowdowns, it doesn't look so good.
- It should be simple and clean
The code may be a bit subtle (the reasons I do hash removal the way I
do etc), but it re-uses the existing hash.c files, so it really is
fairly small and straightforward apart from a few odd details.
Now, this patch on its own doesn't really do much, but I think it's worth
looking at, if only because if done correctly, the name hashing really can
make an improvement to the whole issue of "do we have a filename that
looks like this in the index already". And at least it gets real testing
by being used even by default (ie there is a real use-case for it even
without any insane filesystems).
NOTE NOTE NOTE! The current hash is a joke. I'm ashamed of it, I'm just
not ashamed of it enough to really care. I took all the numbers out of my
nether regions - I'm sure it's good enough that it works in practice, but
the whole point was that you can make a really much fancier hash that
hashes characters not directly, but by their upper-case value or something
like that, and thus you get a case-insensitive hash, while still keeping
the name and the index itself totally case sensitive.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-01-23 03:41:14 +01:00
|
|
|
#define cache_name_exists(name, namelen) index_name_exists(&the_index, (name), (namelen))
|
2007-04-02 08:26:07 +02:00
|
|
|
#endif
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2007-02-28 20:45:56 +01:00
|
|
|
/*
 * Numeric object type codes.  The values are spelled out
 * explicitly; 5 is deliberately left unassigned.
 */
enum object_type {
	OBJ_BAD = -1,		/* the only negative code */
	OBJ_NONE = 0,
	OBJ_COMMIT = 1,
	OBJ_TREE = 2,
	OBJ_BLOB = 3,
	OBJ_TAG = 4,
	/* 5 for future expansion */
	OBJ_OFS_DELTA = 6,
	OBJ_REF_DELTA = 7,
	OBJ_MAX,		/* one past the highest assigned code */
};
|
|
|
|
|
2007-12-01 07:22:38 +01:00
|
|
|
static inline enum object_type object_type(unsigned int mode)
|
|
|
|
{
|
|
|
|
return S_ISDIR(mode) ? OBJ_TREE :
|
|
|
|
S_ISGITLINK(mode) ? OBJ_COMMIT :
|
|
|
|
OBJ_BLOB;
|
|
|
|
}
|
|
|
|
|
2005-05-10 07:57:58 +02:00
|
|
|
#define GIT_DIR_ENVIRONMENT "GIT_DIR"
|
2007-06-06 09:10:42 +02:00
|
|
|
#define GIT_WORK_TREE_ENVIRONMENT "GIT_WORK_TREE"
|
2005-05-10 07:57:58 +02:00
|
|
|
#define DEFAULT_GIT_DIR_ENVIRONMENT ".git"
|
2005-05-10 02:57:56 +02:00
|
|
|
#define DB_ENVIRONMENT "GIT_OBJECT_DIRECTORY"
|
2005-04-21 19:55:18 +02:00
|
|
|
#define INDEX_ENVIRONMENT "GIT_INDEX_FILE"
|
2005-07-30 09:58:28 +02:00
|
|
|
#define GRAFT_ENVIRONMENT "GIT_GRAFT_FILE"
|
2006-12-19 10:28:15 +01:00
|
|
|
#define TEMPLATE_DIR_ENVIRONMENT "GIT_TEMPLATE_DIR"
|
|
|
|
#define CONFIG_ENVIRONMENT "GIT_CONFIG"
|
|
|
|
#define CONFIG_LOCAL_ENVIRONMENT "GIT_CONFIG_LOCAL"
|
|
|
|
#define EXEC_PATH_ENVIRONMENT "GIT_EXEC_PATH"
|
Add basic infrastructure to assign attributes to paths
This adds the basic infrastructure to assign attributes to
paths, in a way similar to what the exclusion mechanism does
based on $GIT_DIR/info/exclude and .gitignore files.
An attribute is just a simple string that does not contain any
whitespace. They can be specified in $GIT_DIR/info/attributes
file, and .gitattributes file in each directory.
Each line in these files defines a pattern matching rule.
Similar to the exclusion mechanism, a later match overrides an
earlier match in the same file, and entries from .gitattributes
file in the same directory takes precedence over the ones from
parent directories. Lines in $GIT_DIR/info/attributes file are
used as the lowest precedence default rules.
A line is either a comment (an empty line, or a line that begins
with a '#'), or a rule, which is a whitespace separated list of
tokens. The first token on the line is a shell glob pattern.
The rest are names of attributes, each of which can optionally
be prefixed with '!'. Such a line means "if a path matches this
glob, this attribute is set (or unset -- if the attribute name
is prefixed with '!'). For glob matching, the same "if the
pattern does not have a slash in it, the basename of the path is
matched with fnmatch(3) against the pattern, otherwise, the path
is matched with the pattern with FNM_PATHNAME" rule as the
exclusion mechanism is used.
This does not define what an attribute means. Tying an
attribute to various effects it has on git operation for paths
that have it will be specified separately.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-12 10:07:32 +02:00
|
|
|
#define GITATTRIBUTES_FILE ".gitattributes"
|
|
|
|
#define INFOATTRIBUTES_FILE "info/attributes"
|
attribute macro support
This adds "attribute macros" (for lack of better name). So far,
we have low-level attributes such as crlf and diff, which are
defined in operational terms --- setting or unsetting them on a
particular path directly affects what is done to the path. For
example, in order to decline diffs or crlf conversions on a
binary blob, no diffs on PostScript files, and treat all other
files normally, you would have something like these:
* diff crlf
*.ps !diff
proprietary.o !diff !crlf
That is fine as the operation goes, but gets unwieldy rather
rapidly, when we start adding more low-level attributes that are
defined in operational terms. A near-term example of such an
attribute would be 'merge-3way' which would control if git
should attempt the usual 3-way file-level merge internally, or
leave merging to a specialized external program of user's
choice. When it is added, we do _not_ want to force the users
to update the above to:
* diff crlf merge-3way
*.ps !diff
proprietary.o !diff !crlf !merge-3way
The way this patch solves this issue is to realize that the
attributes the user is assigning to paths are not defined in
terms of operations but in terms of what they are.
All of the three low-level attributes usually make sense for
most of the files that sane SCM users have git operate on (these
files are typically called "text"). Only a few cases, such as
binary blob, need exception to decline the "usual treatment
given to text files" -- and people mark them as "binary".
So this allows the $GIT_DIR/info/attributes and .gitattributes
at the toplevel of the project to also specify attributes that
assigns other attributes. The syntax is '[attr]' followed by an
attribute name followed by a list of attribute names:
[attr] binary !diff !crlf !merge-3way
When "binary" attribute is set to a path, if the path has not
got diff/crlf/merge-3way attribute set or unset by other rules,
this rule unsets the three low-level attributes.
It is expected that the user level .gitattributes will be
expressed mostly in terms of attributes based on what the files
are, and the above sample would become like this:
(built-in attribute configuration)
[attr] binary !diff !crlf !merge-3way
* diff crlf merge-3way
(project specific .gitattributes)
proprietary.o binary
(user preference $GIT_DIR/info/attributes)
*.ps !diff
There are a few caveats.
* As described above, you can define these macros only in
$GIT_DIR/info/attributes and toplevel .gitattributes.
* There is no attempt to detect circular definition of macro
attributes, and definitions are evaluated from bottom to top
as usual to fill in other attributes that have not yet got
values. The following would work as expected:
[attr] text diff crlf
[attr] ps text !diff
*.ps ps
while this would most likely not (I haven't tried):
[attr] ps text !diff
[attr] text diff crlf
*.ps ps
* When a macro says "[attr] A B !C", saying that a path does
not have attribute A does not let you tell anything about
attributes B or C. That is, given this:
[attr] text diff crlf
[attr] ps text !diff
*.txt !ps
path hello.txt, which would match "*.txt" pattern, would have
"ps" attribute set to zero, but that does not make text
attribute of hello.txt set to false (nor diff attribute set to
true).
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-14 17:54:37 +02:00
|
|
|
#define ATTRIBUTE_MACRO_PREFIX "[attr]"
|
2005-04-21 19:55:18 +02:00
|
|
|
|
2007-01-07 11:00:28 +01:00
|
|
|
extern int is_bare_repository_cfg;
|
|
|
|
extern int is_bare_repository(void);
|
2007-01-20 03:09:34 +01:00
|
|
|
extern int is_inside_git_dir(void);
|
Clean up work-tree handling
The old version of work-tree support was an unholy mess, barely readable,
and not to the point.
For example, why do you have to provide a worktree, when it is not used?
As in "git status". Now it works.
Another riddle was: if you can have work trees inside the git dir, why
are some programs complaining that they need a work tree?
IOW it is allowed to call
$ git --git-dir=../ --work-tree=. bla
when you really want to. In this case, you are both in the git directory
and in the working tree. So, programs have to actually test for the right
thing, namely if they are inside a working tree, and not if they are
inside a git directory.
Also, GIT_DIR=../.git should behave the same as if no GIT_DIR was
specified, unless there is a repository in the current working directory.
It does now.
The logic to determine if a repository is bare, or has a work tree
(tertium non datur), is this:
--work-tree=bla overrides GIT_WORK_TREE, which overrides core.bare = true,
which overrides core.worktree, which overrides GIT_DIR/.. when GIT_DIR
ends in /.git, which overrides the directory in which .git/ was found.
In related news, a long standing bug was fixed: when in .git/bla/x.git/,
which is a bare repository, git formerly assumed ../.. to be the
appropriate git dir. This problem was reported by Shawn Pearce to have
caused much pain, where a colleague mistakenly ran "git init" in "/" a
long time ago, and bare repositories just would not work.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-08-01 02:30:14 +02:00
|
|
|
extern char *git_work_tree_cfg;
|
2007-06-06 09:10:42 +02:00
|
|
|
extern int is_inside_work_tree(void);
|
2006-08-23 12:39:11 +02:00
|
|
|
extern const char *get_git_dir(void);
|
2005-05-10 07:57:58 +02:00
|
|
|
extern char *get_object_directory(void);
|
2005-06-06 22:31:29 +02:00
|
|
|
extern char *get_refs_directory(void);
|
2005-05-10 07:57:58 +02:00
|
|
|
extern char *get_index_file(void);
|
2005-07-30 09:58:28 +02:00
|
|
|
extern char *get_graft_file(void);
|
2007-08-01 02:29:38 +02:00
|
|
|
extern int set_git_dir(const char *path);
|
Clean up work-tree handling
The old version of work-tree support was an unholy mess, barely readable,
and not to the point.
For example, why do you have to provide a worktree, when it is not used?
As in "git status". Now it works.
Another riddle was: if you can have work trees inside the git dir, why
are some programs complaining that they need a work tree?
IOW it is allowed to call
$ git --git-dir=../ --work-tree=. bla
when you really want to. In this case, you are both in the git directory
and in the working tree. So, programs have to actually test for the right
thing, namely if they are inside a working tree, and not if they are
inside a git directory.
Also, GIT_DIR=../.git should behave the same as if no GIT_DIR was
specified, unless there is a repository in the current working directory.
It does now.
The logic to determine if a repository is bare, or has a work tree
(tertium non datur), is this:
--work-tree=bla overrides GIT_WORK_TREE, which overrides core.bare = true,
which overrides core.worktree, which overrides GIT_DIR/.. when GIT_DIR
ends in /.git, which overrides the directory in which .git/ was found.
In related news, a long standing bug was fixed: when in .git/bla/x.git/,
which is a bare repository, git formerly assumed ../.. to be the
appropriate git dir. This problem was reported by Shawn Pearce to have
caused much pain, where a colleague mistakenly ran "git init" in "/" a
long time ago, and bare repositories just would not work.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-08-01 02:30:14 +02:00
|
|
|
extern const char *get_git_work_tree(void);
|
2005-05-10 07:57:58 +02:00
|
|
|
|
|
|
|
#define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES"
|
2005-04-21 19:55:18 +02:00
|
|
|
|
2005-09-21 09:00:47 +02:00
|
|
|
extern const char **get_pathspec(const char *prefix, const char **pathspec);
|
2007-11-03 12:23:11 +01:00
|
|
|
extern void setup_work_tree(void);
|
2005-11-26 08:14:15 +01:00
|
|
|
extern const char *setup_git_directory_gently(int *);
|
2005-08-17 03:06:34 +02:00
|
|
|
extern const char *setup_git_directory(void);
|
2005-09-21 09:00:47 +02:00
|
|
|
extern const char *prefix_path(const char *prefix, int len, const char *path);
|
2005-11-26 08:14:15 +01:00
|
|
|
extern const char *prefix_filename(const char *prefix, int len, const char *path);
|
2006-04-26 19:15:54 +02:00
|
|
|
extern void verify_filename(const char *prefix, const char *name);
|
2006-04-27 00:09:27 +02:00
|
|
|
extern void verify_non_filename(const char *prefix, const char *name);
|
2005-08-17 03:06:34 +02:00
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/*
 * Growth policy for resizable arrays: pad the current count by 16,
 * then scale the result by 3/2 (integer arithmetic).
 */
#define alloc_nr(x) ((((x) + 16) * 3) / 2)
|
|
|
|
|
2007-06-11 15:39:44 +02:00
|
|
|
/*
|
|
|
|
* Realloc the buffer pointed at by variable 'x' so that it can hold
|
|
|
|
* at least 'nr' entries; the number of entries currently allocated
|
|
|
|
* is 'alloc', using the standard growing factor alloc_nr() macro.
* Nothing happens unless 'nr' exceeds 'alloc'; otherwise 'alloc' becomes
* the larger of 'nr' and alloc_nr(alloc) and 'x' is resized via xrealloc().
|
|
|
|
*
|
|
|
|
* DO NOT USE any expression with side-effect for 'x', 'nr' or 'alloc':
* each of them is expanded more than once in the macro body.
* 'x' and 'alloc' are assigned to, so both must be lvalues.
|
|
|
|
*/
|
|
|
|
#define ALLOC_GROW(x, nr, alloc) \
|
|
|
|
do { \
|
2007-06-17 00:37:39 +02:00
|
|
|
if ((nr) > alloc) { \
|
Extend --pretty=oneline to cover the first paragraph,
so that an ugly commit message like this can be
handled sanely.
Currently, --pretty=oneline and --pretty=email (hence
format-patch) take and use only the first line of the commit log
message. This changes them to:
- Take the first paragraph, where the definition of the first
paragraph is "skip all blank lines from the beginning, and
then grab everything up to the next empty line".
- Replace all line breaks with a whitespace.
This change would not affect a well-behaved commit message that
adheres to the convention of "single line summary, a blank line,
and then body of message", as its first paragraph always
consists of a single line. Commit messages from different
culture, such as the ones imported from CVS/SVN, can however get
chomped with the existing behaviour at the first linebreak in
the middle of sentence right now, which would become much easier
to see with this change.
The Subject: and --pretty=oneline output would become very long
and unsightly for non-conforming commits, but their messages are
already ugly anyway, and thischange at least avoids the loss of
information.
The Subject: line from a multi-line paragraph is folded using
RFC2822 line folding rules at the places where line breaks were
in the original.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-12 07:10:55 +02:00
|
|
|
if (alloc_nr(alloc) < (nr)) \
|
|
|
|
alloc = (nr); \
|
|
|
|
else \
|
|
|
|
alloc = alloc_nr(alloc); \
|
2007-06-11 15:39:44 +02:00
|
|
|
x = xrealloc((x), alloc * sizeof(*(x))); \
|
|
|
|
} \
|
|
|
|
} while(0)
|
|
|
|
|
2005-04-09 18:48:20 +02:00
|
|
|
/* Initialize and use the cache information */
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int read_index(struct index_state *);
|
|
|
|
extern int read_index_from(struct index_state *, const char *path);
|
|
|
|
extern int write_index(struct index_state *, int newfd);
|
|
|
|
extern int discard_index(struct index_state *);
|
2006-05-18 21:07:31 +02:00
|
|
|
extern int verify_path(const char *path);
|
Create pathname-based hash-table lookup into index
This creates a hash index of every single file added to the index.
Right now that hash index isn't actually used for much: I implemented a
"cache_name_exists()" function that uses it to efficiently look up a
filename in the index without having to do the O(logn) binary search,
but quite frankly, that's not why this patch is interesting.
No, the whole and only reason to create the hash of the filenames in the
index is that by modifying the hash function, you can fairly easily do
things like making it always hash equivalent names into the same bucket.
That, in turn, means that suddenly questions like "does this name exist
in the index under an _equivalent_ name?" becomes much much cheaper.
Guiding principles behind this patch:
- it shouldn't be too costly. In fact, my primary goal here was to
actually speed up "git commit" with a fully populated kernel tree, by
being faster at checking whether a file already existed in the index. I
did succeed, but only barely:
Best before:
[torvalds@woody linux]$ time git commit > /dev/null
real 0m0.255s
user 0m0.168s
sys 0m0.088s
Best after:
[torvalds@woody linux]$ time ~/git/git commit > /dev/null
real 0m0.233s
user 0m0.144s
sys 0m0.088s
so some things are actually faster (~8%).
Caveat: that's really the best case. Other things are invariably going
to be slightly slower, since we populate that index cache, and quite
frankly, few things really use it to look things up.
That said, the cost is really quite small. The worst case is probably
doing a "git ls-files", which will do very little except populate the
index, and never actually looks anything up in it, just lists it.
Before:
[torvalds@woody linux]$ time git ls-files > /dev/null
real 0m0.016s
user 0m0.016s
sys 0m0.000s
After:
[torvalds@woody linux]$ time ~/git/git ls-files > /dev/null
real 0m0.021s
user 0m0.012s
sys 0m0.008s
and while the thing has really gotten relatively much slower, we're
still talking about something almost unmeasurable (eg 5ms). And that
really should be pretty much the worst case.
So we lose 5ms on one "benchmark", but win 22ms on another. Pick your
poison - this patch has the advantage that it will _likely_ speed up
the cases that are complex and expensive more than it slows down the
cases that are already so fast that nobody cares. But if you look at
relative speedups/slowdowns, it doesn't look so good.
- It should be simple and clean
The code may be a bit subtle (the reasons I do hash removal the way I
do etc), but it re-uses the existing hash.c files, so it really is
fairly small and straightforward apart from a few odd details.
Now, this patch on its own doesn't really do much, but I think it's worth
looking at, if only because if done correctly, the name hashing really can
make an improvement to the whole issue of "do we have a filename that
looks like this in the index already". And at least it gets real testing
by being used even by default (ie there is a real use-case for it even
without any insane filesystems).
NOTE NOTE NOTE! The current hash is a joke. I'm ashamed of it, I'm just
not ashamed of it enough to really care. I took all the numbers out of my
nether regions - I'm sure it's good enough that it works in practice, but
the whole point was that you can make a really much fancier hash that
hashes characters not directly, but by their upper-case value or something
like that, and thus you get a case-insensitive hash, while still keeping
the name and the index itself totally case sensitive.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-01-23 03:41:14 +01:00
|
|
|
extern int index_name_exists(struct index_state *istate, const char *name, int namelen);
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int index_name_pos(struct index_state *, const char *name, int namelen);
|
2005-05-08 06:55:21 +02:00
|
|
|
#define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */
|
|
|
|
#define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */
|
2005-06-25 11:25:29 +02:00
|
|
|
#define ADD_CACHE_SKIP_DFCHECK 4 /* Ok to skip DF conflict checks */
|
2007-08-09 22:42:50 +02:00
|
|
|
#define ADD_CACHE_JUST_APPEND 8 /* Append only; tree.c::read_tree() */
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int add_index_entry(struct index_state *, struct cache_entry *ce, int option);
|
2006-07-26 06:32:18 +02:00
|
|
|
extern struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really);
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int remove_index_entry_at(struct index_state *, int pos);
|
|
|
|
extern int remove_file_from_index(struct index_state *, const char *path);
|
|
|
|
extern int add_file_to_index(struct index_state *, const char *path, int verbose);
|
2007-09-11 05:17:28 +02:00
|
|
|
extern struct cache_entry *make_cache_entry(unsigned int mode, const unsigned char *sha1, const char *path, int stage, int refresh);
|
2005-05-15 04:04:25 +02:00
|
|
|
extern int ce_same_name(struct cache_entry *a, struct cache_entry *b);
|
2007-11-10 09:15:03 +01:00
|
|
|
|
|
|
|
/* do stat comparison even if CE_VALID is true */
|
|
|
|
#define CE_MATCH_IGNORE_VALID 01
|
|
|
|
/* do not check the contents but report dirty on racily-clean entries */
|
|
|
|
#define CE_MATCH_RACY_IS_DIRTY 02
|
|
|
|
extern int ie_match_stat(struct index_state *, struct cache_entry *, struct stat *, unsigned int);
|
|
|
|
extern int ie_modified(struct index_state *, struct cache_entry *, struct stat *, unsigned int);
|
|
|
|
|
2005-07-15 01:55:06 +02:00
|
|
|
extern int ce_path_match(const struct cache_entry *ce, const char **pathspec);
|
2007-02-28 20:52:04 +01:00
|
|
|
extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path);
|
2005-12-10 23:25:24 +01:00
|
|
|
extern int index_pipe(unsigned char *sha1, int fd, const char *type, int write_object);
|
2005-10-07 12:42:00 +02:00
|
|
|
extern int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object);
|
2005-05-15 23:23:12 +02:00
|
|
|
extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
|
|
|
|
|
2006-05-19 18:56:35 +02:00
|
|
|
#define REFRESH_REALLY 0x0001 /* ignore_valid */
|
|
|
|
#define REFRESH_UNMERGED 0x0002 /* allow unmerged */
|
|
|
|
#define REFRESH_QUIET 0x0004 /* be quiet about it */
|
|
|
|
#define REFRESH_IGNORE_MISSING 0x0008 /* ignore non-existent */
|
2007-08-11 23:59:01 +02:00
|
|
|
extern int refresh_index(struct index_state *, unsigned int flags, const char **pathspec, char *seen);
|
2006-05-19 18:56:35 +02:00
|
|
|
|
2006-06-06 21:51:49 +02:00
|
|
|
/*
 * Per-lock bookkeeping record.  It is handed by pointer to the
 * lock-file functions declared after it in this header.
 */
struct lock_file {
|
|
|
|
struct lock_file *next; /* another lock_file record; presumably chains the records flagged by on_list -- TODO confirm */
|
2007-11-13 21:05:03 +01:00
|
|
|
int fd; /* a file descriptor; presumably the open lock file -- TODO confirm */
|
2007-04-21 12:11:10 +02:00
|
|
|
pid_t owner; /* a process id; presumably the process that took the lock -- TODO confirm */
|
2007-01-02 20:19:05 +01:00
|
|
|
char on_list; /* NOTE(review): looks like a boolean "already linked via next" marker -- confirm */
|
2006-06-06 21:51:49 +02:00
|
|
|
char filename[PATH_MAX]; /* fixed-size buffer of PATH_MAX bytes; presumably the lock file's path -- TODO confirm */
|
2005-05-15 23:23:12 +02:00
|
|
|
};
|
2006-08-12 10:03:47 +02:00
|
|
|
extern int hold_lock_file_for_update(struct lock_file *, const char *path, int);
|
2006-06-06 21:51:49 +02:00
|
|
|
extern int commit_lock_file(struct lock_file *);
|
_GIT_INDEX_OUTPUT: allow plumbing to output to an alternative index file.
When defined, this allows plumbing commands that update the
index (add, apply, checkout-index, merge-recursive, mv,
read-tree, rm, update-index, and write-tree) to write their
resulting index to an alternative index file while holding a
lock to the original index file. With this, git-commit that
jumps the index does not have to make an extra copy of the index
file, and more importantly, it can do the update while holding
the lock on the index.
However, I think the interface to let an environment variable
specify the output is a mistake, as shown in the documentation.
If a curious user has the environment variable set to something
other than the file GIT_INDEX_FILE points at, almost everything
will break. This should instead be a command line parameter to
tell these plumbing commands to write the result in the named
file, to prevent stupid mistakes.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-01 08:09:02 +02:00
|
|
|
|
|
|
|
extern int hold_locked_index(struct lock_file *, int);
|
|
|
|
extern int commit_locked_index(struct lock_file *);
|
2007-04-01 08:27:41 +02:00
|
|
|
extern void set_alternate_index_output(const char *);
|
2008-01-16 20:05:32 +01:00
|
|
|
extern int close_lock_file(struct lock_file *);
|
2006-06-06 21:51:49 +02:00
|
|
|
extern void rollback_lock_file(struct lock_file *);
|
2007-04-18 05:34:34 +02:00
|
|
|
extern int delete_ref(const char *, const unsigned char *sha1);
|
2005-04-09 18:48:20 +02:00
|
|
|
|
2006-02-27 23:47:45 +01:00
|
|
|
/* Environment bits from configuration mechanism */
|
2005-10-11 01:31:08 +02:00
|
|
|
extern int trust_executable_bit;
|
2007-06-25 00:11:24 +02:00
|
|
|
extern int quote_path_fully;
|
2007-03-02 22:11:30 +01:00
|
|
|
extern int has_symlinks;
|
2006-02-09 06:15:24 +01:00
|
|
|
extern int assume_unchanged;
|
2006-05-02 09:40:24 +02:00
|
|
|
extern int prefer_symlink_refs;
|
2006-05-17 11:55:40 +02:00
|
|
|
extern int log_all_ref_updates;
|
2006-03-21 03:45:47 +01:00
|
|
|
extern int warn_ambiguous_refs;
|
2005-12-22 23:13:56 +01:00
|
|
|
extern int shared_repository;
|
2006-02-27 23:47:45 +01:00
|
|
|
extern const char *apply_default_whitespace;
|
2006-07-03 22:11:47 +02:00
|
|
|
extern int zlib_compression_level;
|
Custom compression levels for objects and packs
Add config variables pack.compression and core.loosecompression ,
and switch --compression=level to pack-objects.
Loose objects will be compressed using core.loosecompression if set,
else core.compression if set, else Z_BEST_SPEED.
Packed objects will be compressed using --compression=level if seen,
else pack.compression if set, else core.compression if set,
else Z_DEFAULT_COMPRESSION. This is the "pack compression level".
Loose objects added to a pack undeltified will be recompressed
to the pack compression level if it is unequal to the current
loose compression level by the preceding rules, or if the loose
object was written while core.legacyheaders = true. Newly
deltified loose objects are always compressed to the current
pack compression level.
Previously packed objects added to a pack are recompressed
to the current pack compression level exactly when their
deltification status changes, since the previous pack data
cannot be reused.
In either case, the --no-reuse-object switch from the first
patch below will always force recompression to the current pack
compression level, instead of assuming the pack compression level
hasn't changed and pack data can be reused when possible.
This applies on top of the following patches from Nicolas Pitre:
[PATCH] allow for undeltified objects not to be reused
[PATCH] make "repack -f" imply "pack-objects --no-reuse-object"
Signed-off-by: Dana L. How <danahow@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-05-09 22:56:50 +02:00
|
|
|
extern int core_compression_level;
|
|
|
|
extern int core_compression_seen;
|
2006-12-23 08:34:28 +01:00
|
|
|
extern size_t packed_git_window_size;
|
2006-12-23 08:33:35 +01:00
|
|
|
extern size_t packed_git_limit;
|
2007-03-19 06:14:37 +01:00
|
|
|
extern size_t delta_base_cache_limit;
|
Lazy man's auto-CRLF
It currently does NOT know about file attributes, so it does its
conversion purely based on content. Maybe that is more in the "git
philosophy" anyway, since content is king, but I think we should try to do
the file attributes to turn it off on demand.
Anyway, BY DEFAULT it is off regardless, because it requires a
[core]
AutoCRLF = true
in your config file to be enabled. We could make that the default for
Windows, of course, the same way we do some other things (filemode etc).
But you can actually enable it on UNIX, and it will cause:
- "git update-index" will write blobs without CRLF
- "git diff" will diff working tree files without CRLF
- "git checkout" will write files to the working tree _with_ CRLF
and things work fine.
Funnily, it actually shows an odd file in git itself:
git clone -n git test-crlf
cd test-crlf
git config core.autocrlf true
git checkout
git diff
shows a diff for "Documentation/docbook-xsl.css". Why? Because we have
actually checked in that file *with* CRLF! So when "core.autocrlf" is
true, we'll always generate a *different* hash for it in the index,
because the index hash will be for the content _without_ CRLF.
Is this complete? I dunno. It seems to work for me. It doesn't use the
filename at all right now, and that's probably a deficiency (we could
certainly make the "is_binary()" heuristics also take standard filename
heuristics into account).
I don't pass in the filename at all for the "index_fd()" case
(git-update-index), so that would need to be passed around, but this
actually works fine.
NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours
truly. I will not guarantee that they work at all reasonable. Caveat
emptor. But it _is_ simple, and it _is_ safe, since it's all off by
default.
The patch is pretty simple - the biggest part is the new "convert.c" file,
but even that is really just basic stuff that anybody can write in
"Teaching C 101" as a final project for their first class in programming.
Not to say that it's bug-free, of course - but at least we're not talking
about rocket surgery here.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
|
|
|
extern int auto_crlf;
|
2005-10-11 01:31:08 +02:00
|
|
|
|
safecrlf: Add mechanism to warn about irreversible crlf conversions
CRLF conversion bears a slight chance of corrupting data.
autocrlf=true will convert CRLF to LF during commit and LF to
CRLF during checkout. A file that contains a mixture of LF and
CRLF before the commit cannot be recreated by git. For text
files this is the right thing to do: it corrects line endings
such that we have only LF line endings in the repository.
But for binary files that are accidentally classified as text the
conversion can corrupt data.
If you recognize such corruption early you can easily fix it by
setting the conversion type explicitly in .gitattributes. Right
after committing you still have the original file in your work
tree and this file is not yet corrupted. You can explicitly tell
git that this file is binary and git will handle the file
appropriately.
Unfortunately, the desired effect of cleaning up text files with
mixed line endings and the undesired effect of corrupting binary
files cannot be distinguished. In both cases CRLFs are removed
in an irreversible way. For text files this is the right thing
to do because CRLFs are line endings, while for binary files
converting CRLFs corrupts data.
This patch adds a mechanism that can either warn the user about
an irreversible conversion or can even refuse to convert. The
mechanism is controlled by the variable core.safecrlf, with the
following values:
- false: disable safecrlf mechanism
- warn: warn about irreversible conversions
- true: refuse irreversible conversions
The default is to warn. Users are only affected by this default
if core.autocrlf is set. But the current default of git is to
leave core.autocrlf unset, so users will not see warnings unless
they deliberately chose to activate the autocrlf mechanism.
The safecrlf mechanism's details depend on the git command. The
general principles when safecrlf is active (not false) are:
- we warn/error out if files in the work tree can be modified in an
irreversible way without giving the user a chance to backup the
original file.
- for read-only operations that do not modify files in the work tree
we do not print annoying warnings.
There are exceptions. Even though...
- "git add" itself does not touch the files in the work tree, the
next checkout would, so the safety triggers;
- "git apply" to update a text file with a patch does touch the files
in the work tree, but the operation is about text files and CRLF
conversion is about fixing the line ending inconsistencies, so the
safety does not trigger;
- "git diff" itself does not touch the files in the work tree, it is
often run to inspect the changes you intend to next "git add". To
catch potential problems early, safety triggers.
The concept of a safety check was originally proposed in a similar
way by Linus Torvalds. Thanks to Dimitry Potapov for insisting
on getting the naked LF/autocrlf=true case right.
Signed-off-by: Steffen Prohaska <prohaska@zib.de>
2008-02-06 12:25:58 +01:00
|
|
|
/*
 * Modes of the safe-CRLF check; the current mode is kept in the
 * 'safe_crlf' variable declared right after this enum.
 *
 * NOTE(review): the names suggest FALSE disables the check, FAIL
 * refuses an irreversible conversion and WARN only warns about it --
 * confirm against the code that parses the setting.
 *
 * The last enumerator carries no trailing comma: C89 compilers reject
 * it, and the other enums in this header do not use one either.
 */
enum safe_crlf {
	SAFE_CRLF_FALSE = 0,
	SAFE_CRLF_FAIL = 1,
	SAFE_CRLF_WARN = 2
};
|
|
|
|
|
|
|
|
extern enum safe_crlf safe_crlf;
|
|
|
|
|
2005-11-26 00:59:09 +01:00
|
|
|
#define GIT_REPO_VERSION 0
|
|
|
|
extern int repository_format_version;
|
|
|
|
extern int check_repository_format(void);
|
|
|
|
|
2005-04-09 18:48:20 +02:00
|
|
|
#define MTIME_CHANGED 0x0001
|
|
|
|
#define CTIME_CHANGED 0x0002
|
|
|
|
#define OWNER_CHANGED 0x0004
|
|
|
|
#define MODE_CHANGED 0x0008
|
|
|
|
#define INODE_CHANGED 0x0010
|
|
|
|
#define DATA_CHANGED 0x0020
|
2005-05-05 14:38:25 +02:00
|
|
|
#define TYPE_CHANGED 0x0040
|
2005-04-08 00:13:13 +02:00
|
|
|
|
|
|
|
/* Return a statically allocated filename matching the sha1 signature */
|
2005-08-09 17:30:22 +02:00
|
|
|
extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
|
|
|
|
extern char *git_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
|
2005-04-10 23:03:58 +02:00
|
|
|
extern char *sha1_file_name(const unsigned char *sha1);
|
2005-08-01 02:53:44 +02:00
|
|
|
extern char *sha1_pack_name(const unsigned char *sha1);
|
|
|
|
extern char *sha1_pack_index_name(const unsigned char *sha1);
|
2005-10-12 00:22:48 +02:00
|
|
|
extern const char *find_unique_abbrev(const unsigned char *sha1, int);
|
2005-09-30 23:02:47 +02:00
|
|
|
extern const unsigned char null_sha1[20];
|
2006-08-15 22:37:19 +02:00
|
|
|
static inline int is_null_sha1(const unsigned char *sha1)
|
|
|
|
{
|
|
|
|
return !memcmp(sha1, null_sha1, 20);
|
|
|
|
}
|
2006-08-17 20:54:57 +02:00
|
|
|
/*
 * Compare two 20-byte hashes with memcmp() semantics: the result is
 * zero when they are equal, negative or positive according to the
 * first differing byte otherwise.
 */
static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
{
	int order = memcmp(sha1, sha2, 20);

	return order;
}
|
2006-08-23 08:49:00 +02:00
|
|
|
/*
 * Copy the 20-byte hash at 'sha_src' into 'sha_dst'.  memcpy() is
 * used, so the two buffers must not overlap.
 */
static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src)
{
	(void)memcpy(sha_dst, sha_src, 20);
}
|
2006-08-23 22:57:23 +02:00
|
|
|
/* Overwrite the 20-byte hash at 'hash' with zero bytes. */
static inline void hashclr(unsigned char *hash)
{
	(void)memset(hash, '\0', 20);
}
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2005-08-04 22:43:03 +02:00
|
|
|
int git_mkstemp(char *path, size_t n, const char *template);
|
|
|
|
|
2006-06-10 08:09:49 +02:00
|
|
|
/*
 * Repository sharing modes, numbered 0 through 2.
 * NOTE(review): presumably UMASK leaves permissions to the umask while
 * GROUP and EVERYBODY widen them -- confirm against the users of this enum.
 */
enum sharedrepo {
	PERM_UMASK = 0,
	PERM_GROUP = 1,
	PERM_EVERYBODY = 2
};
|
|
|
|
int git_config_perm(const char *var, const char *value);
|
2005-12-22 23:13:56 +01:00
|
|
|
int adjust_shared_perm(const char *path);
|
2005-07-06 10:11:52 +02:00
|
|
|
int safe_create_leading_directories(char *path);
|
2005-11-21 01:52:52 +01:00
|
|
|
char *enter_repo(char *path, int strict);
|
2007-08-01 02:28:59 +02:00
|
|
|
/*
 * Return 1 when 'path' begins with a slash, 0 otherwise (including
 * for the empty string).  'path' must not be NULL.
 */
static inline int is_absolute_path(const char *path)
{
	return *path == '/';
}
|
|
|
|
const char *make_absolute_path(const char *path);
|
2005-07-06 10:11:52 +02:00
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/* Read and unpack a sha1 file into memory, write memory to a sha1 file */
|
2007-02-26 20:55:59 +01:00
|
|
|
extern int sha1_object_info(const unsigned char *, unsigned long *);
|
|
|
|
extern void * read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size);
|
2007-03-20 21:02:09 +01:00
|
|
|
extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1);
|
2005-05-18 14:14:09 +02:00
|
|
|
extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
|
2007-02-26 20:55:59 +01:00
|
|
|
extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
|
2005-04-24 03:47:23 +02:00
|
|
|
|
2005-06-03 17:05:39 +02:00
|
|
|
extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type);
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2005-08-03 01:46:29 +02:00
|
|
|
extern int write_sha1_from_fd(const unsigned char *sha1, int fd, char *buffer,
|
|
|
|
size_t bufsize, size_t *bufposn);
|
2005-07-11 00:25:38 +02:00
|
|
|
extern int write_sha1_to_fd(int fd, const unsigned char *sha1);
|
2006-09-01 09:17:47 +02:00
|
|
|
extern int move_temp_to_file(const char *tmpfile, const char *filename);
|
2005-04-24 03:47:23 +02:00
|
|
|
|
2006-09-06 11:12:09 +02:00
|
|
|
extern int has_sha1_pack(const unsigned char *sha1, const char **ignore);
|
2005-04-24 03:47:23 +02:00
|
|
|
extern int has_sha1_file(const unsigned char *sha1);
|
|
|
|
|
2005-08-01 02:53:44 +02:00
|
|
|
extern int has_pack_file(const unsigned char *sha1);
|
|
|
|
extern int has_pack_index(const unsigned char *sha1);
|
|
|
|
|
2007-05-30 19:32:19 +02:00
|
|
|
extern const signed char hexval_table[256];
|
|
|
|
/*
 * Look up the byte 'c' in hexval_table and return the entry.
 *
 * The table is declared with 'signed char' elements while this
 * function returns 'unsigned int', so a negative table entry comes
 * back as a very large unsigned value rather than as a negative one.
 * NOTE(review): presumably negative entries mark bytes that are not
 * hex digits -- confirm against the table's definition.
 */
static inline unsigned int hexval(unsigned char c)
|
2006-09-21 01:04:46 +02:00
|
|
|
{
|
|
|
|
return hexval_table[c];
|
|
|
|
}
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/* Convert to/from hex/sha1 representation */
|
2006-01-25 10:03:18 +01:00
|
|
|
#define MINIMUM_ABBREV 4
|
|
|
|
#define DEFAULT_ABBREV 7
|
|
|
|
|
2005-05-02 01:36:56 +02:00
|
|
|
extern int get_sha1(const char *str, unsigned char *sha1);
|
2007-04-23 22:55:05 +02:00
|
|
|
extern int get_sha1_with_mode(const char *str, unsigned char *sha1, unsigned *mode);
|
2005-04-09 21:09:27 +02:00
|
|
|
extern int get_sha1_hex(const char *hex, unsigned char *sha1);
|
|
|
|
extern char *sha1_to_hex(const unsigned char *sha1); /* static buffer result! */
|
2005-09-25 18:59:37 +02:00
|
|
|
extern int read_ref(const char *filename, unsigned char *sha1);
|
2006-09-21 07:02:01 +02:00
|
|
|
extern const char *resolve_ref(const char *path, unsigned char *sha1, int, int *);
|
2007-01-19 10:15:15 +01:00
|
|
|
extern int dwim_ref(const char *str, int len, unsigned char *sha1, char **ref);
|
2007-02-09 01:28:23 +01:00
|
|
|
extern int dwim_log(const char *str, int len, unsigned char *sha1, char **ref);
|
2007-01-19 10:15:15 +01:00
|
|
|
|
add refname_match()
We use at least two rulesets for matching abbreviated refnames with
full refnames (starting with 'refs/'). git-rev-parse and git-fetch
use slightly different rules.
This commit introduces a new function refname_match
(const char *abbrev_name, const char *full_name, const char **rules).
abbrev_name is expanded using the rules and matched against full_name.
If a match is found the function returns true. rules is a NULL-terminated
list of format patterns with "%.*s", for example:
const char *ref_rev_parse_rules[] = {
"%.*s",
"refs/%.*s",
"refs/tags/%.*s",
"refs/heads/%.*s",
"refs/remotes/%.*s",
"refs/remotes/%.*s/HEAD",
NULL
};
Asterisks are included in the format strings because this is the form
required in sha1_name.c. Sharing the list with the functions there is
a good idea to avoid duplicating the rules. Hopefully this
facilitates unified matching rules in the future.
This commit makes the rules used by rev-parse for resolving refs to
sha1s available for string comparison. Before this change, the rules
were buried in get_sha1*() and dwim_ref().
A follow-up commit will refactor the rules used by fetch.
refname_match() will be used for matching refspecs in git-send-pack.
Thanks to Daniel Barkalow <barkalow@iabervon.org> for pointing
out that ref_matches_abbrev in remote.c solves a similar problem
and care should be taken to avoid confusion.
Signed-off-by: Steffen Prohaska <prohaska@zib.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-11 15:01:46 +01:00
|
|
|
extern int refname_match(const char *abbrev_name, const char *full_name, const char **rules);
|
|
|
|
extern const char *ref_rev_parse_rules[];
|
2007-11-11 15:01:48 +01:00
|
|
|
extern const char *ref_fetch_rules[];
|
add refname_match()
We use at least two rulesets for matching abbreviated refnames with
full refnames (starting with 'refs/'). git-rev-parse and git-fetch
use slightly different rules.
This commit introduces a new function refname_match
(const char *abbrev_name, const char *full_name, const char **rules).
abbrev_name is expanded using the rules and matched against full_name.
If a match is found the function returns true. rules is a NULL-terminated
list of format patterns with "%.*s", for example:
const char *ref_rev_parse_rules[] = {
"%.*s",
"refs/%.*s",
"refs/tags/%.*s",
"refs/heads/%.*s",
"refs/remotes/%.*s",
"refs/remotes/%.*s/HEAD",
NULL
};
Asterisks are included in the format strings because this is the form
required in sha1_name.c. Sharing the list with the functions there is
a good idea to avoid duplicating the rules. Hopefully this
facilitates unified matching rules in the future.
This commit makes the rules used by rev-parse for resolving refs to
sha1s available for string comparison. Before this change, the rules
were buried in get_sha1*() and dwim_ref().
A follow-up commit will refactor the rules used by fetch.
refname_match() will be used for matching refspecs in git-send-pack.
Thanks to Daniel Barkalow <barkalow@iabervon.org> for pointing
out that ref_matches_abbrev in remote.c solves a similar problem
and care should be taken to avoid confusion.
Signed-off-by: Steffen Prohaska <prohaska@zib.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-11 15:01:46 +01:00
|
|
|
|
2007-01-26 23:26:10 +01:00
|
|
|
/*
 * NOTE(review): judging only by the parameter names, `ref` is the symbolic
 * ref to write, `refs_heads_master` is its target (something like
 * "refs/heads/master") and `logmsg` is a log message -- confirm, along
 * with the return convention.
 */
extern int create_symref(const char *ref, const char *refs_heads_master, const char *logmsg);
|
2007-01-02 08:31:08 +01:00
|
|
|
extern int validate_headref(const char *ref);
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2005-05-20 18:09:18 +02:00
|
|
|
/*
 * Compare two names, each given as (pointer, length, mode).
 * NOTE(review): presumably returns <0 / 0 / >0 like memcmp(), with mode
 * influencing how directories sort -- confirm against the definition.
 */
extern int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
|
2005-04-09 21:59:11 +02:00
|
|
|
/*
 * Compare two names, each given as (pointer, length).
 * NOTE(review): presumably returns <0 / 0 / >0 like memcmp() -- confirm.
 */
extern int cache_name_compare(const char *name1, int len1, const char *name2, int len2);
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2005-04-29 01:42:27 +02:00
|
|
|
/*
 * NOTE(review): only the prototype is visible. Presumably reads the object
 * named by sha1, following references until an object of required_type is
 * reached, and returns its contents with the length in *size and the name
 * of the object actually read in sha1_ret -- confirm, including who owns
 * the returned buffer.
 */
extern void *read_object_with_reference(const unsigned char *sha1,
|
2005-05-18 14:14:09 +02:00
|
|
|
const char *required_type,
|
2005-04-29 01:42:27 +02:00
|
|
|
unsigned long *size,
|
|
|
|
unsigned char *sha1_ret);
|
2005-04-21 03:06:49 +02:00
|
|
|
|
2007-07-14 08:14:52 +02:00
|
|
|
/*
 * Date display modes. Values of this type are passed as the `mode`
 * argument of show_date() and returned by parse_date_format().
 * DATE_NORMAL is explicitly the zero value.
 */
enum date_mode {
|
|
|
|
DATE_NORMAL = 0,
|
|
|
|
DATE_RELATIVE,
|
|
|
|
DATE_SHORT,
|
|
|
|
DATE_LOCAL,
|
|
|
|
DATE_ISO8601,
|
|
|
|
DATE_RFC2822
|
|
|
|
};
|
|
|
|
|
2007-02-27 16:21:04 +01:00
|
|
|
/*
 * Produce a string for the given time and timezone in the style selected
 * by mode.
 * NOTE(review): the lifetime of the returned pointer (static buffer?) and
 * the encoding of `timezone` are not visible here -- confirm before
 * storing the result.
 */
const char *show_date(unsigned long time, int timezone, enum date_mode mode);
|
2005-09-20 00:53:50 +02:00
|
|
|
/*
 * NOTE(review): presumably parses the string `date` and writes a
 * normalised form into buf (at most bufsize bytes) -- confirm the output
 * format and the meaning of the int result.
 */
int parse_date(const char *date, char *buf, int bufsize);
|
2005-04-30 18:46:49 +02:00
|
|
|
/*
 * NOTE(review): presumably writes a stamp for the current time into buf
 * (at most bufsize bytes) -- confirm the format against the definition.
 */
void datestamp(char *buf, int bufsize);
|
git's rev-parse.c function show_datestring presumes gnu date
Ok. This is the insane patch to do this.
It really isn't very careful, and the reason I call it "approxidate()"
will become obvious when you look at the code. It is very liberal in what
it accepts, to the point where sometimes the results may not make a whole
lot of sense.
It accepts "last week" as a date string, by virtue of "last" parsing as
the number 1, and it totally ignoring superfluous fluff like "ago", so
"last week" ends up being exactly the same thing as "1 week ago". Fine so
far.
It has strange side effects: "last december" will actually parse as "Dec
1", which actually _does_ turn out right, because it will then notice that
it's not December yet, so it will decide that you must be talking about a
date last year. So it actually gets it right, but it's kind of for the
"wrong" reasons.
It also accepts the numbers 1..10 in string format ("one" .. "ten"), so
you can do "ten weeks ago" or "ten hours ago" and it will do the right
thing.
But it will do some really strange things too: the string "this will last
forever", will not recognize anything but "last", which is recognized as
"1", which since it doesn't understand anything else it will think is the
day of the month. So if you do
gitk --since="this will last forever"
the date will actually parse as the first day of the current month.
And it will parse the string "now" as "now", but only because it doesn't
understand it at all, and it makes everything relative to "now".
Similarly, it doesn't actually parse the "ago" or "from now", so "2 weeks
ago" is exactly the same as "2 weeks from now". It's the current date
minus 14 days.
But hey, it's probably better (and certainly faster) than depending on GNU
date. So now you can portably do things like
gitk --since="two weeks and three days ago"
git log --since="July 5"
git-whatchanged --since="10 hours ago"
git log --since="last october"
and it will actually do exactly what you thought it would do (I think). It
will count 17 days backwards, and it will do so even if you don't have GNU
date installed.
(I don't do "last monday" or similar yet, but I can extend it to that too
if people want).
It was kind of fun trying to write code that uses such totally relaxed
"understanding" of dates yet tries to get it right for the trivial cases.
The result should be mixed with a few strange preprocessor tricks, and be
submitted for the IOCCC ;)
Feel free to try it out, and see how many strange dates it gets right. Or
wrong.
And if you find some interesting (and valid - not "interesting" as in
"strange", but "interesting" as in "I'd be interested in actually doing
this") thing it gets wrong - usually by not understanding it and silently
just doing some strange things - please holler.
Now, as usual this certainly hasn't been getting a lot of testing. But my
code always works, no?
Linus
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-11-15 04:29:06 +01:00
|
|
|
/*
 * Liberal, best-effort date parser. Understands inputs such as
 * "last week", "10 hours ago", "July 5" or
 * "two weeks and three days ago", interpreting them relative to now.
 * Unrecognised words are silently ignored rather than rejected, so
 * nonsensical input still yields some date.
 * NOTE(review): the unit of the result (presumably seconds since the
 * epoch) is not visible here -- confirm.
 */
unsigned long approxidate(const char *);
|
2007-09-28 16:17:31 +02:00
|
|
|
/*
 * Map a format name to the corresponding enum date_mode value.
 * NOTE(review): the accepted names and the behaviour on an unknown name
 * are defined elsewhere -- confirm.
 */
enum date_mode parse_date_format(const char *format);
|
2005-04-30 18:46:49 +02:00
|
|
|
|
2007-12-09 02:32:08 +01:00
|
|
|
/*
 * Identity flags. The values are distinct bits (1, 2, 4).
 * NOTE(review): presumably OR-ed together and passed as the int argument
 * of git_author_info(), git_committer_info() and fmt_ident() -- confirm.
 */
#define IDENT_WARN_ON_NO_NAME 1
|
|
|
|
#define IDENT_ERROR_ON_NO_NAME 2
|
|
|
|
#define IDENT_NO_DATE 4
|
2006-02-19 05:31:05 +01:00
|
|
|
/*
 * Return the author / committer identity as a string.
 * NOTE(review): the int argument is presumably a mask of IDENT_* flags,
 * and the returned pointer may refer to a shared static buffer -- confirm
 * both against the definitions.
 */
extern const char *git_author_info(int);
|
|
|
|
extern const char *git_committer_info(int);
|
2007-02-05 02:50:14 +01:00
|
|
|
/*
 * Build an identity string from a name, an e-mail address and a date
 * string.
 * NOTE(review): the trailing int is presumably a mask of IDENT_* flags;
 * the lifetime of the returned string is not visible here -- confirm.
 */
extern const char *fmt_ident(const char *name, const char *email, const char *date_str, int);
|
2007-12-02 22:43:34 +01:00
|
|
|
extern const char *fmt_name(const char *name, const char *email);
|
2005-07-12 20:49:27 +02:00
|
|
|
|
2005-06-06 06:59:54 +02:00
|
|
|
/*
 * Options for a checkout operation; passed (as const) to checkout_entry().
 * base_dir is carried together with its length in base_dir_len, followed
 * by four one-bit option flags.
 * NOTE(review): the precise effect of each flag is defined by the code
 * that consumes this struct, not here.
 */
struct checkout {
|
|
|
|
const char *base_dir;
|
|
|
|
int base_dir_len;
|
|
|
|
unsigned force:1,
|
|
|
|
quiet:1,
|
|
|
|
not_new:1,
|
|
|
|
refresh_cache:1;
|
|
|
|
};
|
|
|
|
|
2007-04-25 16:18:08 +02:00
|
|
|
/*
 * Check out a single cache entry according to the options in `state`.
 * NOTE(review): `topath` is a writable buffer; presumably, when non-NULL,
 * it receives the path the entry was written to -- confirm, along with the
 * return convention.
 */
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
|
Add has_symlink_leading_path() function.
When we are applying a patch that creates a blob at a path, or
when we are switching from a branch that does not have a blob at
the path to another branch that has one, we need to make sure
that there is nothing at the path in the working tree, as such a
file is a local modification made by the user that would be lost
by the operation.
Normally, lstat() on the path and making sure ENOENT is returned
is good enough for that purpose. However there is a twist. We
may be creating a regular file arch/x86_64/boot/Makefile, while
removing an existing symbolic link at arch/x86_64/boot that
points at existing ../i386/boot directory that has Makefile in
it. We always first check without touching filesystem and then
perform the actual operation, so when we verify the new file,
arch/x86_64/boot/Makefile, does not exist, we haven't removed
the arch/x86_64/boot symbolic link yet. lstat() on
the file sees through the symbolic link and reports the file is
there, which is not what we want.
The has_symlink_leading_path() function takes a path,
and sees if any of the leading directory components is a symbolic
link.
When files in a new directory are created, we tend to process
them together because both index and tree are sorted. The
function takes advantage of this and allows the caller to cache
and reuse which symbolic link on the filesystem caused the
function to return true.
The calling sequence would be:
char last_symlink[PATH_MAX];
*last_symlink = '\0';
for each index entry {
if (!lose)
continue;
if (lstat(it))
if (errno == ENOENT)
; /* happy */
else
error;
else if (has_symlink_leading_path(it, last_symlink))
; /* happy */
else
error; /* would lose local changes */
unlink_entry(it, last_symlink);
}
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-05-12 07:11:07 +02:00
|
|
|
/*
 * Returns true if any leading directory component of `name` is a symbolic
 * link. `last_symlink` lets the caller cache which symlink caused the
 * function to return true so it can be reused for subsequent (sorted)
 * paths; callers use a PATH_MAX-sized buffer initialised to the empty
 * string before the first call.
 */
extern int has_symlink_leading_path(const char *name, char *last_symlink);
|
2005-06-06 06:59:54 +02:00
|
|
|
|
2005-06-28 23:56:57 +02:00
|
|
|
/*
 * Node of a singly linked list (via `next`); this declaration also
 * declares the extern list head alt_odb_list.
 * `base` is a flexible array member, so nodes are allocated with extra
 * trailing space.
 * NOTE(review): presumably `name` points into `base` -- confirm.
 */
extern struct alternate_object_database {
|
2005-08-15 02:25:57 +02:00
|
|
|
struct alternate_object_database *next;
|
2005-06-28 23:56:57 +02:00
|
|
|
char *name;
|
2006-01-07 10:33:54 +01:00
|
|
|
char base[FLEX_ARRAY]; /* more */
|
2005-08-15 02:25:57 +02:00
|
|
|
} *alt_odb_list;
|
2005-06-28 23:56:57 +02:00
|
|
|
extern void prepare_alt_odb(void);
|
|
|
|
|
2006-12-23 08:33:44 +01:00
|
|
|
/*
 * Node of a singly linked list (via `next`); struct packed_git keeps such
 * a list in its `windows` field.
 * NOTE(review): presumably describes one mapped region of a pack file,
 * with base/len the mapping, offset its position within the pack, and
 * last_used/inuse_cnt bookkeeping for reuse and eviction -- confirm.
 */
struct pack_window {
|
|
|
|
struct pack_window *next;
|
|
|
|
unsigned char *base;
|
|
|
|
off_t offset;
|
|
|
|
size_t len;
|
|
|
|
unsigned int last_used;
|
|
|
|
unsigned int inuse_cnt;
|
|
|
|
};
|
|
|
|
|
2005-06-28 23:56:57 +02:00
|
|
|
/*
 * Describes one pack. Nodes are chained through `next`; this declaration
 * also declares the extern list head, which shares the name `packed_git`
 * with the struct tag.
 * pack_name is a flexible array member holding the pack path, so nodes are
 * allocated with extra trailing space.
 * NOTE(review): index_data/index_size/num_objects/index_version presumably
 * describe the loaded pack index, and pack_fd the open pack file -- confirm
 * against the code that fills them in.
 */
extern struct packed_git {
|
|
|
|
struct packed_git *next;
|
2006-12-23 08:33:44 +01:00
|
|
|
struct pack_window *windows;
|
2006-12-23 08:33:47 +01:00
|
|
|
off_t pack_size;
|
2007-04-09 07:06:28 +02:00
|
|
|
const void *index_data;
|
|
|
|
size_t index_size;
|
|
|
|
uint32_t num_objects;
|
2007-03-16 21:42:50 +01:00
|
|
|
int index_version;
|
2007-04-09 07:06:28 +02:00
|
|
|
time_t mtime;
|
2006-12-23 08:34:01 +01:00
|
|
|
int pack_fd;
|
2005-10-14 00:38:28 +02:00
|
|
|
int pack_local;
|
2005-08-01 02:53:44 +02:00
|
|
|
unsigned char sha1[20];
|
2006-01-07 10:33:54 +01:00
|
|
|
/* something like ".git/objects/pack/xxxxx.pack" */
|
|
|
|
char pack_name[FLEX_ARRAY]; /* more */
|
2005-06-28 23:56:57 +02:00
|
|
|
} *packed_git;
|
2005-07-01 02:15:39 +02:00
|
|
|
|
|
|
|
/*
 * Ties a 20-byte SHA-1 to a pack (`p`) and an offset.
 * NOTE(review): presumably `offset` is the position of the object within
 * pack `p` -- confirm.
 */
struct pack_entry {
|
2007-03-07 02:44:30 +01:00
|
|
|
off_t offset;
|
2005-07-01 02:15:39 +02:00
|
|
|
unsigned char sha1[20];
|
|
|
|
struct packed_git *p;
|
|
|
|
};
|
|
|
|
|
2005-07-16 22:55:50 +02:00
|
|
|
/*
 * One ref in a singly linked list (via `next`), carrying an old and a new
 * 20-byte SHA-1, four one-bit flags and a per-ref `status`.
 *
 * REF_STATUS_NONE is explicitly zero. REF_STATUS_EXPECTING_REPORT is a
 * non-OK state meaning that a status report from the remote is still
 * expected for this ref; a ref is therefore not assumed to have succeeded
 * just because the remote died or failed to report it.
 *
 * `name` is a flexible array member, so nodes are allocated with extra
 * trailing space.
 * NOTE(review): remote_status presumably holds the message text reported
 * by the remote for this ref; its ownership is not visible here.
 */
struct ref {
|
|
|
|
struct ref *next;
|
|
|
|
unsigned char old_sha1[20];
|
|
|
|
unsigned char new_sha1[20];
|
2007-11-18 10:31:37 +01:00
|
|
|
unsigned int force:1,
|
|
|
|
merge:1,
|
|
|
|
nonfastforward:1,
|
|
|
|
deletion:1;
|
2007-11-17 13:54:27 +01:00
|
|
|
enum {
|
|
|
|
REF_STATUS_NONE = 0,
|
|
|
|
REF_STATUS_OK,
|
|
|
|
REF_STATUS_REJECT_NONFASTFORWARD,
|
|
|
|
REF_STATUS_REJECT_NODELETE,
|
|
|
|
REF_STATUS_UPTODATE,
|
2007-11-17 13:56:03 +01:00
|
|
|
REF_STATUS_REMOTE_REJECT,
|
send-pack: tighten remote error reporting
Previously, we set all ref pushes to 'OK', and then marked
them as errors if the remote reported so. This has the
problem that if the remote dies or fails to report a ref, we
just assume it was OK.
Instead, we use a new non-OK state to indicate that we are
expecting status (if the remote doesn't support the
report-status feature, we fall back on the old behavior).
Thus we can flag refs for which we expected a status, but
got none (conversely, we now also print a warning for refs
for which we get a status, but weren't expecting one).
This also allows us to simplify the receive_status exit
code, since each ref is individually marked with failure
until we get a success response. We can just print the usual
status table, so the user still gets a sense of what we were
trying to do when the failure happened.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-18 08:16:52 +01:00
|
|
|
REF_STATUS_EXPECTING_REPORT,
|
2007-11-17 13:54:27 +01:00
|
|
|
} status;
|
send-pack: tighten remote error reporting
Previously, we set all ref pushes to 'OK', and then marked
them as errors if the remote reported so. This has the
problem that if the remote dies or fails to report a ref, we
just assume it was OK.
Instead, we use a new non-OK state to indicate that we are
expecting status (if the remote doesn't support the
report-status feature, we fall back on the old behavior).
Thus we can flag refs for which we expected a status, but
got none (conversely, we now also print a warning for refs
for which we get a status, but weren't expecting one).
This also allows us to simplify the receive_status exit
code, since each ref is individually marked with failure
until we get a success response. We can just print the usual
status table, so the user still gets a sense of what we were
trying to do when the failure happened.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-18 08:16:52 +01:00
|
|
|
char *remote_status;
|
2005-08-04 01:35:29 +02:00
|
|
|
struct ref *peer_ref; /* when renaming */
|
2006-01-07 10:33:54 +01:00
|
|
|
char name[FLEX_ARRAY]; /* more */
|
2005-07-16 22:55:50 +02:00
|
|
|
};
|
|
|
|
|
Improve git-peek-remote
This makes git-peek-remote able to basically do everything that
git-ls-remote does (but obviously just for the native protocol, so no
http[s]: or rsync: support).
The default behaviour is the same, but you can now give a mixture of
"--refs", "--tags" and "--heads" flags, where "--refs" forces
git-peek-remote to only show real refs (ie none of the fakey tag lookups,
but also not the special pseudo-refs like HEAD and MERGE_HEAD).
The "--tags" and "--heads" flags respectively limit the output to just
regular tags and heads, of course.
You can still also ask to limit them by name too.
You can combine the flags, so
git peek-remote --refs --tags .
will show all local _true_ tags, without the generated tag lookups
(compare the output without the "--refs" flag).
And "--tags --heads" will show both tags and heads, but will avoid (for
example) any special refs outside of the standard locations.
I'm also planning on adding a "--ignore-local" flag that allows us to ask
it to ignore any refs that we already have in the local tree, but that's
an independent thing.
All this is obviously gearing up to making "git fetch" cheaper.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-07-04 21:29:10 +02:00
|
|
|
/*
 * Ref selection flags; each is a distinct bit, so they can be OR-ed.
 * NOTE(review): presumably the values accepted by the `flags` argument of
 * get_remote_heads(), mirroring peek-remote's --refs / --heads / --tags
 * options -- confirm.
 */
#define REF_NORMAL (1u << 0)
|
|
|
|
#define REF_HEADS (1u << 1)
|
|
|
|
#define REF_TAGS (1u << 2)
|
|
|
|
|
2007-11-18 08:13:10 +01:00
|
|
|
/*
 * Look up a ref by name in the linked list starting at `list`.
 * NOTE(review): presumably returns NULL when no entry matches -- confirm.
 */
extern struct ref *find_ref_by_name(struct ref *list, const char *name);
|
|
|
|
|
2007-05-16 19:09:41 +02:00
|
|
|
/*
 * Bit flag. NOTE(review): presumably for the `flags` argument of
 * git_connect() -- confirm.
 */
#define CONNECT_VERBOSE (1u << 0)
|
2007-10-30 02:05:40 +01:00
|
|
|
/*
 * Open a connection to `url` to run `prog`, filling in the fd[2] pair.
 * NOTE(review): presumably fd[0] is for reading and fd[1] for writing, and
 * the returned child_process is later handed to finish_connect() --
 * confirm, including when the return value may be NULL.
 */
extern struct child_process *git_connect(int fd[2], const char *url, const char *prog, int flags);
|
2007-10-19 21:47:53 +02:00
|
|
|
extern int finish_connect(struct child_process *conn);
|
2005-07-04 22:24:30 +02:00
|
|
|
extern int path_match(const char *path, int nr, char **match);
|
2005-07-06 00:44:09 +02:00
|
|
|
extern int get_ack(int fd, unsigned char *result_sha1);
|
Improve git-peek-remote
This makes git-peek-remote able to basically do everything that
git-ls-remote does (but obviously just for the native protocol, so no
http[s]: or rsync: support).
The default behaviour is the same, but you can now give a mixture of
"--refs", "--tags" and "--heads" flags, where "--refs" forces
git-peek-remote to only show real refs (ie none of the fakey tag lookups,
but also not the special pseudo-refs like HEAD and MERGE_HEAD).
The "--tags" and "--heads" flags respectively limit the output to just
regular tags and heads, of course.
You can still also ask to limit them by name too.
You can combine the flags, so
git peek-remote --refs --tags .
will show all local _true_ tags, without the generated tag lookups
(compare the output without the "--refs" flag).
And "--tags --heads" will show both tags and heads, but will avoid (for
example) any special refs outside of the standard locations.
I'm also planning on adding a "--ignore-local" flag that allows us to ask
it to ignore any refs that we already have in the local tree, but that's
an independent thing.
All this is obviously gearing up to making "git fetch" cheaper.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-07-04 21:29:10 +02:00
|
|
|
/*
 * Read the refs advertised on descriptor `in` into the list at *list.
 * NOTE(review): presumably nr_match/match restrict the result to the given
 * names, `flags` is a mask of REF_NORMAL / REF_HEADS / REF_TAGS, and the
 * return value is the new tail of the list -- confirm.
 */
extern struct ref **get_remote_heads(int in, struct ref **list, int nr_match, char **match, unsigned int flags);
|
2005-10-28 04:48:54 +02:00
|
|
|
extern int server_supports(const char *feature);
|
2005-07-04 20:57:58 +02:00
|
|
|
|
2005-08-01 02:53:44 +02:00
|
|
|
extern struct packed_git *parse_pack_index(unsigned char *sha1);
|
2005-09-02 14:17:10 +02:00
|
|
|
/*
 * NOTE(review): presumably builds a struct packed_git for the pack named
 * by sha1 from the index file at idx_path -- confirm, including the
 * result on failure.
 */
extern struct packed_git *parse_pack_index_file(const unsigned char *sha1,
|
2007-03-16 21:42:50 +01:00
|
|
|
const char *idx_path);
|
2005-08-01 02:53:44 +02:00
|
|
|
|
2005-06-28 23:56:57 +02:00
|
|
|
extern void prepare_packed_git(void);
|
2006-11-01 23:06:21 +01:00
|
|
|
extern void reprepare_packed_git(void);
|
2005-08-01 02:53:44 +02:00
|
|
|
extern void install_packed_git(struct packed_git *pack);
|
|
|
|
|
2007-06-07 09:04:01 +02:00
|
|
|
/*
 * NOTE(review): presumably walks the list starting at `packs` and returns
 * the pack containing object `sha1`, or NULL if none does -- confirm.
 */
extern struct packed_git *find_sha1_pack(const unsigned char *sha1,
|
2005-08-01 02:53:44 +02:00
|
|
|
struct packed_git *packs);
|
|
|
|
|
2007-01-17 07:28:02 +01:00
|
|
|
extern void pack_report(void);
|
2007-05-26 07:24:19 +02:00
|
|
|
extern int open_pack_index(struct packed_git *);
|
2007-03-07 02:44:30 +01:00
|
|
|
/*
 * NOTE(review): presumably returns a pointer to the pack data at the given
 * offset, tracking the window in use through the pack_window ** cursor and
 * reporting the number of bytes available via the unsigned int * (when
 * non-NULL); paired with unuse_pack() -- confirm.
 */
extern unsigned char* use_pack(struct packed_git *, struct pack_window **, off_t, unsigned int *);
|
2008-01-18 04:57:00 +01:00
|
|
|
extern void close_pack_windows(struct packed_git *);
|
2006-12-23 08:34:08 +01:00
|
|
|
/*
 * NOTE(review): presumably releases the window referenced by the cursor
 * previously passed to use_pack() -- confirm.
 */
extern void unuse_pack(struct pack_window **);
|
2007-03-16 21:42:50 +01:00
|
|
|
extern struct packed_git *add_packed_git(const char *, int, int);
|
2007-05-26 07:24:19 +02:00
|
|
|
/*
 * NOTE(review): presumably returns the SHA-1 of the n-th object of the
 * pack in index order, or NULL when n is out of range -- confirm.
 */
extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t);
|
2007-03-07 02:44:30 +01:00
|
|
|
extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
|
|
|
|
/*
 * NOTE(review): presumably unpacks the object stored at the given offset
 * of the pack, returning its contents and reporting its type and size
 * through the two out-pointers -- confirm, including buffer ownership.
 */
extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
|
2006-09-04 06:09:18 +02:00
|
|
|
extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
|
2007-04-16 18:31:56 +02:00
|
|
|
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
|
2007-03-07 02:44:30 +01:00
|
|
|
extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
|
2007-09-17 08:15:19 +02:00
|
|
|
extern int matches_pack_name(struct packed_git *p, const char *name);
|
2005-06-28 23:56:57 +02:00
|
|
|
|
[PATCH] Add update-server-info.
The git-update-server-info command prepares informational files
to help clients discover the contents of a repository, and pull
from it via dumb transport protocols. Currently, the
following files are produced.
- The $repo/info/refs file lists the name of heads and tags
available in the $repo/refs/ directory, along with their
SHA1. This can be used by git-ls-remote command running on
the client side.
- The $repo/info/rev-cache file describes the commit ancestry
reachable from references in the $repo/refs/ directory. This
file is in an append-only binary format to make the server
side friendly to rsync mirroring scheme, and can be read by
git-show-rev-cache command.
- The $repo/objects/info/pack file lists the name of the packs
available, the interdependencies among them, and the head
commits and tags contained in them. Along with the other two
files, this is designed to help clients to make smart pull
decisions.
The git-receive-pack command is changed to invoke it at the end,
so just after a push to a public repository finishes via "git
push", the server info is automatically updated.
In addition, building of the rev-cache file can be done by a
standalone git-build-rev-cache command separately.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-07-24 02:54:41 +02:00
|
|
|
/*
 * Dumb servers support
 *
 * update_server_info() prepares informational files (such as
 * $repo/info/refs) that help clients discover the contents of a
 * repository and pull from it over dumb transport protocols.
 * NOTE(review): the meaning of the int argument and of the return value
 * is not visible here -- confirm against the definition.
 */
|
|
|
|
extern int update_server_info(int);
|
|
|
|
|
2005-10-11 01:31:08 +02:00
|
|
|
/*
 * Callback type for configuration handling: receives two strings and
 * returns int. git_default_config() has this same signature.
 * NOTE(review): the two strings are presumably (variable name, value), as
 * spelled out in check_repository_format_version(var, value) -- confirm,
 * together with the return convention.
 */
typedef int (*config_fn_t)(const char *, const char *);
|
|
|
|
extern int git_default_config(const char *, const char *);
|
2005-11-26 01:03:56 +01:00
|
|
|
/*
 * NOTE(review): presumably parses the configuration file at the given
 * path, invoking fn for each entry -- confirm the return convention.
 */
extern int git_config_from_file(config_fn_t fn, const char *);
|
2005-10-11 01:31:08 +02:00
|
|
|
/*
 * NOTE(review): presumably invokes fn for each entry of the applicable
 * configuration files -- confirm which files and the return convention.
 */
extern int git_config(config_fn_t fn);
|
2007-07-12 15:32:26 +02:00
|
|
|
/*
 * Parse a string into a long / unsigned long stored through the pointer.
 * NOTE(review): whether the int result is a success flag or an error code
 * is not visible here -- confirm before testing it.
 */
extern int git_parse_long(const char *, long *);
|
|
|
|
extern int git_parse_ulong(const char *, unsigned long *);
|
2005-10-11 01:31:08 +02:00
|
|
|
extern int git_config_int(const char *, const char *);
|
2007-07-12 15:32:26 +02:00
|
|
|
extern unsigned long git_config_ulong(const char *, const char *);
|
2005-10-11 01:31:08 +02:00
|
|
|
extern int git_config_bool(const char *, const char *);
|
2008-02-16 06:00:24 +01:00
|
|
|
extern int git_config_string(const char **, const char *, const char *);
|
2005-11-17 22:32:36 +01:00
|
|
|
extern int git_config_set(const char *, const char *);
|
2005-11-20 06:52:22 +01:00
|
|
|
extern int git_config_set_multivar(const char *, const char *, const char *, int);
|
2006-12-16 15:14:14 +01:00
|
|
|
extern int git_config_rename_section(const char *, const char *);
|
2007-11-13 21:05:05 +01:00
|
|
|
extern const char *git_etc_gitconfig(void);
|
2005-11-26 00:59:09 +01:00
|
|
|
extern int check_repository_format_version(const char *var, const char *value);
|
2008-02-06 11:11:18 +01:00
|
|
|
/*
 * NOTE(review): git_env_bool() presumably reads the named environment
 * variable as a boolean, falling back to the int argument when it is
 * unset; git_config_system() / git_config_global() presumably report
 * whether the system-wide / per-user configuration should be read --
 * confirm against the definitions.
 */
extern int git_env_bool(const char *, int);
|
|
|
|
extern int git_config_system(void);
|
|
|
|
extern int git_config_global(void);
|
2008-02-11 19:41:18 +01:00
|
|
|
extern int config_error_nonbool(const char *);
|
2005-10-11 01:31:08 +02:00
|
|
|
|
2005-10-12 03:47:34 +02:00
|
|
|
/*
 * MAX_GITNAME is the size, in bytes, of the fixed buffers
 * git_default_email and git_default_name declared below.
 */
#define MAX_GITNAME (1000)
|
|
|
|
extern char git_default_email[MAX_GITNAME];
|
|
|
|
extern char git_default_name[MAX_GITNAME];
|
|
|
|
|
2007-03-12 20:33:18 +01:00
|
|
|
extern const char *git_commit_encoding;
|
2007-03-07 02:44:17 +01:00
|
|
|
extern const char *git_log_output_encoding;
|
2005-11-28 01:09:40 +01:00
|
|
|
|
2007-06-29 19:40:46 +02:00
|
|
|
/* IO helper functions */
|
|
|
|
/*
 * NOTE(review): presumably flushes the stream when appropriate and dies
 * with a message built from the string argument on failure -- confirm.
 */
extern void maybe_flush_or_die(FILE *, const char *);
|
2005-10-22 10:28:13 +02:00
|
|
|
extern int copy_fd(int ifd, int ofd);
|
2008-02-25 20:24:48 +01:00
|
|
|
extern int copy_file(const char *dst, const char *src, int mode);
|
2007-01-08 16:58:08 +01:00
|
|
|
/*
 * NOTE(review): presumably keeps reading from fd until count bytes have
 * been stored in buf or EOF/an error occurs. Note that the result is an
 * int while count is a size_t -- confirm how large counts and errors are
 * reported.
 */
extern int read_in_full(int fd, void *buf, size_t count);
|
2007-01-08 16:57:52 +01:00
|
|
|
/*
 * NOTE(review): presumably keeps writing to fd until all count bytes of
 * buf have been written or an error occurs. Note that the result is an
 * int while count is a size_t -- confirm how errors are reported.
 */
extern int write_in_full(int fd, const void *buf, size_t count);
|
2006-08-21 20:43:43 +02:00
|
|
|
/*
 * Write count bytes of buf to fd; there is no return value to check.
 * NOTE(review): as the name suggests, this presumably terminates the
 * program on failure -- confirm.
 */
extern void write_or_die(int fd, const void *buf, size_t count);
|
2006-09-02 18:23:48 +02:00
|
|
|
extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
|
2007-01-08 16:57:52 +01:00
|
|
|
extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg);
|
2005-12-15 07:17:38 +01:00
|
|
|
|
2006-02-28 20:26:21 +01:00
|
|
|
/* pager.c */
|
|
|
|
/*
 * NOTE(review): presumably arranges for subsequent output to go through
 * the pager (see pager_program and pager_in_use()) -- confirm.
 */
extern void setup_pager(void);
|
2008-02-16 06:01:11 +01:00
|
|
|
extern const char *pager_program;
|
2007-12-11 07:27:33 +01:00
|
|
|
extern int pager_in_use(void);
|
2006-07-30 00:27:43 +02:00
|
|
|
extern int pager_use_color;
|
2006-02-28 20:26:21 +01:00
|
|
|
|
2008-02-16 06:01:41 +01:00
|
|
|
extern const char *editor_program;
|
2008-02-16 06:01:59 +01:00
|
|
|
extern const char *excludes_file;
|
2007-07-20 14:06:09 +02:00
|
|
|
|
binary patch.
This adds "binary patch" to the diff output and teaches apply
what to do with them.
On the diff generation side, traditionally, we said "Binary
files differ\n" without giving anything other than the preimage
and postimage object name on the index line. This was good
enough for applying a patch generated from your own repository
(very useful while rebasing), because the postimage would be
available in such a case. However, this was not useful when the
recipient of such a patch via e-mail were to apply it, even if
the preimage was available.
This patch allows the diff to generate "binary" patch when
operating under --full-index option. The binary patch follows
the usual extended git diff headers, and looks like this:
"GIT binary patch\n"
<length byte><data>"\n"
...
"\n"
Each line is prefixed with a "length-byte", whose value is upper
or lowercase alphabet that encodes number of bytes that the data
on the line decodes to (1..52 -- 'A' means 1, 'B' means 2, ...,
'Z' means 26, 'a' means 27, ...). <data> is 1 or more groups of
5-byte sequence, each of which encodes up to 4 bytes in base85
encoding. Because 52 / 4 * 5 = 65 and we have the length byte,
an output line is capped to 66 characters. The payload is the
same diff-delta as we use in the packfiles.
On the consumption side, git-apply now can decode and apply the
binary patch when --allow-binary-replacement is given, the diff
was generated with --full-index, and the receiving repository
has the preimage blob, which is the same condition as it always
required when accepting an "Binary files differ\n" patch.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-05 01:51:44 +02:00
|
|
|
/* base85 */
|
2007-04-10 00:56:33 +02:00
|
|
|
/*
 * Base85 codec used for binary patches: every 5-character group of
 * encoded text represents up to 4 bytes of data.
 * decode_85() decodes `line` into dst; encode_85() encodes `bytes` bytes
 * of `data` into buf.
 * NOTE(review): required buffer sizes, whether linelen counts encoded or
 * decoded bytes, and the meaning of decode_85()'s return value are not
 * visible here -- confirm against the definitions.
 */
int decode_85(char *dst, const char *line, int linelen);
|
|
|
|
void encode_85(char *buf, const unsigned char *data, int bytes);
|
binary patch.
This adds "binary patch" to the diff output and teaches apply
what to do with them.
On the diff generation side, traditionally, we said "Binary
files differ\n" without giving anything other than the preimage
and postimage object name on the index line. This was good
enough for applying a patch generated from your own repository
(very useful while rebasing), because the postimage would be
available in such a case. However, this was not useful when the
recipient of such a patch via e-mail were to apply it, even if
the preimage was available.
This patch allows the diff to generate "binary" patch when
operating under --full-index option. The binary patch follows
the usual extended git diff headers, and looks like this:
"GIT binary patch\n"
<length byte><data>"\n"
...
"\n"
Each line is prefixed with a "length-byte", whose value is upper
or lowercase alphabet that encodes number of bytes that the data
on the line decodes to (1..52 -- 'A' means 1, 'B' means 2, ...,
'Z' means 26, 'a' means 27, ...). <data> is 1 or more groups of
5-byte sequence, each of which encodes up to 4 bytes in base85
encoding. Because 52 / 4 * 5 = 65 and we have the length byte,
an output line is capped to 66 characters. The payload is the
same diff-delta as we use in the packfiles.
On the consumption side, git-apply now can decode and apply the
binary patch when --allow-binary-replacement is given, the diff
was generated with --full-index, and the receiving repository
has the preimage blob, which is the same condition as it always
required when accepting an "Binary files differ\n" patch.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-05 01:51:44 +02:00
|
|
|
|
Add specialized object allocator
This creates a simple specialized object allocator for basic
objects.
This avoids wasting space with malloc overhead (metadata and
extra alignment), since the specialized allocator knows the
alignment, and that objects, once allocated, are never freed.
It also allows us to track some basic statistics about object
allocations. For example, for the mozilla import, it shows
object usage as follows:
blobs: 627629 (14710 kB)
trees: 1119035 (34969 kB)
commits: 196423 (8440 kB)
tags: 1336 (46 kB)
and the simpler allocator shaves off about 2.5% off the memory
footprint off a "git-rev-list --all --objects", and is a bit
faster too.
[ Side note: this concludes the series of "save memory in object storage".
The thing is, there simply isn't much more to be saved on the objects.
Doing "git-rev-list --all --objects" on the mozilla archive has a final
total RSS of 131498 pages for me: that's about 513MB. Of that, the
object overhead is now just 56MB, the rest is going somewhere else (put
another way: the fact that this patch shaves off 2.5% of the total
memory overhead, considering that objects are now not much more than 10%
of the total shows how big the wasted space really was: this makes
object allocations much more memory- and time-efficient).
I haven't looked at where the rest is, but I suspect the bulk of it is
just the pack-file loading. It may be that we should pack the tree
objects separately from the blob objects: for git-rev-list --objects, we
don't actually ever need to even look at the blobs, but since trees and
blobs are interspersed in the pack-file, we end up not being dense in
the tree accesses, so we end up looking at more pages than we strictly
need to.
So with a 535MB pack-file, it's entirely possible - even likely - that
most of the remaining RSS is just the mmap of the pack-file itself. We
don't need to map in _all_ of it, but we do end up mapping a fair
amount. ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-19 19:44:15 +02:00
|
|
|
/* alloc.c */
|
2007-04-17 07:11:43 +02:00
|
|
|
/*
 * Specialized allocators for basic object nodes, one per object kind.
 * They avoid the per-allocation overhead of malloc (metadata and extra
 * alignment); objects obtained this way are never freed.
 */
extern void *alloc_blob_node(void);
|
|
|
|
extern void *alloc_tree_node(void);
|
|
|
|
extern void *alloc_commit_node(void);
|
|
|
|
extern void *alloc_tag_node(void);
|
|
|
|
extern void *alloc_object_node(void);
|
Add specialized object allocator
This creates a simple specialized object allocator for basic
objects.
This avoids wasting space with malloc overhead (metadata and
extra alignment), since the specialized allocator knows the
alignment, and that objects, once allocated, are never freed.
It also allows us to track some basic statistics about object
allocations. For example, for the mozilla import, it shows
object usage as follows:
blobs: 627629 (14710 kB)
trees: 1119035 (34969 kB)
commits: 196423 (8440 kB)
tags: 1336 (46 kB)
and the simpler allocator shaves off about 2.5% off the memory
footprint off a "git-rev-list --all --objects", and is a bit
faster too.
[ Side note: this concludes the series of "save memory in object storage".
The thing is, there simply isn't much more to be saved on the objects.
Doing "git-rev-list --all --objects" on the mozilla archive has a final
total RSS of 131498 pages for me: that's about 513MB. Of that, the
object overhead is now just 56MB, the rest is going somewhere else (put
another way: the fact that this patch shaves off 2.5% of the total
memory overhead, considering that objects are now not much more than 10%
of the total shows how big the wasted space really was: this makes
object allocations much more memory- and time-efficient).
I haven't looked at where the rest is, but I suspect the bulk of it is
just the pack-file loading. It may be that we should pack the tree
objects separately from the blob objects: for git-rev-list --objects, we
don't actually ever need to even look at the blobs, but since trees and
blobs are interspersed in the pack-file, we end up not being dense in
the tree accesses, so we end up looking at more pages than we strictly
need to.
So with a 535MB pack-file, it's entirely possible - even likely - that
most of the remaining RSS is just the mmap of the pack-file itself. We
don't need to map in _all_ of it, but we do end up mapping a fair
amount. ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-19 19:44:15 +02:00
|
|
|
/*
 * Report basic statistics about object allocations (per object kind:
 * count and memory used).
 * NOTE(review): the output destination is not visible here -- confirm.
 */
extern void alloc_report(void);
|
|
|
|
|
2006-09-02 18:23:48 +02:00
|
|
|
/* trace.c */
|
|
|
|
/*
 * Emit a trace message built from a printf-style format and arguments.
 * NOTE(review): where the message goes and what enables tracing are
 * defined in trace.c -- confirm.
 */
extern void trace_printf(const char *format, ...);
|
2007-12-03 05:51:50 +01:00
|
|
|
/*
 * Like trace_printf(), but additionally takes an argument vector.
 * NOTE(review): presumably argv is NULL-terminated and is printed along
 * with the formatted message -- confirm.
 */
extern void trace_argv_printf(const char **argv, const char *format, ...);
|
2006-09-02 18:23:48 +02:00
|
|
|
|
Lazy man's auto-CRLF
It currently does NOT know about file attributes, so it does its
conversion purely based on content. Maybe that is more in the "git
philosophy" anyway, since content is king, but I think we should try to do
the file attributes to turn it off on demand.
Anyway, BY DEFAULT it is off regardless, because it requires a
[core]
AutoCRLF = true
in your config file to be enabled. We could make that the default for
Windows, of course, the same way we do some other things (filemode etc).
But you can actually enable it on UNIX, and it will cause:
- "git update-index" will write blobs without CRLF
- "git diff" will diff working tree files without CRLF
- "git checkout" will write files to the working tree _with_ CRLF
and things work fine.
Funnily, it actually shows an odd file in git itself:
git clone -n git test-crlf
cd test-crlf
git config core.autocrlf true
git checkout
git diff
shows a diff for "Documentation/docbook-xsl.css". Why? Because we have
actually checked in that file *with* CRLF! So when "core.autocrlf" is
true, we'll always generate a *different* hash for it in the index,
because the index hash will be for the content _without_ CRLF.
Is this complete? I dunno. It seems to work for me. It doesn't use the
filename at all right now, and that's probably a deficiency (we could
certainly make the "is_binary()" heuristics also take standard filename
heuristics into account).
I don't pass in the filename at all for the "index_fd()" case
(git-update-index), so that would need to be passed around, but this
actually works fine.
NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours
truly. I will not guarantee that they work at all reasonable. Caveat
emptor. But it _is_ simple, and it _is_ safe, since it's all off by
default.
The patch is pretty simple - the biggest part is the new "convert.c" file,
but even that is really just basic stuff that anybody can write in
"Teaching C 101" as a final project for their first class in programming.
Not to say that it's bug-free, of course - but at least we're not talking
about rocket surgery here.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
|
|
|
/* convert.c */
|
Rewrite convert_to_{git,working_tree} to use strbuf's.
* Now, those functions take an "out" strbuf argument, where they store their
result if any. In that case, it also returns 1, else it returns 0.
* those functions support "in place" editing, in the sense that it's OK to
call them this way:
convert_to_git(path, sb->buf, sb->len, sb);
When doable, conversions are done in place for real, else the strbuf
content is just replaced with the new one, transparently for the caller.
If you want to create a new filter working this way, being the accumulation
of filter1, filter2, ... filtern, then your meta_filter would be:
int meta_filter(..., const char *src, size_t len, struct strbuf *sb)
{
int ret = 0;
ret |= filter1(...., src, len, sb);
if (ret) {
src = sb->buf;
len = sb->len;
}
ret |= filter2(...., src, len, sb);
if (ret) {
src = sb->buf;
len = sb->len;
}
....
return ret | filtern(..., src, len, sb);
}
That's why subfilters the convert_to_* functions called were also rewritten
to work this way.
Signed-off-by: Pierre Habouzit <madcoder@debian.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-09-16 15:51:04 +02:00
|
|
|
/* returns 1 if *dst was used */
|
safecrlf: Add mechanism to warn about irreversible crlf conversions
CRLF conversion bears a slight chance of corrupting data.
autocrlf=true will convert CRLF to LF during commit and LF to
CRLF during checkout. A file that contains a mixture of LF and
CRLF before the commit cannot be recreated by git. For text
files this is the right thing to do: it corrects line endings
such that we have only LF line endings in the repository.
But for binary files that are accidentally classified as text the
conversion can corrupt data.
If you recognize such corruption early you can easily fix it by
setting the conversion type explicitly in .gitattributes. Right
after committing you still have the original file in your work
tree and this file is not yet corrupted. You can explicitly tell
git that this file is binary and git will handle the file
appropriately.
Unfortunately, the desired effect of cleaning up text files with
mixed line endings and the undesired effect of corrupting binary
files cannot be distinguished. In both cases CRLFs are removed
in an irreversible way. For text files this is the right thing
to do because CRLFs are line endings, while for binary files
converting CRLFs corrupts data.
This patch adds a mechanism that can either warn the user about
an irreversible conversion or can even refuse to convert. The
mechanism is controlled by the variable core.safecrlf, with the
following values:
- false: disable safecrlf mechanism
- warn: warn about irreversible conversions
- true: refuse irreversible conversions
The default is to warn. Users are only affected by this default
if core.autocrlf is set. But the current default of git is to
leave core.autocrlf unset, so users will not see warnings unless
they deliberately chose to activate the autocrlf mechanism.
The safecrlf mechanism's details depend on the git command. The
general principles when safecrlf is active (not false) are:
- we warn/error out if files in the work tree can be modified in an
irreversible way without giving the user a chance to backup the
original file.
- for read-only operations that do not modify files in the work tree
we do not print annoying warnings.
There are exceptions. Even though...
- "git add" itself does not touch the files in the work tree, the
next checkout would, so the safety triggers;
- "git apply" to update a text file with a patch does touch the files
in the work tree, but the operation is about text files and CRLF
conversion is about fixing the line ending inconsistencies, so the
safety does not trigger;
- "git diff" itself does not touch the files in the work tree, it is
often run to inspect the changes you intend to next "git add". To
catch potential problems early, safety triggers.
The concept of a safety check was originally proposed in a similar
way by Linus Torvalds. Thanks to Dimitry Potapov for insisting
on getting the naked LF/autocrlf=true case right.
Signed-off-by: Steffen Prohaska <prohaska@zib.de>
2008-02-06 12:25:58 +01:00
|
|
|
/*
 * Run the to-git conversion on `src` (`len` bytes, belonging to `path`).
 * Any converted result is stored in the strbuf `dst`.
 *
 * Returns 1 if `dst` was used to hold a result, 0 otherwise.
 *
 * In-place use is supported: `src` may be `dst->buf` (with `len` being
 * `dst->len`).
 *
 * NOTE(review): `checksafe` presumably selects the core.safecrlf policy
 * (disabled / warn / refuse on irreversible CRLF conversion) -- confirm
 * against the definition in convert.c.
 */
extern int convert_to_git(const char *path, const char *src, size_t len,
|
|
|
|
struct strbuf *dst, enum safe_crlf checksafe);
|
Rewrite convert_to_{git,working_tree} to use strbuf's.
* Now, those functions take an "out" strbuf argument, where they store their
result if any. In that case, it also returns 1, else it returns 0.
* those functions support "in place" editing, in the sense that it's OK to
call them this way:
convert_to_git(path, sb->buf, sb->len, sb);
When doable, conversions are done in place for real, else the strbuf
content is just replaced with the new one, transparently for the caller.
If you want to create a new filter working this way, being the accumulation
of filter1, filter2, ... filtern, then your meta_filter would be:
int meta_filter(..., const char *src, size_t len, struct strbuf *sb)
{
int ret = 0;
ret |= filter1(...., src, len, sb);
if (ret) {
src = sb->buf;
len = sb->len;
}
ret |= filter2(...., src, len, sb);
if (ret) {
src = sb->buf;
len = sb->len;
}
....
return ret | filtern(..., src, len, sb);
}
That's why subfilters the convert_to_* functions called were also rewritten
to work this way.
Signed-off-by: Pierre Habouzit <madcoder@debian.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-09-16 15:51:04 +02:00
|
|
|
/*
 * Working-tree counterpart of convert_to_git(): run the conversion on `src`
 * (`len` bytes, belonging to `path`) and store any result in the strbuf `dst`.
 *
 * Returns 1 if `dst` was used to hold a result, 0 otherwise.
 * In-place use (`src` == `dst->buf`, `len` == `dst->len`) is supported.
 */
extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
|
Lazy man's auto-CRLF
It currently does NOT know about file attributes, so it does its
conversion purely based on content. Maybe that is more in the "git
philosophy" anyway, since content is king, but I think we should try to do
the file attributes to turn it off on demand.
Anyway, BY DEFAULT it is off regardless, because it requires a
[core]
AutoCRLF = true
in your config file to be enabled. We could make that the default for
Windows, of course, the same way we do some other things (filemode etc).
But you can actually enable it on UNIX, and it will cause:
- "git update-index" will write blobs without CRLF
- "git diff" will diff working tree files without CRLF
- "git checkout" will write files to the working tree _with_ CRLF
and things work fine.
Funnily, it actually shows an odd file in git itself:
git clone -n git test-crlf
cd test-crlf
git config core.autocrlf true
git checkout
git diff
shows a diff for "Documentation/docbook-xsl.css". Why? Because we have
actually checked in that file *with* CRLF! So when "core.autocrlf" is
true, we'll always generate a *different* hash for it in the index,
because the index hash will be for the content _without_ CRLF.
Is this complete? I dunno. It seems to work for me. It doesn't use the
filename at all right now, and that's probably a deficiency (we could
certainly make the "is_binary()" heuristics also take standard filename
heuristics into account).
I don't pass in the filename at all for the "index_fd()" case
(git-update-index), so that would need to be passed around, but this
actually works fine.
NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours
truly. I will not guarantee that they work at all reasonably. Caveat
emptor. But it _is_ simple, and it _is_ safe, since it's all off by
default.
The patch is pretty simple - the biggest part is the new "convert.c" file,
but even that is really just basic stuff that anybody can write in
"Teaching C 101" as a final project for their first class in programming.
Not to say that it's bug-free, of course - but at least we're not talking
about rocket surgery here.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
|
|
|
|
2007-11-18 10:12:04 +01:00
|
|
|
/* add */
|
|
|
|
/*
 * NOTE(review): declared without `extern`, unlike the convert.c prototypes
 * in this header; for a function declaration this makes no difference.
 * The meaning of `verbose`, `prefix` and `pathspec` is not visible from this
 * header -- see the definition.
 */
void add_files_to_cache(int verbose, const char *prefix, const char **pathspec);
|
|
|
|
|
2007-08-31 22:13:42 +02:00
|
|
|
/* diff.c */
|
|
|
|
extern int diff_auto_refresh_index;
|
|
|
|
|
2007-02-16 01:32:45 +01:00
|
|
|
/* match-trees.c */
|
|
|
|
/*
 * NOTE(review): the parameters are unnamed. Judging by the types alone, the
 * two `const unsigned char *` arguments look like inputs and the non-const
 * `unsigned char *` like an output buffer -- TODO confirm against
 * match-trees.c and name the parameters here.
 */
void shift_tree(const unsigned char *, const unsigned char *, unsigned char *, int);
|
|
|
|
|
2007-11-02 08:24:27 +01:00
|
|
|
/*
|
|
|
|
* whitespace rules.
|
|
|
|
* used by both diff and apply
|
|
|
|
*/
|
|
|
|
/*
 * Each WS_* rule is a single bit written as an octal literal, so that rules
 * can be OR-ed together into one unsigned mask (see WS_DEFAULT_RULE).
 */
#define WS_TRAILING_SPACE 01
|
|
|
|
#define WS_SPACE_BEFORE_TAB 02
|
2007-10-03 03:00:27 +02:00
|
|
|
#define WS_INDENT_WITH_NON_TAB 04
|
2008-01-15 09:59:05 +01:00
|
|
|
/* Octal literal: 010 is the bit with decimal value 8, not ten. */
#define WS_CR_AT_EOL 010
|
2007-11-02 08:24:27 +01:00
|
|
|
/* Default rule mask: WS_TRAILING_SPACE and WS_SPACE_BEFORE_TAB OR-ed together. */
#define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB)
|
2007-12-06 09:14:14 +01:00
|
|
|
extern unsigned whitespace_rule_cfg;
|
|
|
|
extern unsigned whitespace_rule(const char *);
|
|
|
|
extern unsigned parse_whitespace_rule(const char *);
|
2007-12-13 14:32:29 +01:00
|
|
|
extern unsigned check_and_emit_line(const char *line, int len, unsigned ws_rule,
|
|
|
|
FILE *stream, const char *set,
|
|
|
|
const char *reset, const char *ws);
|
|
|
|
extern char *whitespace_error_string(unsigned ws);
|
2008-02-24 01:59:16 +01:00
|
|
|
/*
 * NOTE(review): the parameters are unnamed, so the prototype does not say
 * which argument is which. Presumably (dst, src, len, ws_rule, error_count)
 * -- TODO confirm against the definition and name the parameters here.
 */
extern int ws_fix_copy(char *, const char *, int, unsigned, int *);
|
2007-11-02 08:24:27 +01:00
|
|
|
|
2007-11-18 10:13:32 +01:00
|
|
|
/* ls-files */
|
|
|
|
/*
 * NOTE(review): `matched` is the only non-const pointer parameter, so it is
 * presumably written to (possibly one entry per element of `spec`) --
 * confirm against the definition. Declared without `extern`, unlike most
 * prototypes in this header; that makes no difference for a function.
 */
int pathspec_match(const char **spec, char *matched, const char *filename, int skiplen);
|
|
|
|
int report_path_error(const char *ps_matched, const char **pathspec, int prefix_offset);
|
|
|
|
void overlay_tree_on_cache(const char *tree_name, const char *prefix);
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
#endif /* CACHE_H */
|