2005-04-08 00:13:13 +02:00
|
|
|
#ifndef CACHE_H
|
|
|
|
#define CACHE_H
|
|
|
|
|
2005-12-05 20:54:29 +01:00
|
|
|
#include "git-compat-util.h"
|
Rewrite convert_to_{git,working_tree} to use strbuf's.
* Now, those functions take an "out" strbuf argument, where they store their
result if any. In that case, it also returns 1, else it returns 0.
* those functions support "in place" editing, in the sense that it's OK to
call them this way:
convert_to_git(path, sb->buf, sb->len, sb);
When doable, conversions are done in place for real, else the strbuf
content is just replaced with the new one, transparently for the caller.
If you want to create a new filter working this way, being the accumulation
of filter1, filter2, ... filtern, then your meta_filter would be:
int meta_filter(..., const char *src, size_t len, struct strbuf *sb)
{
int ret = 0;
ret |= filter1(...., src, len, sb);
if (ret) {
src = sb->buf;
len = sb->len;
}
ret |= filter2(...., src, len, sb);
if (ret) {
src = sb->buf;
len = sb->len;
}
....
return ret | filtern(..., src, len, sb);
}
That's why subfilters the convert_to_* functions called were also rewritten
to work this way.
Signed-off-by: Pierre Habouzit <madcoder@debian.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-09-16 15:51:04 +02:00
|
|
|
#include "strbuf.h"
|
2013-11-14 20:20:58 +01:00
|
|
|
#include "hashmap.h"
|
2009-09-09 13:38:58 +02:00
|
|
|
#include "advice.h"
|
2011-02-23 00:41:20 +01:00
|
|
|
#include "gettext.h"
|
2011-05-20 21:59:01 +02:00
|
|
|
#include "convert.h"
|
2014-06-11 09:56:49 +02:00
|
|
|
#include "trace.h"
|
2014-08-07 13:59:17 +02:00
|
|
|
#include "string-list.h"
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2005-04-21 21:33:22 +02:00
|
|
|
#include SHA1_HEADER
|
2008-10-01 20:05:20 +02:00
|
|
|
#ifndef git_SHA_CTX
|
|
|
|
#define git_SHA_CTX SHA_CTX
|
|
|
|
#define git_SHA1_Init SHA1_Init
|
|
|
|
#define git_SHA1_Update SHA1_Update
|
|
|
|
#define git_SHA1_Final SHA1_Final
|
|
|
|
#endif
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2008-10-01 20:05:20 +02:00
|
|
|
#include <zlib.h>
|
2011-06-10 20:52:15 +02:00
|
|
|
typedef struct git_zstream {
|
|
|
|
z_stream z;
|
|
|
|
unsigned long avail_in;
|
|
|
|
unsigned long avail_out;
|
|
|
|
unsigned long total_in;
|
|
|
|
unsigned long total_out;
|
|
|
|
unsigned char *next_in;
|
|
|
|
unsigned char *next_out;
|
|
|
|
} git_zstream;
|
|
|
|
|
|
|
|
void git_inflate_init(git_zstream *);
|
|
|
|
void git_inflate_init_gzip_only(git_zstream *);
|
|
|
|
void git_inflate_end(git_zstream *);
|
|
|
|
int git_inflate(git_zstream *, int flush);
|
|
|
|
|
|
|
|
void git_deflate_init(git_zstream *, int level);
|
|
|
|
void git_deflate_init_gzip(git_zstream *, int level);
|
2013-03-15 23:21:51 +01:00
|
|
|
void git_deflate_init_raw(git_zstream *, int level);
|
2011-06-10 20:52:15 +02:00
|
|
|
void git_deflate_end(git_zstream *);
|
2011-10-28 23:48:40 +02:00
|
|
|
int git_deflate_abort(git_zstream *);
|
2011-06-10 20:52:15 +02:00
|
|
|
int git_deflate_end_gently(git_zstream *);
|
|
|
|
int git_deflate(git_zstream *, int flush);
|
|
|
|
unsigned long git_deflate_bound(git_zstream *, unsigned long);
|
2009-01-08 04:54:47 +01:00
|
|
|
|
2015-03-14 00:39:27 +01:00
|
|
|
/* The length in bytes and in hex digits of an object name (SHA-1 value). */
|
|
|
|
#define GIT_SHA1_RAWSZ 20
|
|
|
|
#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ)
|
|
|
|
|
|
|
|
struct object_id {
|
|
|
|
unsigned char hash[GIT_SHA1_RAWSZ];
|
|
|
|
};
|
|
|
|
|
2006-02-26 16:13:46 +01:00
|
|
|
#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
|
2005-04-30 18:51:03 +02:00
|
|
|
#define DTYPE(de) ((de)->d_type)
|
|
|
|
#else
|
2006-01-20 22:33:20 +01:00
|
|
|
#undef DT_UNKNOWN
|
|
|
|
#undef DT_DIR
|
|
|
|
#undef DT_REG
|
|
|
|
#undef DT_LNK
|
2005-04-30 18:51:03 +02:00
|
|
|
#define DT_UNKNOWN 0
|
|
|
|
#define DT_DIR 1
|
|
|
|
#define DT_REG 2
|
2005-05-13 02:16:04 +02:00
|
|
|
#define DT_LNK 3
|
2005-04-30 18:51:03 +02:00
|
|
|
#define DTYPE(de) DT_UNKNOWN
|
|
|
|
#endif
|
|
|
|
|
2007-04-22 18:43:56 +02:00
|
|
|
/* unknown mode (impossible combination S_IFIFO|S_IFCHR) */
|
|
|
|
#define S_IFINVALID 0030000
|
|
|
|
|
2007-04-10 06:14:58 +02:00
|
|
|
/*
|
|
|
|
* A "directory link" is a link to another git directory.
|
|
|
|
*
|
|
|
|
* The value 0160000 is not normally a valid mode, and
|
|
|
|
* also just happens to be S_IFDIR + S_IFLNK
|
|
|
|
*/
|
2007-05-21 22:08:28 +02:00
|
|
|
#define S_IFGITLINK 0160000
|
|
|
|
#define S_ISGITLINK(m) (((m) & S_IFMT) == S_IFGITLINK)
|
2007-04-10 06:14:58 +02:00
|
|
|
|
tree-diff: rework diff_tree() to generate diffs for multiparent cases as well
Previously diff_tree(), which is now named ll_diff_tree_sha1(), was
generating diff_filepair(s) for two trees t1 and t2, and that was
usually used for a commit as t1=HEAD~, and t2=HEAD - i.e. to see changes
a commit introduces.
In Git, however, we have fundamentally built flexibility in that a
commit can have many parents - 1 for a plain commit, 2 for a simple merge,
but also more than 2 for merging several heads at once.
For merges there is a so called combine-diff, which shows diff, a merge
introduces by itself, omitting changes done by any parent. That works
through first finding paths, that are different to all parents, and then
showing generalized diff, with separate columns for +/- for each parent.
The code lives in combine-diff.c .
There is an impedance mismatch, however, in that a commit could
generally have any number of parents, and that while diffing trees, we
divide cases for 2-tree diffs and more-than-2-tree diffs. I mean there
is no special casing for multiple parents commits in e.g.
revision-walker .
That impedance mismatch *hurts* *performance* *badly* for generating
combined diffs - in "combine-diff: optimize combine_diff_path
sets intersection" I've already removed some slowness from it, but from
the timings provided there, it could be seen, that combined diffs still
cost more than an order of magnitude more cpu time, compared to diff for
usual commits, and that would only be an optimistic estimate, if we take
into account that for e.g. linux.git there is only one merge for several
dozens of plain commits.
That slowness comes from the fact that currently, while generating
combined diff, a lot of time is spent computing diff(commit,commit^2)
just to only then intersect that huge diff to almost small set of files
from diff(commit,commit^1).
That's because at present, to compute combine-diff, for first finding
paths, that "every parent touches", we use the following combine-diff
property/definition:
D(A,P1...Pn) = D(A,P1) ^ ... ^ D(A,Pn) (w.r.t. paths)
where
D(A,P1...Pn) is combined diff between commit A, and parents Pi
and
D(A,Pi) is usual two-tree diff Pi..A
So if any of that D(A,Pi) is huge, treating 1 n-parent combine-diff as n
1-parent diffs and intersecting results will be slow.
And usually, for linux.git and other topic-based workflows, that
D(A,P2) is huge, because, if merge-base of A and P2, is several dozens
of merges (from A, via first parent) below, that D(A,P2) will be diffing
sum of merges from several subsystems to 1 subsystem.
The solution is to avoid computing n 1-parent diffs, and to find
changed-to-all-parents paths via scanning A's and all Pi's trees
simultaneously, at each step comparing their entries, and based on that
comparison, populate paths result, and deduce we could *skip*
*recursing* into subdirectories, if at least for 1 parent, sha1 of that
dir tree is the same as in A. That would save us from doing significant
amount of needless work.
Such approach is very similar to what diff_tree() does, only there we
deal with scanning only 2 trees simultaneously, and for n+1 tree, the
logic is a bit more complex:
D(T,P1...Pn) calculation scheme
-------------------------------
D(T,P1...Pn) = D(T,P1) ^ ... ^ D(T,Pn) (regarding resulting paths set)
D(T,Pj) - diff between T..Pj
D(T,P1...Pn) - combined diff from T to parents P1,...,Pn
We start from all trees, which are sorted, and compare their entries in
lock-step:
T P1 Pn
- - -
|t| |p1| |pn|
|-| |--| ... |--| imin = argmin(p1...pn)
| | | | | |
|-| |--| |--|
|.| |. | |. |
. . .
. . .
at any time there could be 3 cases:
1) t < p[imin];
2) t > p[imin];
3) t = p[imin].
Schematic deduction of what every case means, and what to do, follows:
1) t < p[imin] -> ∀j t ∉ Pj -> "+t" ∈ D(T,Pj) -> D += "+t"; t↓
2) t > p[imin]
2.1) ∃j: pj > p[imin] -> "-p[imin]" ∉ D(T,Pj) -> D += ø; ∀ pi=p[imin] pi↓
2.2) ∀i pi = p[imin] -> pi ∉ T -> "-pi" ∈ D(T,Pi) -> D += "-p[imin]"; ∀i pi↓
3) t = p[imin]
3.1) ∃j: pj > p[imin] -> "+t" ∈ D(T,Pj) -> only pi=p[imin] remains to investigate
3.2) pi = p[imin] -> investigate δ(t,pi)
|
|
v
3.1+3.2) looking at δ(t,pi) ∀i: pi=p[imin] - if all != ø ->
⎧δ(t,pi) - if pi=p[imin]
-> D += ⎨
⎩"+t" - if pi>p[imin]
in any case t↓ ∀ pi=p[imin] pi↓
~
For comparison, here is how diff_tree() works:
D(A,B) calculation scheme
-------------------------
A B
- -
|a| |b| a < b -> a ∉ B -> D(A,B) += +a a↓
|-| |-| a > b -> b ∉ A -> D(A,B) += -b b↓
| | | | a = b -> investigate δ(a,b) a↓ b↓
|-| |-|
|.| |.|
. .
. .
~~~~~~~~
This patch generalizes diff tree-walker to work with arbitrary number of
parents as described above - i.e. now there is a resulting tree t, and
some parents trees tp[i] i=[0..nparent). The generalization builds on
the fact that usual diff
D(A,B)
is by definition the same as combined diff
D(A,[B]),
so if we could rework the code for common case and make it be not slower
for nparent=1 case, usual diff(t1,t2) generation will not be slower, and
multiparent diff tree-walker would greatly benefit generating
combine-diff.
What we do is as follows:
1) diff tree-walker ll_diff_tree_sha1() is internally reworked to be
a paths generator (new name diff_tree_paths()), with each generated path
being `struct combine_diff_path` with info for path, new sha1,mode and for
every parent which sha1,mode it was in it.
2) From that info, we can still generate usual diff queue with
struct diff_filepairs, via "exporting" generated
combine_diff_path, if we know we run for nparent=1 case.
(see emit_diff() which is now named emit_diff_first_parent_only())
3) In order for diff_can_quit_early(), which checks
DIFF_OPT_TST(opt, HAS_CHANGES))
to work, that exporting have to be happening not in bulk, but
incrementally, one diff path at a time.
For such consumers, there is a new callback in diff_options
introduced:
->pathchange(opt, struct combine_diff_path *)
which, if set to !NULL, is called for every generated path.
(see new compat ll_diff_tree_sha1() wrapper around new paths
generator for setup)
4) The paths generation itself, is reworked from previous
ll_diff_tree_sha1() code according to "D(A,P1...Pn) calculation
scheme" provided above:
On the start we allocate [nparent] arrays in place what was
earlier just for one parent tree.
then we just generalize loops, and comparison according to the
algorithm.
Some notes(*):
1) alloca(), for small arrays, is used for "runs not slower for
nparent=1 case than before" goal - if we change it to xmalloc()/free()
the timings get ~1% worse. For alloca() we use just-introduced
xalloca/xalloca_free compatibility wrappers, so it should not be a
portability problem.
2) For every parent tree, we need to keep a tag, whether entry from that
parent equals to entry from minimal parent. For performance reasons I'm
keeping that tag in entry's mode field in unused bit - see S_IFXMIN_NEQ.
Not doing so, we'd need to alloca another [nparent] array, which hurts
performance.
3) For emitted paths, memory could be reused, if we know the path was
processed via callback and will not be needed later. We use efficient
hand-made realloc-style path_appendnew(), that saves us from ~1-1.5%
of potential additional slowdown.
4) goto(s) are used in several places, as the code executes a little bit
faster with lowered register pressure.
Also
- we should now check for FIND_COPIES_HARDER not only when two entries
names are the same, and their hashes are equal, but also for a case,
when a path was removed from some of all parents having it.
The reason is, if we don't, that path won't be emitted at all (see
"a > xi" case), and we'll just skip it, and FIND_COPIES_HARDER wants
all paths - with diff or without - to be emitted, to be later analyzed
for being copies sources.
The new check is only necessary for nparent >1, as for nparent=1 case
xmin_eqtotal always =1 =nparent, and a path is always added to diff as
removal.
~~~~~~~~
Timings for
# without -c, i.e. testing only nparent=1 case
`git log --raw --no-abbrev --no-renames`
before and after the patch are as follows:
navy.git linux.git v3.10..v3.11
before 0.611s 1.889s
after 0.619s 1.907s
slowdown 1.3% 0.9%
These timings show we did no harm to usual diff(tree1,tree2) generation.
From the table we can see that we actually did ~1% slowdown, but I think
I've "earned" that 1% in the previous patch ("tree-diff: reuse base
str(buf) memory on sub-tree recursion", HEAD~~) so for nparent=1 case,
net timings stays approximately the same.
The output also stayed the same.
(*) If we revert 1)-4) to more usual techniques, for nparent=1 case,
we'll get ~2-2.5% of additional slowdown, which I've tried to avoid, as
"do no harm for nparent=1 case" rule.
For linux.git, combined diff will run an order of magnitude faster and
appropriate timings will be provided in the next commit, as we'll be
taking advantage of the new diff tree-walker for combined-diff
generation there.
P.S. and combined diff is not some exotic/for-play-only stuff - for
example for a program I write to represent Git archives as readonly
filesystem, there is initial scan with
`git log --reverse --raw --no-abbrev --no-renames -c`
to extract log of what was created/changed when, as a result building a
map
{} sha1 -> in which commit (and date) a content was added
that `-c` means also show combined diff for merges, and without them, if
a merge is non-trivial (merges changes from two parents with both having
separate changes to a file), or an evil one, the map will not be full,
i.e. some valid sha1 would be absent from it.
That case was my initial motivation for combined diffs speedup.
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-04-06 23:46:26 +02:00
|
|
|
/*
|
|
|
|
* Some mode bits are also used internally for computations.
|
|
|
|
*
|
|
|
|
* They *must* not overlap with any valid modes, and they *must* not be emitted
|
|
|
|
* to outside world - i.e. appear on disk or network. In other words, it's just
|
|
|
|
* temporary fields, which we internally use, but they have to stay in-house.
|
|
|
|
*
|
|
|
|
* ( such approach is valid, as standard S_IF* fits into 16 bits, and in Git
|
|
|
|
* codebase mode is `unsigned int` which is assumed to be at least 32 bits )
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* used internally in tree-diff */
|
|
|
|
#define S_DIFFTREE_IFXMIN_NEQ 0x80000000
|
|
|
|
|
|
|
|
|
2005-07-14 03:46:20 +02:00
|
|
|
/*
|
|
|
|
* Intensive research over the course of many years has shown that
|
|
|
|
* port 9418 is totally unused by anything else. Or
|
|
|
|
*
|
|
|
|
* Your search - "port 9418" - did not match any documents.
|
|
|
|
*
|
|
|
|
* as www.google.com puts it.
|
2005-09-12 20:23:00 +02:00
|
|
|
*
|
|
|
|
* This port has been properly assigned for git use by IANA:
|
|
|
|
* git (Assigned-9418) [I06-050728-0001].
|
|
|
|
*
|
|
|
|
* git 9418/tcp git pack transfer service
|
|
|
|
* git 9418/udp git pack transfer service
|
|
|
|
*
|
|
|
|
* with Linus Torvalds <torvalds@osdl.org> as the point of
|
|
|
|
* contact. September 2005.
|
|
|
|
*
|
|
|
|
* See http://www.iana.org/assignments/port-numbers
|
2005-07-14 03:46:20 +02:00
|
|
|
*/
|
|
|
|
#define DEFAULT_GIT_PORT 9418
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/*
|
|
|
|
* Basic data structures for the directory cache
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define CACHE_SIGNATURE 0x44495243 /* "DIRC" */
|
|
|
|
struct cache_header {
|
2013-08-18 21:41:51 +02:00
|
|
|
uint32_t hdr_signature;
|
|
|
|
uint32_t hdr_version;
|
|
|
|
uint32_t hdr_entries;
|
2005-04-08 00:13:13 +02:00
|
|
|
};
|
|
|
|
|
2012-04-04 18:12:43 +02:00
|
|
|
#define INDEX_FORMAT_LB 2
|
|
|
|
#define INDEX_FORMAT_UB 4
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/*
|
|
|
|
* The "cache_time" is just the low 32 bits of the
|
|
|
|
* time. It doesn't matter if it overflows - we only
|
|
|
|
* check it for equality in the 32 bits we save.
|
|
|
|
*/
|
|
|
|
struct cache_time {
|
2013-08-18 21:41:51 +02:00
|
|
|
uint32_t sec;
|
|
|
|
uint32_t nsec;
|
2005-04-08 00:13:13 +02:00
|
|
|
};
|
|
|
|
|
2013-06-20 10:37:50 +02:00
|
|
|
struct stat_data {
|
|
|
|
struct cache_time sd_ctime;
|
|
|
|
struct cache_time sd_mtime;
|
|
|
|
unsigned int sd_dev;
|
|
|
|
unsigned int sd_ino;
|
|
|
|
unsigned int sd_uid;
|
|
|
|
unsigned int sd_gid;
|
|
|
|
unsigned int sd_size;
|
|
|
|
};
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/*
 * One entry of the directory cache (index).
 *
 * Field order matters: copy_cache_entry() copies the byte range that
 * starts at ce_stat_data and ends just before name with a single
 * memcpy(), so "ent" (the hash chain) and "name" are never copied.
 *
 * "name" is a flexible array member; cache_entry_size(len) reserves
 * len + 1 bytes for it (presumably the extra byte is for a trailing
 * NUL -- confirm against the allocation sites).  ce_namelen holds the
 * length reported by the ce_namelen() accessor.
 */
struct cache_entry {
|
2013-11-14 20:21:58 +01:00
|
|
|
struct hashmap_entry ent;
|
2013-06-20 10:37:50 +02:00
|
|
|
struct stat_data ce_stat_data;
|
2005-04-15 19:44:27 +02:00
|
|
|
unsigned int ce_mode;
|
2008-01-15 01:03:17 +01:00
|
|
|
unsigned int ce_flags;
|
2012-07-11 11:22:37 +02:00
|
|
|
unsigned int ce_namelen;
|
2014-06-13 14:19:36 +02:00
|
|
|
unsigned int index; /* for link extension */
|
2005-04-08 00:13:13 +02:00
|
|
|
unsigned char sha1[20];
|
2006-01-07 10:33:54 +01:00
|
|
|
char name[FLEX_ARRAY]; /* more */
|
2005-04-08 00:13:13 +02:00
|
|
|
};
|
|
|
|
|
2005-04-16 07:51:44 +02:00
|
|
|
#define CE_STAGEMASK (0x3000)
|
2008-08-17 08:02:08 +02:00
|
|
|
#define CE_EXTENDED (0x4000)
|
2006-02-09 06:15:24 +01:00
|
|
|
#define CE_VALID (0x8000)
|
2005-04-16 17:33:23 +02:00
|
|
|
#define CE_STAGESHIFT 12
|
2005-04-16 07:51:44 +02:00
|
|
|
|
2008-10-01 06:04:01 +02:00
|
|
|
/*
|
2014-06-13 14:19:25 +02:00
|
|
|
* Range 0xFFFF0FFF in ce_flags is divided into
|
2008-10-01 06:04:01 +02:00
|
|
|
* two parts: in-memory flags and on-disk ones.
|
|
|
|
* Flags in CE_EXTENDED_FLAGS will get saved on-disk
|
|
|
|
* if you want to save a new flag, add it in
|
|
|
|
* CE_EXTENDED_FLAGS
|
|
|
|
*
|
|
|
|
* In-memory only flags
|
|
|
|
*/
|
2010-11-27 07:22:16 +01:00
|
|
|
#define CE_UPDATE (1 << 16)
|
|
|
|
#define CE_REMOVE (1 << 17)
|
|
|
|
#define CE_UPTODATE (1 << 18)
|
|
|
|
#define CE_ADDED (1 << 19)
|
Fix name re-hashing semantics
We handled the case of removing and re-inserting cache entries badly,
which is something that merging commonly needs to do (removing the
different stages, and then re-inserting one of them as the merged
state).
We even had a rather ugly special case for this failure case, where
replace_index_entry() basically turned itself into a no-op if the new
and the old entries were the same, exactly because the hash routines
didn't handle it on their own.
So what this patch does is to not just have the UNHASHED bit, but a
HASHED bit too, and when you insert an entry into the name hash, that
involves:
- clear the UNHASHED bit, because now it's valid again for lookup
(which is really all that UNHASHED meant)
- if we're being lazy, we're done here (but we still want to clear the
UNHASHED bit regardless of lazy mode, since we can become unlazy
later, and so we need the UNHASHED bit to always be set correctly,
even if we never actually insert the entry into the hash list)
- if it was already hashed, we just leave it on the list
- otherwise mark it HASHED and insert it into the list
this all means that unhashing and rehashing a name all just works
automatically. Obviously, you cannot change the name of an entry (that
would be a serious bug), but nothing can validly do that anyway (you'd
have to allocate a new struct cache_entry anyway since the name length
could change), so that's not a new limitation.
The code actually gets simpler in many ways, although the lazy hashing
does mean that there are a few odd cases (ie something can be marked
unhashed even though it was never on the hash in the first place, and
isn't actually marked hashed!).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-23 05:37:40 +01:00
|
|
|
|
2010-11-27 07:22:16 +01:00
|
|
|
#define CE_HASHED (1 << 20)
|
|
|
|
#define CE_WT_REMOVE (1 << 22) /* remove in work directory */
|
|
|
|
#define CE_CONFLICTED (1 << 23)
|
2008-01-15 01:03:17 +01:00
|
|
|
|
2010-11-27 07:22:16 +01:00
|
|
|
#define CE_UNPACKED (1 << 24)
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 07:24:04 +01:00
|
|
|
#define CE_NEW_SKIP_WORKTREE (1 << 25)
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-07 23:59:54 +01:00
|
|
|
|
checkout: avoid unnecessary match_pathspec calls
In checkout_paths() we do this
- for all updated items, call match_pathspec
- for all items, call match_pathspec (inside unmerge_cache)
- for all items, call match_pathspec (for showing "path .. is unmerged)
- for updated items, call match_pathspec and update paths
That's a lot of duplicate match_pathspec(s) and the function is not
exactly cheap to be called so many times, especially on large indexes.
This patch makes it call match_pathspec once per updated index entry,
save the result in ce_flags and reuse the results in the following
loops.
The changes in 0a1283b (checkout $tree $path: do not clobber local
changes in $path not in $tree - 2011-09-30) limit the affected paths
to ones we read from $tree. We do not do anything to other modified
entries in this case, so the "for all items" above could be modified
to "for all updated items". But..
The command's behavior now is modified slightly: unmerged entries that
match $path, but not updated by $tree, are now NOT touched. Although
this should be considered a bug fix, not a regression. A new test is
added for this change.
And while at there, free ps_matched after use.
The following command is tested on webkit, 215k entries. The pattern
is chosen mainly to make match_pathspec sweat:
git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*"
before after
real 0m3.493s 0m2.737s
user 0m2.239s 0m1.586s
sys 0m1.252s 0m1.151s
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-27 06:58:21 +01:00
|
|
|
/* used to temporarily mark paths matched by pathspecs */
|
|
|
|
#define CE_MATCHED (1 << 26)
|
|
|
|
|
2014-06-13 14:19:39 +02:00
|
|
|
#define CE_UPDATE_IN_BASE (1 << 27)
|
2014-06-13 14:19:43 +02:00
|
|
|
#define CE_STRIP_NAME (1 << 28)
|
2014-06-13 14:19:39 +02:00
|
|
|
|
2008-10-01 06:04:01 +02:00
|
|
|
/*
|
|
|
|
* Extended on-disk flags
|
|
|
|
*/
|
2010-11-27 07:22:16 +01:00
|
|
|
#define CE_INTENT_TO_ADD (1 << 29)
|
|
|
|
#define CE_SKIP_WORKTREE (1 << 30)
|
2008-10-01 06:04:01 +02:00
|
|
|
/* CE_EXTENDED2 is for future extension */
|
2010-11-27 07:22:16 +01:00
|
|
|
/*
 * Bit 31 of ce_flags.  The constant must be unsigned: (1 << 31) shifts
 * a signed 1 into the sign bit of a 32-bit int, which is undefined
 * behaviour and produces a negative value that sign-extends when
 * widened.
 */
#define CE_EXTENDED2 (1U << 31)
|
2008-10-01 06:04:01 +02:00
|
|
|
|
2009-08-20 15:46:57 +02:00
|
|
|
#define CE_EXTENDED_FLAGS (CE_INTENT_TO_ADD | CE_SKIP_WORKTREE)
|
2008-10-01 06:04:01 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Safeguard to avoid saving wrong flags:
|
|
|
|
* - CE_EXTENDED2 won't get saved until its semantic is known
|
|
|
|
* - Bits in 0x0000FFFF have been saved in ce_flags already
|
|
|
|
* - Bits in 0x003F0000 are currently in-memory flags
|
|
|
|
*/
|
|
|
|
#if CE_EXTENDED_FLAGS & 0x803FFFFF
|
|
|
|
#error "CE_EXTENDED_FLAGS out of range"
|
|
|
|
#endif
|
|
|
|
|
2013-07-14 10:35:25 +02:00
|
|
|
struct pathspec;
|
|
|
|
|
2008-02-23 05:41:17 +01:00
|
|
|
/*
|
|
|
|
* Copy the sha1 and stat state of a cache entry from one to
|
|
|
|
* another. But we never change the name, or the hash state!
|
|
|
|
*/
|
2013-06-02 17:46:51 +02:00
|
|
|
static inline void copy_cache_entry(struct cache_entry *dst,
|
|
|
|
const struct cache_entry *src)
|
2008-02-23 05:41:17 +01:00
|
|
|
{
|
2013-11-14 20:22:27 +01:00
|
|
|
unsigned int state = dst->ce_flags & CE_HASHED;
|
2008-02-23 05:41:17 +01:00
|
|
|
|
|
|
|
/* Don't copy hash chain and name */
|
2013-11-14 20:21:58 +01:00
|
|
|
memcpy(&dst->ce_stat_data, &src->ce_stat_data,
|
|
|
|
offsetof(struct cache_entry, name) -
|
|
|
|
offsetof(struct cache_entry, ce_stat_data));
|
2008-02-23 05:41:17 +01:00
|
|
|
|
|
|
|
/* Restore the hash state */
|
2013-11-14 20:22:27 +01:00
|
|
|
dst->ce_flags = (dst->ce_flags & ~CE_HASHED) | state;
|
2008-02-23 05:41:17 +01:00
|
|
|
}
|
|
|
|
|
2012-07-11 11:22:37 +02:00
|
|
|
static inline unsigned create_ce_flags(unsigned stage)
|
2008-01-19 08:42:00 +01:00
|
|
|
{
|
2012-07-11 11:22:37 +02:00
|
|
|
return (stage << CE_STAGESHIFT);
|
2008-01-19 08:42:00 +01:00
|
|
|
}
|
|
|
|
|
2012-07-11 11:22:37 +02:00
|
|
|
#define ce_namelen(ce) ((ce)->ce_namelen)
|
2005-04-16 17:33:23 +02:00
|
|
|
#define ce_size(ce) cache_entry_size(ce_namelen(ce))
|
2008-01-15 01:03:17 +01:00
|
|
|
#define ce_stage(ce) ((CE_STAGEMASK & (ce)->ce_flags) >> CE_STAGESHIFT)
|
2008-01-19 08:45:24 +01:00
|
|
|
#define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE)
|
2009-08-20 15:46:57 +02:00
|
|
|
#define ce_skip_worktree(ce) ((ce)->ce_flags & CE_SKIP_WORKTREE)
|
2008-01-19 08:45:24 +01:00
|
|
|
#define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE)
|
2005-04-16 17:33:23 +02:00
|
|
|
|
2005-04-17 07:26:31 +02:00
|
|
|
#define ce_permissions(mode) (((mode) & 0100) ? 0755 : 0644)
|
2005-05-05 14:38:25 +02:00
|
|
|
static inline unsigned int create_ce_mode(unsigned int mode)
|
|
|
|
{
|
|
|
|
if (S_ISLNK(mode))
|
2008-01-15 01:03:17 +01:00
|
|
|
return S_IFLNK;
|
2007-05-21 22:08:28 +02:00
|
|
|
if (S_ISDIR(mode) || S_ISGITLINK(mode))
|
2008-01-15 01:03:17 +01:00
|
|
|
return S_IFGITLINK;
|
|
|
|
return S_IFREG | ce_permissions(mode);
|
2005-05-05 14:38:25 +02:00
|
|
|
}
|
2013-06-02 17:46:51 +02:00
|
|
|
static inline unsigned int ce_mode_from_stat(const struct cache_entry *ce,
|
|
|
|
unsigned int mode)
|
2007-02-17 07:43:48 +01:00
|
|
|
{
|
2007-03-02 22:11:30 +01:00
|
|
|
extern int trust_executable_bit, has_symlinks;
|
|
|
|
if (!has_symlinks && S_ISREG(mode) &&
|
2008-01-15 01:03:17 +01:00
|
|
|
ce && S_ISLNK(ce->ce_mode))
|
2007-03-02 22:11:30 +01:00
|
|
|
return ce->ce_mode;
|
2007-02-17 07:43:48 +01:00
|
|
|
if (!trust_executable_bit && S_ISREG(mode)) {
|
2008-01-15 01:03:17 +01:00
|
|
|
if (ce && S_ISREG(ce->ce_mode))
|
2007-02-17 07:43:48 +01:00
|
|
|
return ce->ce_mode;
|
|
|
|
return create_ce_mode(0666);
|
|
|
|
}
|
|
|
|
return create_ce_mode(mode);
|
|
|
|
}
|
2008-01-31 10:17:48 +01:00
|
|
|
static inline int ce_to_dtype(const struct cache_entry *ce)
|
|
|
|
{
|
|
|
|
unsigned ce_mode = ntohl(ce->ce_mode);
|
|
|
|
if (S_ISREG(ce_mode))
|
|
|
|
return DT_REG;
|
|
|
|
else if (S_ISDIR(ce_mode) || S_ISGITLINK(ce_mode))
|
|
|
|
return DT_DIR;
|
|
|
|
else if (S_ISLNK(ce_mode))
|
|
|
|
return DT_LNK;
|
|
|
|
else
|
|
|
|
return DT_UNKNOWN;
|
|
|
|
}
|
2010-10-04 12:53:11 +02:00
|
|
|
static inline unsigned int canon_mode(unsigned int mode)
|
|
|
|
{
|
|
|
|
if (S_ISREG(mode))
|
|
|
|
return S_IFREG | ce_permissions(mode);
|
|
|
|
if (S_ISLNK(mode))
|
|
|
|
return S_IFLNK;
|
|
|
|
if (S_ISDIR(mode))
|
|
|
|
return S_IFDIR;
|
|
|
|
return S_IFGITLINK;
|
|
|
|
}
|
2005-04-17 07:26:31 +02:00
|
|
|
|
2011-10-25 20:00:04 +02:00
|
|
|
#define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
|
2005-04-16 06:45:38 +02:00
|
|
|
|
2014-06-13 14:19:27 +02:00
|
|
|
#define SOMETHING_CHANGED (1 << 0) /* unclassified changes go here */
|
|
|
|
#define CE_ENTRY_CHANGED (1 << 1)
|
|
|
|
#define CE_ENTRY_REMOVED (1 << 2)
|
|
|
|
#define CE_ENTRY_ADDED (1 << 3)
|
2014-06-13 14:19:29 +02:00
|
|
|
#define RESOLVE_UNDO_CHANGED (1 << 4)
|
2014-06-13 14:19:31 +02:00
|
|
|
#define CACHE_TREE_CHANGED (1 << 5)
|
2014-06-13 14:19:44 +02:00
|
|
|
#define SPLIT_INDEX_ORDERED (1 << 6)
|
2014-06-13 14:19:27 +02:00
|
|
|
|
2014-06-13 14:19:36 +02:00
|
|
|
struct split_index;
|
2007-04-02 03:14:06 +02:00
|
|
|
struct index_state {
|
|
|
|
struct cache_entry **cache;
|
2012-04-04 18:12:43 +02:00
|
|
|
unsigned int version;
|
2007-04-02 03:14:06 +02:00
|
|
|
unsigned int cache_nr, cache_alloc, cache_changed;
|
2009-12-25 09:30:51 +01:00
|
|
|
struct string_list *resolve_undo;
|
2007-04-02 03:14:06 +02:00
|
|
|
struct cache_tree *cache_tree;
|
2014-06-13 14:19:36 +02:00
|
|
|
struct split_index *split_index;
|
make USE_NSEC work as expected
Since the filesystem ext4 is now defined as stable in Linux v2.6.28,
and ext4 supports nanosecond resolution timestamps natively, it is
time to make USE_NSEC work as expected.
This will make racy git situations less likely to happen. For 'git
checkout' this means it will be less likely that we have to open, read
the contents of the file into RAM, and check if file is really
modified or not. The result should be a little less used CPU time, fewer
pagefaults and a little faster program, at least for 'git checkout'.
Since the number of possible racy git situations would increase when
disks get faster, this patch would be more and more helpful as time
goes by. For a fast Solid State Disk, this patch should be helpful.
Note that, when file operations starts to take less than 1 nanosecond,
one would again start to get more racy git situations.
For more info on racy git, see Documentation/technical/racy-git.txt
For more info on ext4, see http://kernelnewbies.org/Ext4
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-02-19 21:08:29 +01:00
|
|
|
struct cache_time timestamp;
|
unpack_trees(): protect the handcrafted in-core index from read_cache()
unpack_trees() rebuilds the in-core index from scratch by allocating a new
structure and finishing it off by copying the built one to the final
index.
The resulting in-core index is Ok for most use, but read_cache() does not
recognize it as such. The function is meant to be no-op if you already
have loaded the index, until you call discard_cache().
This changes the way read_cache() detects an already initialized in-core
index, by introducing an extra bit, and marks the handcrafted in-core
index as initialized, to avoid this problem.
A better fix in the longer term would be to change the read_cache() API so
that it will always discard and re-read from the on-disk index to avoid
confusion. But there are higher level API that have relied on the current
semantics, and they and their users all need to get converted, which is
outside the scope of 'maint' track.
An example of such a higher level API is write_cache_as_tree(), which is
used by git-write-tree as well as later Porcelains like git-merge, revert
and cherry-pick. In the longer term, we should remove read_cache() from
there and add one to cmd_write_tree(); other callers expect that the
in-core index they prepared is what gets written as a tree so no other
change is necessary for this particular codepath.
The original version of this patch marked the index by pointing an
otherwise wasted malloc'ed memory with o->result.alloc, but this version
uses Linus's idea to use a new "initialized" bit, which is conceptually
much cleaner.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-08-23 21:57:30 +02:00
|
|
|
unsigned name_hash_initialized : 1,
|
|
|
|
initialized : 1;
|
2013-11-14 20:21:58 +01:00
|
|
|
struct hashmap name_hash;
|
2013-11-14 20:20:58 +01:00
|
|
|
struct hashmap dir_hash;
|
2014-04-10 20:31:21 +02:00
|
|
|
unsigned char sha1[20];
|
2007-04-02 03:14:06 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
extern struct index_state the_index;
|
|
|
|
|
2008-03-21 21:16:24 +01:00
|
|
|
/* Name hashing */
|
|
|
|
extern void add_name_hash(struct index_state *istate, struct cache_entry *ce);
|
2013-02-28 00:57:48 +01:00
|
|
|
extern void remove_name_hash(struct index_state *istate, struct cache_entry *ce);
|
|
|
|
extern void free_name_hash(struct index_state *istate);
|
2008-03-21 21:16:24 +01:00
|
|
|
|
|
|
|
|
2007-04-02 08:26:07 +02:00
|
|
|
#ifndef NO_THE_INDEX_COMPATIBILITY_MACROS
|
2007-04-02 03:14:06 +02:00
|
|
|
#define active_cache (the_index.cache)
|
|
|
|
#define active_nr (the_index.cache_nr)
|
|
|
|
#define active_alloc (the_index.cache_alloc)
|
|
|
|
#define active_cache_changed (the_index.cache_changed)
|
|
|
|
#define active_cache_tree (the_index.cache_tree)
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2007-04-02 08:26:07 +02:00
|
|
|
#define read_cache() read_index(&the_index)
|
|
|
|
#define read_cache_from(path) read_index_from(&the_index, (path))
|
2008-11-14 01:36:30 +01:00
|
|
|
#define read_cache_preload(pathspec) read_index_preload(&the_index, (pathspec))
|
checkout: Fix "initial checkout" detection
Earlier commit 5521883 (checkout: do not lose staged removal, 2008-09-07)
tightened the rule to prevent switching branches from losing local
changes, so that staged removal of paths can be protected, while
attempting to keep a loophole to still allow a special case of switching
out of an un-checked-out state.
However, the loophole was made a bit too tight, and did not allow
switching from one branch (in an un-checked-out state) to check out
another branch.
The change to builtin-checkout.c in this commit loosens it to allow this,
by not insisting the original commit and the new commit to be the same.
It also introduces a new function, is_index_unborn (and an associated
macro, is_cache_unborn), to check if the repository is truly in an
un-checked-out state more reliably, by making sure that $GIT_INDEX_FILE
did not exist when populating the in-core index structure. A few places
the earlier commit 5521883 added the check for the initial checkout
condition are updated to use this function.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-11-12 20:52:35 +01:00
|
|
|
#define is_cache_unborn() is_index_unborn(&the_index)
|
2008-06-27 18:21:58 +02:00
|
|
|
#define read_cache_unmerged() read_index_unmerged(&the_index)
|
2007-04-02 08:26:07 +02:00
|
|
|
#define discard_cache() discard_index(&the_index)
|
2008-02-07 17:40:13 +01:00
|
|
|
#define unmerged_cache() unmerged_index(&the_index)
|
2007-04-02 08:26:07 +02:00
|
|
|
#define cache_name_pos(name, namelen) index_name_pos(&the_index,(name),(namelen))
|
|
|
|
#define add_cache_entry(ce, option) add_index_entry(&the_index, (ce), (option))
|
2008-07-21 02:25:56 +02:00
|
|
|
#define rename_cache_entry_at(pos, new_name) rename_index_entry_at(&the_index, (pos), (new_name))
|
2007-04-02 08:26:07 +02:00
|
|
|
#define remove_cache_entry_at(pos) remove_index_entry_at(&the_index, (pos))
|
|
|
|
#define remove_file_from_cache(path) remove_file_from_index(&the_index, (path))
|
2008-05-21 21:04:34 +02:00
|
|
|
#define add_to_cache(path, st, flags) add_to_index(&the_index, (path), (st), (flags))
|
|
|
|
#define add_file_to_cache(path, flags) add_file_to_index(&the_index, (path), (flags))
|
2009-08-21 10:57:59 +02:00
|
|
|
#define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL, NULL)
|
2007-11-10 09:15:03 +01:00
|
|
|
#define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options))
|
|
|
|
#define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options))
|
2013-09-17 09:06:14 +02:00
|
|
|
#define cache_dir_exists(name, namelen) index_dir_exists(&the_index, (name), (namelen))
|
|
|
|
#define cache_file_exists(name, namelen, igncase) index_file_exists(&the_index, (name), (namelen), (igncase))
|
2008-10-16 17:07:26 +02:00
|
|
|
#define cache_name_is_other(name, namelen) index_name_is_other(&the_index, (name), (namelen))
|
2009-12-25 09:30:51 +01:00
|
|
|
#define resolve_undo_clear() resolve_undo_clear_index(&the_index)
|
2009-12-25 22:40:02 +01:00
|
|
|
/*
 * Compatibility wrapper: expands to unmerge_index_entry_at() applied to the
 * global the_index.  The argument is parenthesized, like the other *_cache
 * wrappers here, so an argument that itself expands to a comma expression
 * is passed through as a single call argument.
 */
#define unmerge_cache_entry_at(at) unmerge_index_entry_at(&the_index, (at))
|
2009-12-25 20:57:11 +01:00
|
|
|
/*
 * Compatibility wrapper: expands to unmerge_index() applied to the global
 * the_index.  The argument is parenthesized, like the other *_cache wrappers
 * here, so an argument that itself expands to a comma expression is passed
 * through as a single call argument.
 */
#define unmerge_cache(pathspec) unmerge_index(&the_index, (pathspec))
|
2013-04-13 15:28:31 +02:00
|
|
|
#define read_blob_data_from_cache(path, sz) read_blob_data_from_index(&the_index, (path), (sz))
|
2007-04-02 08:26:07 +02:00
|
|
|
#endif
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2007-02-28 20:45:56 +01:00
|
|
|
enum object_type {
|
|
|
|
OBJ_BAD = -1,
|
|
|
|
OBJ_NONE = 0,
|
|
|
|
OBJ_COMMIT = 1,
|
|
|
|
OBJ_TREE = 2,
|
|
|
|
OBJ_BLOB = 3,
|
|
|
|
OBJ_TAG = 4,
|
|
|
|
/* 5 for future expansion */
|
|
|
|
OBJ_OFS_DELTA = 6,
|
|
|
|
OBJ_REF_DELTA = 7,
|
2008-02-25 22:46:04 +01:00
|
|
|
OBJ_ANY,
|
2010-05-14 11:31:35 +02:00
|
|
|
OBJ_MAX
|
2007-02-28 20:45:56 +01:00
|
|
|
};
|
|
|
|
|
2007-12-01 07:22:38 +01:00
|
|
|
/*
 * Map a file mode to an object type: a directory mode yields OBJ_TREE,
 * a gitlink mode (per S_ISGITLINK) yields OBJ_COMMIT, and any other mode
 * yields OBJ_BLOB.
 */
static inline enum object_type object_type(unsigned int mode)
|
|
|
|
{
|
|
|
|
return S_ISDIR(mode) ? OBJ_TREE :
|
|
|
|
S_ISGITLINK(mode) ? OBJ_COMMIT :
|
|
|
|
OBJ_BLOB;
|
|
|
|
}
|
|
|
|
|
2013-03-08 10:29:08 +01:00
|
|
|
/* Double-check local_repo_env below if you add to this list. */
|
2005-05-10 07:57:58 +02:00
|
|
|
#define GIT_DIR_ENVIRONMENT "GIT_DIR"
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-30 09:24:36 +01:00
|
|
|
#define GIT_COMMON_DIR_ENVIRONMENT "GIT_COMMON_DIR"
|
ref namespaces: infrastructure
Add support for dividing the refs of a single repository into multiple
namespaces, each of which can have its own branches, tags, and HEAD.
Git can expose each namespace as an independent repository to pull from
and push to, while sharing the object store, and exposing all the refs
to operations such as git-gc.
Storing multiple repositories as namespaces of a single repository
avoids storing duplicate copies of the same objects, such as when
storing multiple branches of the same source. The alternates mechanism
provides similar support for avoiding duplicates, but alternates do not
prevent duplication between new objects added to the repositories
without ongoing maintenance, while namespaces do.
To specify a namespace, set the GIT_NAMESPACE environment variable to
the namespace. For each ref namespace, git stores the corresponding
refs in a directory under refs/namespaces/. For example,
GIT_NAMESPACE=foo will store refs under refs/namespaces/foo/. You can
also specify namespaces via the --namespace option to git.
Note that namespaces which include a / will expand to a hierarchy of
namespaces; for example, GIT_NAMESPACE=foo/bar will store refs under
refs/namespaces/foo/refs/namespaces/bar/. This makes paths in
GIT_NAMESPACE behave hierarchically, so that cloning with
GIT_NAMESPACE=foo/bar produces the same result as cloning with
GIT_NAMESPACE=foo and cloning from that repo with GIT_NAMESPACE=bar. It
also avoids ambiguity with strange namespace paths such as
foo/refs/heads/, which could otherwise generate directory/file conflicts
within the refs directory.
Add the infrastructure for ref namespaces: handle the GIT_NAMESPACE
environment variable and --namespace option, and support iterating over
refs in a namespace.
Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Jamey Sharp <jamey@minilop.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-07-05 19:54:44 +02:00
|
|
|
#define GIT_NAMESPACE_ENVIRONMENT "GIT_NAMESPACE"
|
2007-06-06 09:10:42 +02:00
|
|
|
#define GIT_WORK_TREE_ENVIRONMENT "GIT_WORK_TREE"
|
2013-03-08 10:30:25 +01:00
|
|
|
#define GIT_PREFIX_ENVIRONMENT "GIT_PREFIX"
|
2005-05-10 07:57:58 +02:00
|
|
|
#define DEFAULT_GIT_DIR_ENVIRONMENT ".git"
|
2005-05-10 02:57:56 +02:00
|
|
|
#define DB_ENVIRONMENT "GIT_OBJECT_DIRECTORY"
|
2005-04-21 19:55:18 +02:00
|
|
|
#define INDEX_ENVIRONMENT "GIT_INDEX_FILE"
|
2005-07-30 09:58:28 +02:00
|
|
|
#define GRAFT_ENVIRONMENT "GIT_GRAFT_FILE"
|
2013-12-05 14:02:45 +01:00
|
|
|
#define GIT_SHALLOW_FILE_ENVIRONMENT "GIT_SHALLOW_FILE"
|
2006-12-19 10:28:15 +01:00
|
|
|
#define TEMPLATE_DIR_ENVIRONMENT "GIT_TEMPLATE_DIR"
|
|
|
|
#define CONFIG_ENVIRONMENT "GIT_CONFIG"
|
2010-08-23 21:16:00 +02:00
|
|
|
#define CONFIG_DATA_ENVIRONMENT "GIT_CONFIG_PARAMETERS"
|
2006-12-19 10:28:15 +01:00
|
|
|
#define EXEC_PATH_ENVIRONMENT "GIT_EXEC_PATH"
|
2008-05-20 08:49:26 +02:00
|
|
|
#define CEILING_DIRECTORIES_ENVIRONMENT "GIT_CEILING_DIRECTORIES"
|
2009-11-18 07:50:58 +01:00
|
|
|
#define NO_REPLACE_OBJECTS_ENVIRONMENT "GIT_NO_REPLACE_OBJECTS"
|
Add basic infrastructure to assign attributes to paths
This adds the basic infrastructure to assign attributes to
paths, in a way similar to what the exclusion mechanism does
based on $GIT_DIR/info/exclude and .gitignore files.
An attribute is just a simple string that does not contain any
whitespace. They can be specified in $GIT_DIR/info/attributes
file, and .gitattributes file in each directory.
Each line in these files defines a pattern matching rule.
Similar to the exclusion mechanism, a later match overrides an
earlier match in the same file, and entries from .gitattributes
file in the same directory takes precedence over the ones from
parent directories. Lines in $GIT_DIR/info/attributes file are
used as the lowest precedence default rules.
A line is either a comment (an empty line, or a line that begins
with a '#'), or a rule, which is a whitespace separated list of
tokens. The first token on the line is a shell glob pattern.
The rest are names of attributes, each of which can optionally
be prefixed with '!'. Such a line means "if a path matches this
glob, this attribute is set (or unset -- if the attribute name
is prefixed with '!'). For glob matching, the same "if the
pattern does not have a slash in it, the basename of the path is
matched with fnmatch(3) against the pattern, otherwise, the path
is matched with the pattern with FNM_PATHNAME" rule as the
exclusion mechanism is used.
This does not define what an attribute means. Tying an
attribute to various effects it has on git operation for paths
that have it will be specified separately.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-12 10:07:32 +02:00
|
|
|
#define GITATTRIBUTES_FILE ".gitattributes"
|
|
|
|
#define INFOATTRIBUTES_FILE "info/attributes"
|
attribute macro support
This adds "attribute macros" (for lack of better name). So far,
we have low-level attributes such as crlf and diff, which are
defined in operational terms --- setting or unsetting them on a
particular path directly affects what is done to the path. For
example, in order to decline diffs or crlf conversions on a
binary blob, no diffs on PostScript files, and treat all other
files normally, you would have something like these:
* diff crlf
*.ps !diff
proprietary.o !diff !crlf
That is fine as the operation goes, but gets unwieldy rather
rapidly, when we start adding more low-level attributes that are
defined in operational terms. A near-term example of such an
attribute would be 'merge-3way' which would control if git
should attempt the usual 3-way file-level merge internally, or
leave merging to a specialized external program of user's
choice. When it is added, we do _not_ want to force the users
to update the above to:
* diff crlf merge-3way
*.ps !diff
proprietary.o !diff !crlf !merge-3way
The way this patch solves this issue is to realize that the
attributes the user is assigning to paths are not defined in
terms of operations but in terms of what they are.
All of the three low-level attributes usually make sense for
most of the files that sane SCM users have git operate on (these
files are typically called "text"). Only a few cases, such as
binary blob, need exception to decline the "usual treatment
given to text files" -- and people mark them as "binary".
So this allows the $GIT_DIR/info/attributes and .gitattributes
at the toplevel of the project to also specify attributes that
assign other attributes. The syntax is '[attr]' followed by an
attribute name followed by a list of attribute names:
[attr] binary !diff !crlf !merge-3way
When "binary" attribute is set to a path, if the path has not
got diff/crlf/merge-3way attribute set or unset by other rules,
this rule unsets the three low-level attributes.
It is expected that the user level .gitattributes will be
expressed mostly in terms of attributes based on what the files
are, and the above sample would become like this:
(built-in attribute configuration)
[attr] binary !diff !crlf !merge-3way
* diff crlf merge-3way
(project specific .gitattributes)
proprietary.o binary
(user preference $GIT_DIR/info/attributes)
*.ps !diff
There are a few caveats.
* As described above, you can define these macros only in
$GIT_DIR/info/attributes and toplevel .gitattributes.
* There is no attempt to detect circular definition of macro
attributes, and definitions are evaluated from bottom to top
as usual to fill in other attributes that have not yet got
values. The following would work as expected:
[attr] text diff crlf
[attr] ps text !diff
*.ps ps
while this would most likely not (I haven't tried):
[attr] ps text !diff
[attr] text diff crlf
*.ps ps
* When a macro says "[attr] A B !C", saying that a path does
not have attribute A does not let you tell anything about
attributes B or C. That is, given this:
[attr] text diff crlf
[attr] ps text !diff
*.txt !ps
path hello.txt, which would match "*.txt" pattern, would have
"ps" attribute set to zero, but that does not make text
attribute of hello.txt set to false (nor diff attribute set to
true).
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-14 17:54:37 +02:00
|
|
|
#define ATTRIBUTE_MACRO_PREFIX "[attr]"
|
2009-10-09 12:21:57 +02:00
|
|
|
#define GIT_NOTES_REF_ENVIRONMENT "GIT_NOTES_REF"
|
|
|
|
#define GIT_NOTES_DEFAULT_REF "refs/notes/commits"
|
2010-03-12 18:04:26 +01:00
|
|
|
#define GIT_NOTES_DISPLAY_REF_ENVIRONMENT "GIT_NOTES_DISPLAY_REF"
|
2010-03-12 18:04:32 +01:00
|
|
|
#define GIT_NOTES_REWRITE_REF_ENVIRONMENT "GIT_NOTES_REWRITE_REF"
|
|
|
|
#define GIT_NOTES_REWRITE_MODE_ENVIRONMENT "GIT_NOTES_REWRITE_MODE"
|
add global --literal-pathspecs option
Git takes pathspec arguments in many places to limit the
scope of an operation. These pathspecs are treated not as
literal paths, but as glob patterns that can be fed to
fnmatch. When a user is giving a specific pattern, this is a
nice feature.
However, when programmatically providing pathspecs, it can be
a nuisance. For example, to find the latest revision which
modified "$foo", one can use "git rev-list -- $foo". But if
"$foo" contains glob characters (e.g., "f*"), it will
erroneously match more entries than desired. The caller
needs to quote the characters in $foo, and even then, the
results may not be exactly the same as with a literal
pathspec. For instance, the depth checks in
match_pathspec_depth do not kick in if we match via fnmatch.
This patch introduces a global command-line option (i.e.,
one for "git" itself, not for specific commands) to turn
this behavior off. It also has a matching environment
variable, which can make it easier if you are a script or
porcelain interface that is going to issue many such
commands.
This option cannot turn off globbing for particular
pathspecs. That could eventually be done with a ":(noglob)"
magic pathspec prefix. However, that level of granularity is
more cumbersome to use for many cases, and doing ":(noglob)"
right would mean converting the whole codebase to use
"struct pathspec", as the usual "const char **pathspec"
cannot represent extra per-item flags.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-12-19 23:37:30 +01:00
|
|
|
#define GIT_LITERAL_PATHSPECS_ENVIRONMENT "GIT_LITERAL_PATHSPECS"
|
2013-07-14 10:36:08 +02:00
|
|
|
#define GIT_GLOB_PATHSPECS_ENVIRONMENT "GIT_GLOB_PATHSPECS"
|
|
|
|
#define GIT_NOGLOB_PATHSPECS_ENVIRONMENT "GIT_NOGLOB_PATHSPECS"
|
2013-07-14 10:36:09 +02:00
|
|
|
#define GIT_ICASE_PATHSPECS_ENVIRONMENT "GIT_ICASE_PATHSPECS"
|
2005-04-21 19:55:18 +02:00
|
|
|
|
2010-02-25 00:34:14 +01:00
|
|
|
/*
|
setup: suppress implicit "." work-tree for bare repos
If an explicit GIT_DIR is given without a working tree, we
implicitly assume that the current working directory should
be used as the working tree. E.g.,:
GIT_DIR=/some/repo.git git status
would compare against the cwd.
Unfortunately, we fool this rule for sub-invocations of git
by setting GIT_DIR internally ourselves. For example:
git init foo
cd foo/.git
git status ;# fails, as we expect
git config alias.st status
git status ;# does not fail, but should
What happens is that we run setup_git_directory when doing
alias lookup (since we need to see the config), set GIT_DIR
as a result, and then leave GIT_WORK_TREE blank (because we
do not have one). Then when we actually run the status
command, we do setup_git_directory again, which sees our
explicit GIT_DIR and uses the cwd as an implicit worktree.
It's tempting to argue that we should be suppressing that
second invocation of setup_git_directory, as it could use
the values we already found in memory. However, the problem
still exists for sub-processes (e.g., if "git status" were
an external command).
You can see another example with the "--bare" option, which
sets GIT_DIR explicitly. For example:
git init foo
cd foo/.git
git status ;# fails
git --bare status ;# does NOT fail
We need some way of telling sub-processes "even though
GIT_DIR is set, do not use cwd as an implicit working tree".
We could do it by putting a special token into
GIT_WORK_TREE, but the obvious choice (an empty string) has
some portability problems.
Instead, we add a new boolean variable, GIT_IMPLICIT_WORK_TREE,
which suppresses the use of cwd as a working tree when
GIT_DIR is set. We trigger the new variable when we know we
are in a bare setting.
The variable is left intentionally undocumented, as this is
an internal detail (for now, anyway). If somebody comes up
with a good alternate use for it, and once we are confident
we have shaken any bugs out of it, we can consider promoting
it further.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-08 10:32:22 +01:00
|
|
|
* This environment variable is expected to contain a boolean indicating
|
|
|
|
* whether we should or should not treat:
|
|
|
|
*
|
|
|
|
* GIT_DIR=foo.git git ...
|
|
|
|
*
|
|
|
|
* as if GIT_WORK_TREE=. was given. It's not expected that users will make use
|
|
|
|
* of this, but we use it internally to communicate to sub-processes that we
|
|
|
|
* are in a bare repo. If not set, defaults to true.
|
|
|
|
*/
|
|
|
|
#define GIT_IMPLICIT_WORK_TREE_ENVIRONMENT "GIT_IMPLICIT_WORK_TREE"
|
|
|
|
|
2010-02-25 00:34:14 +01:00
|
|
|
/*
|
2013-03-08 10:29:08 +01:00
|
|
|
* Repository-local GIT_* environment variables; these will be cleared
|
|
|
|
* when git spawns a sub-process that runs inside another repository.
|
|
|
|
* The array is NULL-terminated, which makes it easy to pass in the "env"
|
|
|
|
* parameter of a run-command invocation, or to do a simple walk.
|
2010-02-25 00:34:14 +01:00
|
|
|
*/
|
2013-03-08 10:29:08 +01:00
|
|
|
extern const char * const local_repo_env[];
|
2010-02-25 00:34:14 +01:00
|
|
|
|
2007-01-07 11:00:28 +01:00
|
|
|
extern int is_bare_repository_cfg;
|
|
|
|
extern int is_bare_repository(void);
|
2007-01-20 03:09:34 +01:00
|
|
|
extern int is_inside_git_dir(void);
|
Clean up work-tree handling
The old version of work-tree support was an unholy mess, barely readable,
and not to the point.
For example, why do you have to provide a worktree, when it is not used?
As in "git status". Now it works.
Another riddle was: if you can have work trees inside the git dir, why
are some programs complaining that they need a work tree?
IOW it is allowed to call
$ git --git-dir=../ --work-tree=. bla
when you really want to. In this case, you are both in the git directory
and in the working tree. So, programs have to actually test for the right
thing, namely if they are inside a working tree, and not if they are
inside a git directory.
Also, GIT_DIR=../.git should behave the same as if no GIT_DIR was
specified, unless there is a repository in the current working directory.
It does now.
The logic to determine if a repository is bare, or has a work tree
(tertium non datur), is this:
--work-tree=bla overrides GIT_WORK_TREE, which overrides core.bare = true,
which overrides core.worktree, which overrides GIT_DIR/.. when GIT_DIR
ends in /.git, which overrides the directory in which .git/ was found.
In related news, a long standing bug was fixed: when in .git/bla/x.git/,
which is a bare repository, git formerly assumed ../.. to be the
appropriate git dir. This problem was reported by Shawn Pearce to have
caused much pain, where a colleague mistakenly ran "git init" in "/" a
long time ago, and bare repositories just would not work.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-08-01 02:30:14 +02:00
|
|
|
extern char *git_work_tree_cfg;
|
2007-06-06 09:10:42 +02:00
|
|
|
extern int is_inside_work_tree(void);
|
2006-08-23 12:39:11 +02:00
|
|
|
extern const char *get_git_dir(void);
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-30 09:24:36 +01:00
|
|
|
extern const char *get_git_common_dir(void);
|
standardize and improve lookup rules for external local repos
When you specify a local repository on the command line of
clone, ls-remote, upload-pack, receive-pack, or upload-archive,
or in a request to git-daemon, we perform a little bit of
lookup magic, doing things like looking in working trees for
.git directories and appending ".git" for bare repos.
For clone, this magic happens in get_repo_path. For
everything else, it happens in enter_repo. In both cases,
there are some ambiguous or confusing cases that aren't
handled well, and there is one case that is not handled the
same by both methods.
This patch tries to provide (and test!) standard, sensible
lookup rules for both code paths. The intended changes are:
1. When looking up "foo", we have always preferred
a working tree "foo" (containing "foo/.git") over the
bare "foo.git". But we did not prefer a bare "foo" over
"foo.git". With this patch, we do so.
2. We would select directories that existed but didn't
actually look like git repositories. With this patch,
we make sure a selected directory looks like a git
repo. Not only is this more sensible in general, but it
will help anybody who is negatively affected by change
(1) (e.g., if they had "foo.git" next to its
separate work tree "foo", and expect to keep finding
"foo.git" when they reference "foo").
3. The enter_repo code path would, given "foo", look for
"foo.git/.git" (i.e., do the ".git" append magic even
for a repo with working tree). The clone code path did
not; with this patch, they now behave the same.
In the unlikely case of a working tree overlaying a bare
repo (i.e., a ".git" directory _inside_ a bare repo), we
continue to treat it as a working tree (preferring the
"inner" .git over the bare repo). This is mainly because the
combination seems nonsensical, and I'd rather stick with
existing behavior on the off chance that somebody is relying
on it.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-02-02 22:59:13 +01:00
|
|
|
extern int is_git_directory(const char *path);
|
2005-05-10 07:57:58 +02:00
|
|
|
extern char *get_object_directory(void);
|
|
|
|
extern char *get_index_file(void);
|
2005-07-30 09:58:28 +02:00
|
|
|
extern char *get_graft_file(void);
|
2007-08-01 02:29:38 +02:00
|
|
|
extern int set_git_dir(const char *path);
|
2014-11-30 09:24:44 +01:00
|
|
|
extern int get_common_dir(struct strbuf *sb, const char *gitdir);
|
ref namespaces: infrastructure
Add support for dividing the refs of a single repository into multiple
namespaces, each of which can have its own branches, tags, and HEAD.
Git can expose each namespace as an independent repository to pull from
and push to, while sharing the object store, and exposing all the refs
to operations such as git-gc.
Storing multiple repositories as namespaces of a single repository
avoids storing duplicate copies of the same objects, such as when
storing multiple branches of the same source. The alternates mechanism
provides similar support for avoiding duplicates, but alternates do not
prevent duplication between new objects added to the repositories
without ongoing maintenance, while namespaces do.
To specify a namespace, set the GIT_NAMESPACE environment variable to
the namespace. For each ref namespace, git stores the corresponding
refs in a directory under refs/namespaces/. For example,
GIT_NAMESPACE=foo will store refs under refs/namespaces/foo/. You can
also specify namespaces via the --namespace option to git.
Note that namespaces which include a / will expand to a hierarchy of
namespaces; for example, GIT_NAMESPACE=foo/bar will store refs under
refs/namespaces/foo/refs/namespaces/bar/. This makes paths in
GIT_NAMESPACE behave hierarchically, so that cloning with
GIT_NAMESPACE=foo/bar produces the same result as cloning with
GIT_NAMESPACE=foo and cloning from that repo with GIT_NAMESPACE=bar. It
also avoids ambiguity with strange namespace paths such as
foo/refs/heads/, which could otherwise generate directory/file conflicts
within the refs directory.
Add the infrastructure for ref namespaces: handle the GIT_NAMESPACE
environment variable and --namespace option, and support iterating over
refs in a namespace.
Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Jamey Sharp <jamey@minilop.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-07-05 19:54:44 +02:00
|
|
|
extern const char *get_git_namespace(void);
|
|
|
|
extern const char *strip_namespace(const char *namespaced_ref);
|
Clean up work-tree handling
The old version of work-tree support was an unholy mess, barely readable,
and not to the point.
For example, why do you have to provide a worktree, when it is not used?
As in "git status". Now it works.
Another riddle was: if you can have work trees inside the git dir, why
are some programs complaining that they need a work tree?
IOW it is allowed to call
$ git --git-dir=../ --work-tree=. bla
when you really want to. In this case, you are both in the git directory
and in the working tree. So, programs have to actually test for the right
thing, namely if they are inside a working tree, and not if they are
inside a git directory.
Also, GIT_DIR=../.git should behave the same as if no GIT_DIR was
specified, unless there is a repository in the current working directory.
It does now.
The logic to determine if a repository is bare, or has a work tree
(tertium non datur), is this:
--work-tree=bla overrides GIT_WORK_TREE, which overrides core.bare = true,
which overrides core.worktree, which overrides GIT_DIR/.. when GIT_DIR
ends in /.git, which overrides the directory in which .git/ was found.
In related news, a long standing bug was fixed: when in .git/bla/x.git/,
which is a bare repository, git formerly assumed ../.. to be the
appropriate git dir. This problem was reported by Shawn Pearce to have
caused much pain, where a colleague mistakenly ran "git init" in "/" a
long time ago, and bare repositories just would not work.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-08-01 02:30:14 +02:00
|
|
|
extern const char *get_git_work_tree(void);
|
2011-08-22 23:04:56 +02:00
|
|
|
extern const char *read_gitfile(const char *path);
|
2011-08-15 23:17:46 +02:00
|
|
|
extern const char *resolve_gitdir(const char *suspect);
|
2008-04-27 19:39:21 +02:00
|
|
|
extern void set_git_work_tree(const char *tree);
|
2005-05-10 07:57:58 +02:00
|
|
|
|
|
|
|
#define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES"
|
2005-04-21 19:55:18 +02:00
|
|
|
|
2005-09-21 09:00:47 +02:00
|
|
|
extern const char **get_pathspec(const char *prefix, const char **pathspec);
|
2007-11-03 12:23:11 +01:00
|
|
|
extern void setup_work_tree(void);
|
2005-11-26 08:14:15 +01:00
|
|
|
extern const char *setup_git_directory_gently(int *);
|
2005-08-17 03:06:34 +02:00
|
|
|
extern const char *setup_git_directory(void);
|
2010-11-11 15:08:03 +01:00
|
|
|
extern char *prefix_path(const char *prefix, int len, const char *path);
|
2013-07-14 10:36:03 +02:00
|
|
|
extern char *prefix_path_gently(const char *prefix, int len, int *remaining, const char *path);
|
2005-11-26 08:14:15 +01:00
|
|
|
extern const char *prefix_filename(const char *prefix, int len, const char *path);
|
2009-10-18 09:27:24 +02:00
|
|
|
extern int check_filename(const char *prefix, const char *name);
|
2012-06-18 20:18:21 +02:00
|
|
|
extern void verify_filename(const char *prefix,
|
|
|
|
const char *name,
|
|
|
|
int diagnose_misspelt_rev);
|
2006-04-27 00:09:27 +02:00
|
|
|
extern void verify_non_filename(const char *prefix, const char *name);
|
2012-06-21 20:09:50 +02:00
|
|
|
extern int path_inside_repo(const char *prefix, const char *path);
|
2005-08-17 03:06:34 +02:00
|
|
|
|
2008-04-27 19:39:27 +02:00
|
|
|
#define INIT_DB_QUIET 0x0001
|
|
|
|
|
2011-03-19 16:16:56 +01:00
|
|
|
extern int set_git_dir_init(const char *git_dir, const char *real_git_dir, int);
|
2008-04-27 19:39:27 +02:00
|
|
|
extern int init_db(const char *template_dir, unsigned int flags);
|
|
|
|
|
2013-07-16 11:27:36 +02:00
|
|
|
extern void sanitize_stdfds(void);
|
2014-02-08 08:08:51 +01:00
|
|
|
extern int daemonize(void);
|
2013-07-16 11:27:36 +02:00
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/* Next capacity for a growable array that currently holds x slots: (x + 16) * 3 / 2, in integer arithmetic. */
#define alloc_nr(x) ((((x) + 16) * 3) / 2)
|
|
|
|
|
2007-06-11 15:39:44 +02:00
|
|
|
/*
|
|
|
|
* Realloc the buffer pointed at by variable 'x' so that it can hold
|
|
|
|
* at least 'nr' entries; the number of entries currently allocated
|
|
|
|
* is 'alloc', using the standard growing factor alloc_nr() macro.
|
|
|
|
*
|
2010-10-08 18:46:59 +02:00
|
|
|
* DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
|
2007-06-11 15:39:44 +02:00
|
|
|
*/
|
|
|
|
#define ALLOC_GROW(x, nr, alloc) \
|
|
|
|
do { \
|
2007-06-17 00:37:39 +02:00
|
|
|
if ((nr) > alloc) { \
|
Extend --pretty=oneline to cover the first paragraph,
so that an ugly commit message like this can be
handled sanely.
Currently, --pretty=oneline and --pretty=email (hence
format-patch) take and use only the first line of the commit log
message. This changes them to:
- Take the first paragraph, where the definition of the first
paragraph is "skip all blank lines from the beginning, and
then grab everything up to the next empty line".
- Replace all line breaks with a whitespace.
This change would not affect a well-behaved commit message that
adheres to the convention of "single line summary, a blank line,
and then body of message", as its first paragraph always
consists of a single line. Commit messages from different
culture, such as the ones imported from CVS/SVN, can however get
chomped with the existing behaviour at the first linebreak in
the middle of sentence right now, which would become much easier
to see with this change.
The Subject: and --pretty=oneline output would become very long
and unsightly for non-conforming commits, but their messages are
already ugly anyway, and this change at least avoids the loss of
information.
The Subject: line from a multi-line paragraph is folded using
RFC2822 line folding rules at the places where line breaks were
in the original.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-12 07:10:55 +02:00
|
|
|
if (alloc_nr(alloc) < (nr)) \
|
|
|
|
alloc = (nr); \
|
|
|
|
else \
|
|
|
|
alloc = alloc_nr(alloc); \
|
2014-09-16 20:56:57 +02:00
|
|
|
REALLOC_ARRAY(x, alloc); \
|
2007-06-11 15:39:44 +02:00
|
|
|
} \
|
2010-08-13 00:11:15 +02:00
|
|
|
} while (0)
|
2007-06-11 15:39:44 +02:00
|
|
|
|
2005-04-09 18:48:20 +02:00
|
|
|
/* Initialize and use the cache information */
|
2014-06-13 14:19:23 +02:00
|
|
|
struct lock_file;
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int read_index(struct index_state *);
|
2013-07-14 10:35:49 +02:00
|
|
|
extern int read_index_preload(struct index_state *, const struct pathspec *pathspec);
|
2014-06-13 14:19:51 +02:00
|
|
|
extern int do_read_index(struct index_state *istate, const char *path,
|
|
|
|
int must_exist); /* for testing only! */
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int read_index_from(struct index_state *, const char *path);
|
checkout: Fix "initial checkout" detection
Earlier commit 5521883 (checkout: do not lose staged removal, 2008-09-07)
tightened the rule to prevent switching branches from losing local
changes, so that staged removal of paths can be protected, while
attempting to keep a loophole to still allow a special case of switching
out of an un-checked-out state.
However, the loophole was made a bit too tight, and did not allow
switching from one branch (in an un-checked-out state) to check out
another branch.
The change to builtin-checkout.c in this commit loosens it to allow this,
by not insisting the original commit and the new commit to be the same.
It also introduces a new function, is_index_unborn (and an associated
macro, is_cache_unborn), to check if the repository is truly in an
un-checked-out state more reliably, by making sure that $GIT_INDEX_FILE
did not exist when populating the in-core index structure. A few places
the earlier commit 5521883 added the check for the initial checkout
condition are updated to use this function.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-11-12 20:52:35 +01:00
|
|
|
extern int is_index_unborn(struct index_state *);
|
2008-06-27 18:21:58 +02:00
|
|
|
extern int read_index_unmerged(struct index_state *);
|
2014-06-13 14:19:23 +02:00
|
|
|
#define COMMIT_LOCK (1 << 0)
|
|
|
|
#define CLOSE_LOCK (1 << 1)
|
|
|
|
extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags);
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int discard_index(struct index_state *);
|
2008-03-06 21:46:09 +01:00
|
|
|
extern int unmerged_index(const struct index_state *);
|
2006-05-18 21:07:31 +02:00
|
|
|
extern int verify_path(const char *path);
|
2013-09-17 09:06:14 +02:00
|
|
|
extern struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen);
|
|
|
|
extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);
|
2008-03-06 21:46:09 +01:00
|
|
|
extern int index_name_pos(const struct index_state *, const char *name, int namelen);
|
2005-05-08 06:55:21 +02:00
|
|
|
#define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */
|
|
|
|
#define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */
|
2005-06-25 11:25:29 +02:00
|
|
|
#define ADD_CACHE_SKIP_DFCHECK 4 /* Ok to skip DF conflict checks */
|
2007-08-09 22:42:50 +02:00
|
|
|
#define ADD_CACHE_JUST_APPEND 8 /* Append only; tree.c::read_tree() */
|
2008-08-21 10:44:53 +02:00
|
|
|
#define ADD_CACHE_NEW_ONLY 16 /* Do not replace existing ones */
|
2014-06-13 14:19:42 +02:00
|
|
|
#define ADD_CACHE_KEEP_CACHE_TREE 32 /* Do not invalidate cache-tree */
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int add_index_entry(struct index_state *, struct cache_entry *ce, int option);
|
2008-07-21 02:25:56 +02:00
|
|
|
extern void rename_index_entry_at(struct index_state *, int pos, const char *new_name);
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int remove_index_entry_at(struct index_state *, int pos);
|
check_updates(): effective removal of cache entries marked CE_REMOVE
Below is oprofile output from GIT command 'git checkout -q my-v2.6.25'
(move from tag v2.6.27 to tag v2.6.25 of the Linux kernel):
CPU: Core 2, speed 1999.95 MHz (estimated)
Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit
mask of 0x00 (Unhalted core cycles) count 20000
Counted INST_RETIRED_ANY_P events (number of instructions retired) with a
unit mask of 0x00 (No unit mask) count 20000
CPU_CLK_UNHALT...|INST_RETIRED:2...|
samples| %| samples| %|
------------------------------------
409247 100.000 342878 100.000 git
CPU_CLK_UNHALT...|INST_RETIRED:2...|
samples| %| samples| %|
------------------------------------
260476 63.6476 257843 75.1996 libz.so.1.2.3
100876 24.6492 64378 18.7758 kernel-2.6.28.4_2.vmlinux
30850 7.5382 7874 2.2964 libc-2.9.so
14775 3.6103 8390 2.4469 git
2020 0.4936 4325 1.2614 libcrypto.so.0.9.8
191 0.0467 32 0.0093 libpthread-2.9.so
58 0.0142 36 0.0105 ld-2.9.so
1 2.4e-04 0 0 libldap-2.3.so.0.2.31
Detail list of the top 20 function entries (libz counted in one blob):
CPU_CLK_UNHALTED INST_RETIRED_ANY_P
samples % samples % image name symbol name
260476 63.6862 257843 75.2725 libz.so.1.2.3 /lib/libz.so.1.2.3
16587 4.0555 3636 1.0615 libc-2.9.so memcpy
7710 1.8851 277 0.0809 libc-2.9.so memmove
3679 0.8995 1108 0.3235 kernel-2.6.28.4_2.vmlinux d_validate
3546 0.8670 2607 0.7611 kernel-2.6.28.4_2.vmlinux __getblk
3174 0.7760 1813 0.5293 libc-2.9.so _int_malloc
2396 0.5858 3681 1.0746 kernel-2.6.28.4_2.vmlinux copy_to_user
2270 0.5550 2528 0.7380 kernel-2.6.28.4_2.vmlinux __link_path_walk
2205 0.5391 1797 0.5246 kernel-2.6.28.4_2.vmlinux ext4_mark_iloc_dirty
2103 0.5142 1203 0.3512 kernel-2.6.28.4_2.vmlinux find_first_zero_bit
2077 0.5078 997 0.2911 kernel-2.6.28.4_2.vmlinux do_get_write_access
2070 0.5061 514 0.1501 git cache_name_compare
2043 0.4995 1501 0.4382 kernel-2.6.28.4_2.vmlinux rcu_irq_exit
2022 0.4944 1732 0.5056 kernel-2.6.28.4_2.vmlinux __ext4_get_inode_loc
2020 0.4939 4325 1.2626 libcrypto.so.0.9.8 /usr/lib/libcrypto.so.0.9.8
1965 0.4804 1384 0.4040 git patch_delta
1708 0.4176 984 0.2873 kernel-2.6.28.4_2.vmlinux rcu_sched_grace_period
1682 0.4112 727 0.2122 kernel-2.6.28.4_2.vmlinux sysfs_slab_alias
1659 0.4056 290 0.0847 git find_pack_entry_one
1480 0.3619 1307 0.3816 kernel-2.6.28.4_2.vmlinux ext4_writepage_trans_blocks
Notice the memmove line, where the CPU did 7710 / 277 = 27.8 cycles
per instruction, and compared to the total cycles spent inside the
source code of GIT for this command, all the memmove() calls
translates to (7710 * 100) / 14775 = 52.2% of this.
Retesting with a GIT program compiled for gcov usage, I found out that
the memmove() calls came from remove_index_entry_at() in read-cache.c,
where we have:
memmove(istate->cache + pos,
istate->cache + pos + 1,
(istate->cache_nr - pos) * sizeof(struct cache_entry *));
remove_index_entry_at() is called 4902 times from check_updates() in
unpack-trees.c, and each time called we move each cache_entry pointers
(from the removed one) one step to the left.
Since we have 28828 entries in the cache this time, and if we on
average move half of them each time, we in total move approximately
4902 * 0.5 * 28828 * 4 = 282 629 712 bytes, or twice this amount if
each pointer is 8 bytes (64 bit).
OK, it seems that the function check_updates() is called 28 times, so
the estimated guess above had been more correct if check_updates() had
been called only once, but the point is: we get lots of bytes moved.
To fix this, and use an O(N) algorithm instead, where N is the number
of cache_entries, we delete/remove all entries in one loop through all
entries.
From a retest, the new remove_marked_cache_entries() from the patch
below, ended up with the following output line from oprofile:
46 0.0105 15 0.0041 git remove_marked_cache_entries
If we can trust the numbers from oprofile in this case, we saved
approximately ((7710 - 46) * 20000) / (2 * 1000 * 1000 * 1000) = 0.077
seconds CPU time with this fix for this particular test. And notice
that now the CPU did only 46 / 15 = 3.1 cycles/instruction.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-02-18 23:18:03 +01:00
|
|
|
extern void remove_marked_cache_entries(struct index_state *istate);
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int remove_file_from_index(struct index_state *, const char *path);
|
2008-05-21 21:04:34 +02:00
|
|
|
#define ADD_CACHE_VERBOSE 1
|
|
|
|
#define ADD_CACHE_PRETEND 2
|
2008-05-25 23:03:50 +02:00
|
|
|
#define ADD_CACHE_IGNORE_ERRORS 4
|
2008-07-21 10:24:17 +02:00
|
|
|
#define ADD_CACHE_IGNORE_REMOVAL 8
|
2008-08-21 10:44:53 +02:00
|
|
|
#define ADD_CACHE_INTENT 16
|
2008-05-21 21:04:34 +02:00
|
|
|
extern int add_to_index(struct index_state *, const char *path, struct stat *, int flags);
|
|
|
|
extern int add_file_to_index(struct index_state *, const char *path, int flags);
|
2014-01-27 15:45:08 +01:00
|
|
|
extern struct cache_entry *make_cache_entry(unsigned int mode, const unsigned char *sha1, const char *path, int stage, unsigned int refresh_options);
|
Convert "struct cache_entry *" to "const ..." wherever possible
I attempted to make index_state->cache[] a "const struct cache_entry **"
to find out how existing entries in index are modified and where. The
question I have is what do we do if we really need to keep track of on-disk
changes in the index. The result is
- diff-lib.c: setting CE_UPTODATE
- name-hash.c: setting CE_HASHED
- preload-index.c, read-cache.c, unpack-trees.c and
builtin/update-index: obvious
- entry.c: write_entry() may refresh the checked out entry via
fill_stat_cache_info(). This causes "non-const struct cache_entry
*" in builtin/apply.c, builtin/checkout-index.c and
builtin/checkout.c
- builtin/ls-files.c: --with-tree changes stagemask and may set
CE_UPDATE
Of these, write_entry() and its call sites are probably most
interesting because it modifies on-disk info. But this is stat info
and can be retrieved via refresh, at least for porcelain
commands. Other just uses ce_flags for local purposes.
So, keeping track of "dirty" entries is just a matter of setting a
flag in index modification functions exposed by read-cache.c. Except
unpack-trees, the rest of the code base does not do anything funny
behind read-cache's back.
The actual patch is less valuable than the summary above. But if
anyone wants to re-identify the above sites. Applying this patch, then
this:
diff --git a/cache.h b/cache.h
index 430d021..1692891 100644
--- a/cache.h
+++ b/cache.h
@@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode)
#define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
struct index_state {
- struct cache_entry **cache;
+ const struct cache_entry **cache;
unsigned int version;
unsigned int cache_nr, cache_alloc, cache_changed;
struct string_list *resolve_undo;
will help quickly identify them without bogus warnings.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 17:29:00 +02:00
|
|
|
extern int ce_same_name(const struct cache_entry *a, const struct cache_entry *b);
|
2014-02-04 03:20:09 +01:00
|
|
|
extern void set_object_name_for_intent_to_add_entry(struct cache_entry *ce);
|
2008-10-16 17:07:26 +02:00
|
|
|
extern int index_name_is_other(const struct index_state *, const char *, int);
|
2013-04-13 15:28:31 +02:00
|
|
|
extern void *read_blob_data_from_index(struct index_state *, const char *, unsigned long *);
|
2007-11-10 09:15:03 +01:00
|
|
|
|
|
|
|
/* do stat comparison even if CE_VALID is true */
|
|
|
|
#define CE_MATCH_IGNORE_VALID 01
|
|
|
|
/* do not check the contents but report dirty on racily-clean entries */
|
2009-12-14 12:43:58 +01:00
|
|
|
#define CE_MATCH_RACY_IS_DIRTY 02
|
|
|
|
/* do stat comparison even if CE_SKIP_WORKTREE is true */
|
|
|
|
#define CE_MATCH_IGNORE_SKIP_WORKTREE 04
|
2014-01-27 15:45:07 +01:00
|
|
|
/* ignore non-existent files during stat update */
|
|
|
|
#define CE_MATCH_IGNORE_MISSING 0x08
|
2014-01-27 15:45:08 +01:00
|
|
|
/* enable stat refresh */
|
|
|
|
#define CE_MATCH_REFRESH 0x10
|
2013-06-02 17:46:52 +02:00
|
|
|
extern int ie_match_stat(const struct index_state *, const struct cache_entry *, struct stat *, unsigned int);
|
|
|
|
extern int ie_modified(const struct index_state *, const struct cache_entry *, struct stat *, unsigned int);
|
2007-11-10 09:15:03 +01:00
|
|
|
|
2011-05-08 10:47:33 +02:00
|
|
|
#define HASH_WRITE_OBJECT 1
|
|
|
|
#define HASH_FORMAT_CHECK 2
|
|
|
|
extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
|
|
|
|
extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags);
|
2013-06-20 10:37:50 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Record to sd the data from st that we use to check whether a file
|
|
|
|
* might have changed.
|
|
|
|
*/
|
|
|
|
extern void fill_stat_data(struct stat_data *sd, struct stat *st);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return 0 if st is consistent with a file not having been changed
|
|
|
|
* since sd was filled. If there are differences, return a
|
|
|
|
* combination of MTIME_CHANGED, CTIME_CHANGED, OWNER_CHANGED,
|
|
|
|
* INODE_CHANGED, and DATA_CHANGED.
|
|
|
|
*/
|
|
|
|
extern int match_stat_data(const struct stat_data *sd, struct stat *st);
|
|
|
|
|
2005-05-15 23:23:12 +02:00
|
|
|
extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
|
|
|
|
|
2006-05-19 18:56:35 +02:00
|
|
|
#define REFRESH_REALLY 0x0001 /* ignore_valid */
|
|
|
|
#define REFRESH_UNMERGED 0x0002 /* allow unmerged */
|
|
|
|
#define REFRESH_QUIET 0x0004 /* be quiet about it */
|
|
|
|
#define REFRESH_IGNORE_MISSING 0x0008 /* ignore non-existent */
|
2008-07-20 08:25:00 +02:00
|
|
|
#define REFRESH_IGNORE_SUBMODULES 0x0010 /* ignore submodules */
|
2009-08-21 10:57:58 +02:00
|
|
|
#define REFRESH_IN_PORCELAIN 0x0020 /* user friendly output, not "needs update" */
|
2013-07-14 10:35:54 +02:00
|
|
|
extern int refresh_index(struct index_state *, unsigned int flags, const struct pathspec *pathspec, char *seen, const char *header_msg);
|
2006-05-19 18:56:35 +02:00
|
|
|
|
2011-03-21 18:16:10 +01:00
|
|
|
extern void update_index_if_able(struct index_state *, struct lock_file *);
|
_GIT_INDEX_OUTPUT: allow plumbing to output to an alternative index file.
When defined, this allows plumbing commands that update the
index (add, apply, checkout-index, merge-recursive, mv,
read-tree, rm, update-index, and write-tree) to write their
resulting index to an alternative index file while holding a
lock to the original index file. With this, git-commit that
jumps the index does not have to make an extra copy of the index
file, and more importantly, it can do the update while holding
the lock on the index.
However, I think the interface to let an environment variable
specify the output is a mistake, as shown in the documentation.
If a curious user has the environment variable set to something
other than the file GIT_INDEX_FILE points at, almost everything
will break. This should instead be a command line parameter to
tell these plumbing commands to write the result in the named
file, to prevent stupid mistakes.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-01 08:09:02 +02:00
|
|
|
|
|
|
|
extern int hold_locked_index(struct lock_file *, int);
|
2007-04-01 08:27:41 +02:00
|
|
|
extern void set_alternate_index_output(const char *);
|
2014-10-01 12:28:42 +02:00
|
|
|
|
2015-02-17 18:00:13 +01:00
|
|
|
extern int delete_ref(const char *, const unsigned char *sha1, unsigned int flags);
|
2005-04-09 18:48:20 +02:00
|
|
|
|
2006-02-27 23:47:45 +01:00
|
|
|
/* Environment bits from configuration mechanism */
|
2005-10-11 01:31:08 +02:00
|
|
|
extern int trust_executable_bit;
|
2008-07-28 08:31:28 +02:00
|
|
|
extern int trust_ctime;
|
2013-01-22 08:49:22 +01:00
|
|
|
extern int check_stat;
|
2007-06-25 00:11:24 +02:00
|
|
|
extern int quote_path_fully;
|
2007-03-02 22:11:30 +01:00
|
|
|
extern int has_symlinks;
|
2010-10-28 20:28:04 +02:00
|
|
|
extern int minimum_abbrev, default_abbrev;
|
2008-03-22 00:52:46 +01:00
|
|
|
extern int ignore_case;
|
2006-02-09 06:15:24 +01:00
|
|
|
extern int assume_unchanged;
|
2006-05-02 09:40:24 +02:00
|
|
|
extern int prefer_symlink_refs;
|
2006-05-17 11:55:40 +02:00
|
|
|
extern int log_all_ref_updates;
|
2006-03-21 03:45:47 +01:00
|
|
|
extern int warn_ambiguous_refs;
|
cat-file: disable object/refname ambiguity check for batch mode
A common use of "cat-file --batch-check" is to feed a list
of objects from "rev-list --objects" or a similar command.
In this instance, all of our input objects are 40-byte sha1
ids. However, cat-file has always allowed arbitrary revision
specifiers, and feeds the result to get_sha1().
Fortunately, get_sha1() recognizes a 40-byte sha1 before
doing any hard work trying to look up refs, meaning this
scenario should end up spending very little time converting
the input into an object sha1. However, since 798c35f
(get_sha1: warn about full or short object names that look
like refs, 2013-05-29), when we encounter this case, we
spend the extra effort to do a refname lookup anyway, just
to print a warning. This is further exacerbated by ca91993
(get_packed_ref_cache: reload packed-refs file when it
changes, 2013-06-20), which makes individual ref lookup more
expensive by requiring a stat() of the packed-refs file for
each missing ref.
With no patches, this is the time it takes to run:
$ git rev-list --objects --all >objects
$ time git cat-file --batch-check='%(objectname)' <objects
on the linux.git repository:
real 1m13.494s
user 0m25.924s
sys 0m47.532s
If we revert ca91993, the packed-refs up-to-date check, it
gets a little better:
real 0m54.697s
user 0m21.692s
sys 0m32.916s
but we are still spending quite a bit of time on ref lookup
(and we would not want to revert that patch, anyway, which
has correctness issues). If we revert 798c35f, disabling
the warning entirely, we get a much more reasonable time:
real 0m7.452s
user 0m6.836s
sys 0m0.608s
This patch does the moral equivalent of this final case (and
gets similar speedups). We introduce a global flag that
callers of get_sha1() can use to avoid paying the price for
the warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-12 08:20:05 +02:00
|
|
|
extern int warn_on_object_refname_ambiguity;
|
2005-12-22 23:13:56 +01:00
|
|
|
extern int shared_repository;
|
2006-02-27 23:47:45 +01:00
|
|
|
extern const char *apply_default_whitespace;
|
2009-08-04 13:16:49 +02:00
|
|
|
extern const char *apply_default_ignorewhitespace;
|
2011-10-06 20:22:24 +02:00
|
|
|
extern const char *git_attributes_file;
|
2006-07-03 22:11:47 +02:00
|
|
|
extern int zlib_compression_level;
|
Custom compression levels for objects and packs
Add config variables pack.compression and core.loosecompression ,
and switch --compression=level to pack-objects.
Loose objects will be compressed using core.loosecompression if set,
else core.compression if set, else Z_BEST_SPEED.
Packed objects will be compressed using --compression=level if seen,
else pack.compression if set, else core.compression if set,
else Z_DEFAULT_COMPRESSION. This is the "pack compression level".
Loose objects added to a pack undeltified will be recompressed
to the pack compression level if it is unequal to the current
loose compression level by the preceding rules, or if the loose
object was written while core.legacyheaders = true. Newly
deltified loose objects are always compressed to the current
pack compression level.
Previously packed objects added to a pack are recompressed
to the current pack compression level exactly when their
deltification status changes, since the previous pack data
cannot be reused.
In either case, the --no-reuse-object switch from the first
patch below will always force recompression to the current pack
compression level, instead of assuming the pack compression level
hasn't changed and pack data can be reused when possible.
This applies on top of the following patches from Nicolas Pitre:
[PATCH] allow for undeltified objects not to be reused
[PATCH] make "repack -f" imply "pack-objects --no-reuse-object"
Signed-off-by: Dana L. How <danahow@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-05-09 22:56:50 +02:00
|
|
|
extern int core_compression_level;
|
|
|
|
extern int core_compression_seen;
|
2006-12-23 08:34:28 +01:00
|
|
|
extern size_t packed_git_window_size;
|
2006-12-23 08:33:35 +01:00
|
|
|
extern size_t packed_git_limit;
|
2007-03-19 06:14:37 +01:00
|
|
|
extern size_t delta_base_cache_limit;
|
2011-04-05 19:44:11 +02:00
|
|
|
extern unsigned long big_file_threshold;
|
2011-10-28 23:48:40 +02:00
|
|
|
extern unsigned long pack_size_limit_cfg;
|
2014-02-18 12:24:55 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Do replace refs need to be checked this run? This variable is
|
|
|
|
* initialized to true unless --no-replace-object is used or
|
|
|
|
* $GIT_NO_REPLACE_OBJECTS is set, but is set to false by some
|
|
|
|
* commands that do not want replace references to be active. As an
|
|
|
|
* optimization it is also set to false if replace references have
|
|
|
|
* been sought but there were none.
|
|
|
|
*/
|
|
|
|
extern int check_replace_refs;
|
|
|
|
|
2008-06-19 00:18:44 +02:00
|
|
|
extern int fsync_object_files;
|
2008-11-14 01:36:30 +01:00
|
|
|
extern int core_preload_index;
|
2009-08-20 15:47:08 +02:00
|
|
|
extern int core_apply_sparse_checkout;
|
git on Mac OS and precomposed unicode
Mac OS X mangles file names containing unicode on file systems HFS+,
VFAT or SAMBA. When a file using unicode code points outside ASCII
is created on a HFS+ drive, the file name is converted into
decomposed unicode and written to disk. No conversion is done if
the file name is already decomposed unicode.
Calling open("\xc3\x84", ...) with a precomposed "Ä" yields the same
result as open("\x41\xcc\x88",...) with a decomposed "Ä".
As a consequence, readdir() returns the file names in decomposed
unicode, even if the user expects precomposed unicode. Unlike on
HFS+, Mac OS X stores files on a VFAT drive (e.g. a USB drive) in
precomposed unicode, but readdir() still returns file names in
decomposed unicode. When a git repository is stored on a network
share using SAMBA, file names are sent over the wire and written to
disk on the remote system in precomposed unicode, but Mac OS X
readdir() returns decomposed unicode to be compatible with its
behaviour on HFS+ and VFAT.
The unicode decomposition causes many problems:
- The names "git add" and other commands get from the end user may
often be precomposed form (the decomposed form is not easily input
from the keyboard), but when the commands read from the filesystem
to see what it is going to update the index with already is on the
filesystem, readdir() will give decomposed form, which is different.
- Similarly "git log", "git mv" and all other commands that need to
compare pathnames found on the command line (often but not always
precomposed form; a command line input resulting from globbing may
be in decomposed) with pathnames found in the tree objects (should
be precomposed form to be compatible with other systems and for
consistency in general).
- The same for names stored in the index, which should be
precomposed, that may need to be compared with the names read from
readdir().
NFS mounted from Linux is fully transparent and does not suffer from
the above.
As Mac OS X treats precomposed and decomposed file names as equal,
we can
- wrap readdir() on Mac OS X to return the precomposed form, and
- normalize decomposed form given from the command line also to the
precomposed form,
to ensure that all pathnames used in Git are always in the
precomposed form. This behaviour can be requested by setting
"core.precomposedunicode" configuration variable to true.
The code in compat/precomposed_utf8.c implements basically 4 new
functions: precomposed_utf8_opendir(), precomposed_utf8_readdir(),
precomposed_utf8_closedir() and precompose_argv(). The first three
are to wrap opendir(3), readdir(3), and closedir(3) functions.
The argv[] conversion allows to use the TAB filename completion done
by the shell on command line. It tolerates other tools which use
readdir() to feed decomposed file names into git.
When creating a new git repository with "git init" or "git clone",
"core.precomposedunicode" will be set "false".
The user needs to activate this feature manually. She typically
sets core.precomposedunicode to "true" on HFS and VFAT, or file
systems mounted via SAMBA.
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-08 15:50:25 +02:00
|
|
|
extern int precomposed_unicode;
|
2014-12-16 00:15:20 +01:00
|
|
|
extern int protect_hfs;
|
2014-12-16 23:46:59 +01:00
|
|
|
extern int protect_ntfs;
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-30 09:24:36 +01:00
|
|
|
extern int git_db_env, git_index_env, git_graft_env, git_common_dir_env;
|
2005-10-11 01:31:08 +02:00
|
|
|
|
2015-03-20 19:43:06 +01:00
|
|
|
/*
|
|
|
|
* Include broken refs in all ref iterations, which will
|
|
|
|
* generally choke dangerous operations rather than letting
|
|
|
|
* them silently proceed without taking the broken ref into
|
|
|
|
* account.
|
|
|
|
*/
|
|
|
|
extern int ref_paranoia;
|
|
|
|
|
2013-01-16 20:18:48 +01:00
|
|
|
/*
|
|
|
|
* The character that begins a commented line in user-editable file
|
|
|
|
* that is subject to stripspace.
|
|
|
|
*/
|
|
|
|
extern char comment_line_char;
|
2014-05-17 03:52:23 +02:00
|
|
|
extern int auto_comment_line_char;
|
2013-01-16 20:18:48 +01:00
|
|
|
|
2008-02-19 17:24:37 +01:00
|
|
|
/*
 * Branch tracking modes. UNSPECIFIED is the only negative value; the
 * remaining enumerators count up from NEVER = 0.
 * NOTE(review): presumably these mirror a branch auto-tracking
 * configuration setting -- confirm against the config parser.
 */
enum branch_track {
|
2008-08-21 19:23:20 +02:00
|
|
|
BRANCH_TRACK_UNSPECIFIED = -1,
|
2008-02-19 17:24:37 +01:00
|
|
|
BRANCH_TRACK_NEVER = 0,
|
|
|
|
BRANCH_TRACK_REMOTE, /* implicit value 1 */
|
|
|
|
BRANCH_TRACK_ALWAYS, /* implicit value 2 */
|
|
|
|
BRANCH_TRACK_EXPLICIT, /* implicit value 3 */
|
2010-05-14 11:31:35 +02:00
|
|
|
BRANCH_TRACK_OVERRIDE /* implicit value 4 */
|
2008-02-19 17:24:37 +01:00
|
|
|
};
|
|
|
|
|
2008-05-11 00:36:29 +02:00
|
|
|
enum rebase_setup_type {
|
|
|
|
AUTOREBASE_NEVER = 0,
|
|
|
|
AUTOREBASE_LOCAL,
|
|
|
|
AUTOREBASE_REMOTE,
|
2010-05-14 11:31:35 +02:00
|
|
|
AUTOREBASE_ALWAYS
|
2008-05-11 00:36:29 +02:00
|
|
|
};
|
|
|
|
|
2009-03-16 16:42:51 +01:00
|
|
|
enum push_default_type {
|
|
|
|
PUSH_DEFAULT_NOTHING = 0,
|
|
|
|
PUSH_DEFAULT_MATCHING,
|
2012-04-24 09:50:03 +02:00
|
|
|
PUSH_DEFAULT_SIMPLE,
|
2011-02-16 01:54:24 +01:00
|
|
|
PUSH_DEFAULT_UPSTREAM,
|
push: Provide situational hints for non-fast-forward errors
Pushing a non-fast-forward update to a remote repository will result in
an error, but the hint text doesn't provide the correct resolution in
every case. Give better resolution advice in three push scenarios:
1) If you push your current branch and it triggers a non-fast-forward
error, you should merge remote changes with 'git pull' before pushing
again.
2) If you push to a shared repository others push to, and your local
tracking branches are not kept up to date, the 'matching refs' default
will generate non-fast-forward errors on outdated branches. If this is
your workflow, the 'matching refs' default is not for you. Consider
setting the 'push.default' configuration variable to 'current' or
'upstream' to ensure only your current branch is pushed.
3) If you explicitly specify a ref that is not your current branch or
push matching branches with ':', you will generate a non-fast-forward
error if any pushed branch tip is out of date. You should checkout the
offending branch and merge remote changes before pushing again.
Teach transport.c to recognize these scenarios and configure push.c
to hint for them. If 'git push's default behavior changes or we
discover more scenarios, extension is easy. Standardize on the
advice API and add three new advice variables, 'pushNonFFCurrent',
'pushNonFFDefault', and 'pushNonFFMatching'. Setting any of these
to 'false' will disable their affiliated advice. Setting
'pushNonFastForward' to false will disable all three, thus preserving the
config option for users who already set it, but guaranteeing new
users won't disable push advice accidentally.
Based-on-patch-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Christopher Tiwald <christiwald@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-03-20 05:31:33 +01:00
|
|
|
PUSH_DEFAULT_CURRENT,
|
|
|
|
PUSH_DEFAULT_UNSPECIFIED
|
2009-03-16 16:42:51 +01:00
|
|
|
};
|
|
|
|
|
2008-02-19 17:24:37 +01:00
|
|
|
extern enum branch_track git_branch_track;
|
2008-05-11 00:36:29 +02:00
|
|
|
extern enum rebase_setup_type autorebase;
|
2009-03-16 16:42:51 +01:00
|
|
|
extern enum push_default_type push_default;
|
2008-02-19 17:24:37 +01:00
|
|
|
|
2009-04-28 00:32:25 +02:00
|
|
|
enum object_creation_mode {
|
|
|
|
OBJECT_CREATION_USES_HARDLINKS = 0,
|
2010-05-14 11:31:35 +02:00
|
|
|
OBJECT_CREATION_USES_RENAMES = 1
|
2009-04-28 00:32:25 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
extern enum object_creation_mode object_creation_mode;
|
2009-04-25 11:57:14 +02:00
|
|
|
|
2009-10-09 12:21:57 +02:00
|
|
|
extern char *notes_ref_name;
|
|
|
|
|
2009-07-23 17:33:49 +02:00
|
|
|
extern int grafts_replace_parents;
|
|
|
|
|
2005-11-26 00:59:09 +01:00
|
|
|
#define GIT_REPO_VERSION 0
|
|
|
|
extern int repository_format_version;
|
|
|
|
extern int check_repository_format(void);
|
|
|
|
|
2005-04-09 18:48:20 +02:00
|
|
|
#define MTIME_CHANGED 0x0001
|
|
|
|
#define CTIME_CHANGED 0x0002
|
|
|
|
#define OWNER_CHANGED 0x0004
|
|
|
|
#define MODE_CHANGED 0x0008
|
|
|
|
#define INODE_CHANGED 0x0010
|
|
|
|
#define DATA_CHANGED 0x0020
|
2005-05-05 14:38:25 +02:00
|
|
|
#define TYPE_CHANGED 0x0040
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2008-10-26 22:59:13 +01:00
|
|
|
extern char *mksnpath(char *buf, size_t n, const char *fmt, ...)
|
|
|
|
__attribute__((format (printf, 3, 4)));
|
2014-11-30 09:24:28 +01:00
|
|
|
extern void strbuf_git_path(struct strbuf *sb, const char *fmt, ...)
|
|
|
|
__attribute__((format (printf, 2, 3)));
|
2008-10-27 11:17:51 +01:00
|
|
|
extern char *git_pathdup(const char *fmt, ...)
|
|
|
|
__attribute__((format (printf, 1, 2)));
|
2012-06-22 11:03:23 +02:00
|
|
|
extern char *mkpathdup(const char *fmt, ...)
|
|
|
|
__attribute__((format (printf, 1, 2)));
|
2008-10-26 22:59:13 +01:00
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/* Return a statically allocated path built from a printf-style format */
|
2014-11-30 09:24:27 +01:00
|
|
|
extern const char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
|
|
|
|
extern const char *git_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
|
|
|
|
extern const char *git_path_submodule(const char *path, const char *fmt, ...)
|
2010-07-07 15:39:11 +02:00
|
|
|
__attribute__((format (printf, 2, 3)));
|
2014-11-30 09:24:54 +01:00
|
|
|
extern void report_linked_checkout_garbage(void);
|
2010-07-07 15:39:11 +02:00
|
|
|
|
2014-02-21 17:32:06 +01:00
|
|
|
/*
|
|
|
|
* Return the name of the file in the local object database that would
|
|
|
|
* be used to store a loose object with the specified sha1. The
|
|
|
|
* return value is a pointer to a statically allocated buffer that is
|
|
|
|
* overwritten each time the function is called.
|
|
|
|
*/
|
2014-02-21 17:32:05 +01:00
|
|
|
extern const char *sha1_file_name(const unsigned char *sha1);
|
2014-02-21 17:32:06 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Return the name of the (local) packfile with the specified sha1 in
|
|
|
|
* its name. The return value is a pointer to memory that is
|
|
|
|
* overwritten each time this function is called.
|
|
|
|
*/
|
2005-08-01 02:53:44 +02:00
|
|
|
extern char *sha1_pack_name(const unsigned char *sha1);
|
2014-02-21 17:32:06 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Return the name of the (local) pack index file with the specified
|
|
|
|
* sha1 in its name. The return value is a pointer to memory that is
|
|
|
|
* overwritten each time this function is called.
|
|
|
|
*/
|
2005-08-01 02:53:44 +02:00
|
|
|
extern char *sha1_pack_index_name(const unsigned char *sha1);
|
2014-02-21 17:32:06 +01:00
|
|
|
|
2005-10-12 00:22:48 +02:00
|
|
|
extern const char *find_unique_abbrev(const unsigned char *sha1, int);
|
2015-03-14 00:39:28 +01:00
|
|
|
extern const unsigned char null_sha1[GIT_SHA1_RAWSZ];
|
2011-04-28 12:19:02 +02:00
|
|
|
|
|
|
|
static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
|
2006-08-15 22:37:19 +02:00
|
|
|
{
|
2011-04-28 12:19:02 +02:00
|
|
|
int i;
|
|
|
|
|
2015-03-14 00:39:28 +01:00
|
|
|
for (i = 0; i < GIT_SHA1_RAWSZ; i++, sha1++, sha2++) {
|
2011-04-28 12:19:02 +02:00
|
|
|
if (*sha1 != *sha2)
|
|
|
|
return *sha1 - *sha2;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2006-08-15 22:37:19 +02:00
|
|
|
}
|
2011-04-28 12:19:02 +02:00
|
|
|
|
2015-03-14 00:39:28 +01:00
|
|
|
static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
|
|
|
|
{
|
|
|
|
return hashcmp(oid1->hash, oid2->hash);
|
|
|
|
}
|
|
|
|
|
2011-04-28 12:19:02 +02:00
|
|
|
static inline int is_null_sha1(const unsigned char *sha1)
|
2006-08-17 20:54:57 +02:00
|
|
|
{
|
2011-04-28 12:19:02 +02:00
|
|
|
return !hashcmp(sha1, null_sha1);
|
2006-08-17 20:54:57 +02:00
|
|
|
}
|
2011-04-28 12:19:02 +02:00
|
|
|
|
2015-03-14 00:39:28 +01:00
|
|
|
static inline int is_null_oid(const struct object_id *oid)
|
|
|
|
{
|
|
|
|
return !hashcmp(oid->hash, null_sha1);
|
|
|
|
}
|
|
|
|
|
2006-08-23 08:49:00 +02:00
|
|
|
static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src)
|
|
|
|
{
|
2015-03-14 00:39:28 +01:00
|
|
|
memcpy(sha_dst, sha_src, GIT_SHA1_RAWSZ);
|
2006-08-23 08:49:00 +02:00
|
|
|
}
|
2015-03-14 00:39:28 +01:00
|
|
|
|
|
|
|
static inline void oidcpy(struct object_id *dst, const struct object_id *src)
|
|
|
|
{
|
|
|
|
hashcpy(dst->hash, src->hash);
|
2006-08-23 08:49:00 +02:00
|
|
|
}
|
2015-03-14 00:39:28 +01:00
|
|
|
|
2006-08-23 22:57:23 +02:00
|
|
|
static inline void hashclr(unsigned char *hash)
|
|
|
|
{
|
2015-03-14 00:39:28 +01:00
|
|
|
memset(hash, 0, GIT_SHA1_RAWSZ);
|
2006-08-23 22:57:23 +02:00
|
|
|
}
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2015-03-14 00:39:28 +01:00
|
|
|
static inline void oidclr(struct object_id *oid)
|
|
|
|
{
|
|
|
|
hashclr(oid->hash);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-11-12 09:17:52 +01:00
|
|
|
#define EMPTY_TREE_SHA1_HEX \
|
|
|
|
"4b825dc642cb6eb9a060e54bf8d69288fbee4904"
|
2011-02-07 09:17:27 +01:00
|
|
|
#define EMPTY_TREE_SHA1_BIN_LITERAL \
|
2008-11-12 09:17:52 +01:00
|
|
|
"\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60" \
|
|
|
|
"\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04"
|
2011-02-07 09:17:27 +01:00
|
|
|
#define EMPTY_TREE_SHA1_BIN \
|
|
|
|
((const unsigned char *) EMPTY_TREE_SHA1_BIN_LITERAL)
|
2008-11-12 09:17:52 +01:00
|
|
|
|
2012-03-22 19:53:39 +01:00
|
|
|
#define EMPTY_BLOB_SHA1_HEX \
|
|
|
|
"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"
|
|
|
|
#define EMPTY_BLOB_SHA1_BIN_LITERAL \
|
|
|
|
"\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b" \
|
|
|
|
"\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91"
|
|
|
|
#define EMPTY_BLOB_SHA1_BIN \
|
|
|
|
((const unsigned char *) EMPTY_BLOB_SHA1_BIN_LITERAL)
|
|
|
|
|
|
|
|
static inline int is_empty_blob_sha1(const unsigned char *sha1)
|
|
|
|
{
|
|
|
|
return !hashcmp(sha1, EMPTY_BLOB_SHA1_BIN);
|
|
|
|
}
|
|
|
|
|
2005-08-04 22:43:03 +02:00
|
|
|
int git_mkstemp(char *path, size_t n, const char *template);
|
|
|
|
|
2009-05-31 10:35:52 +02:00
|
|
|
int git_mkstemps(char *path, size_t n, const char *template, int suffix_len);
|
|
|
|
|
2010-02-22 23:32:13 +01:00
|
|
|
/* set default permissions by passing mode arguments to open(2) */
|
|
|
|
int git_mkstemps_mode(char *pattern, int suffix_len, int mode);
|
|
|
|
int git_mkstemp_mode(char *pattern, int mode);
|
|
|
|
|
2008-04-16 10:34:24 +02:00
|
|
|
/*
|
|
|
|
* NOTE NOTE NOTE!!
|
|
|
|
*
|
|
|
|
* PERM_UMASK, OLD_PERM_GROUP and OLD_PERM_EVERYBODY enumerations must
|
|
|
|
* not be changed. Old repositories have core.sharedrepository written in
|
|
|
|
* numeric format, and therefore these values are preserved for compatibility
|
|
|
|
* reasons.
|
|
|
|
*/
|
2006-06-10 08:09:49 +02:00
|
|
|
enum sharedrepo {
|
2008-04-16 10:34:24 +02:00
|
|
|
PERM_UMASK = 0,
|
|
|
|
OLD_PERM_GROUP = 1,
|
|
|
|
OLD_PERM_EVERYBODY = 2,
|
|
|
|
PERM_GROUP = 0660,
|
2010-05-14 11:31:35 +02:00
|
|
|
PERM_EVERYBODY = 0664
|
2006-06-10 08:09:49 +02:00
|
|
|
};
|
|
|
|
int git_config_perm(const char *var, const char *value);
|
2013-03-30 10:53:32 +01:00
|
|
|
int adjust_shared_perm(const char *path);
|
2014-01-06 14:45:25 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Create the directory containing the named path, using care to be
|
|
|
|
* somewhat safe against races. Return one of the scld_error values
|
|
|
|
* to indicate success/failure.
|
2014-01-06 14:45:27 +01:00
|
|
|
*
|
|
|
|
* SCLD_VANISHED indicates that one of the ancestor directories of the
|
|
|
|
* path existed at one point during the function call and then
|
|
|
|
* suddenly vanished, probably because another process pruned the
|
|
|
|
* directory while we were working. To be robust against this kind of
|
|
|
|
* race, callers might want to try invoking the function again when it
|
|
|
|
* returns SCLD_VANISHED.
|
2014-01-06 14:45:25 +01:00
|
|
|
*/
|
|
|
|
enum scld_error {
|
|
|
|
SCLD_OK = 0,
|
|
|
|
SCLD_FAILED = -1,
|
|
|
|
SCLD_PERMS = -2,
|
2014-01-06 14:45:27 +01:00
|
|
|
SCLD_EXISTS = -3,
|
|
|
|
SCLD_VANISHED = -4
|
2014-01-06 14:45:25 +01:00
|
|
|
};
|
|
|
|
enum scld_error safe_create_leading_directories(char *path);
|
|
|
|
enum scld_error safe_create_leading_directories_const(const char *path);
|
|
|
|
|
2011-03-11 01:02:50 +01:00
|
|
|
int mkdir_in_gitdir(const char *path);
|
2012-06-22 11:03:23 +02:00
|
|
|
extern void home_config_paths(char **global, char **xdg, char *file);
|
2009-11-17 18:24:25 +01:00
|
|
|
extern char *expand_user_path(const char *path);
|
2011-10-04 22:02:00 +02:00
|
|
|
const char *enter_repo(const char *path, int strict);
|
2007-08-01 02:28:59 +02:00
|
|
|
static inline int is_absolute_path(const char *path)
|
|
|
|
{
|
2011-05-27 18:00:38 +02:00
|
|
|
return is_dir_sep(path[0]) || has_dos_drive_prefix(path);
|
2007-08-01 02:28:59 +02:00
|
|
|
}
|
2008-09-09 10:27:07 +02:00
|
|
|
int is_directory(const char *);
|
2011-03-17 12:26:46 +01:00
|
|
|
const char *real_path(const char *path);
|
2012-10-28 17:16:22 +01:00
|
|
|
const char *real_path_if_valid(const char *path);
|
2011-03-17 12:26:46 +01:00
|
|
|
const char *absolute_path(const char *path);
|
2013-10-14 04:29:40 +02:00
|
|
|
const char *remove_leading_path(const char *in, const char *prefix);
|
2013-06-25 17:53:43 +02:00
|
|
|
const char *relative_path(const char *in, const char *prefix, struct strbuf *sb);
|
2013-07-14 10:36:03 +02:00
|
|
|
int normalize_path_copy_len(char *dst, const char *src, int *prefix_len);
|
2009-02-07 16:08:28 +01:00
|
|
|
int normalize_path_copy(char *dst, const char *src);
|
2012-10-28 17:16:24 +01:00
|
|
|
int longest_ancestor_length(const char *path, struct string_list *prefixes);
|
2009-02-19 20:10:49 +01:00
|
|
|
char *strip_path_suffix(const char *path, const char *suffix);
|
2009-11-09 20:26:43 +01:00
|
|
|
int daemon_avoid_alias(const char *path);
|
path: add is_ntfs_dotgit() helper
We do not allow paths with a ".git" component to be added to
the index, as that would mean repository contents could
overwrite our repository files. However, asking "is this
path the same as .git" is not as simple as strcmp() on some
filesystems.
On NTFS (and FAT32), there exist so-called "short names" for
backwards-compatibility: 8.3 compliant names that refer to the same files
as their long names. As ".git" is not an 8.3 compliant name, a short name
is generated automatically, typically "git~1".
Depending on the Windows version, any combination of trailing spaces and
periods are ignored, too, so that both "git~1." and ".git." still refer
to the Git directory. The reason is that 8.3 stores file names shorter
than 8 characters with trailing spaces. So literally, it does not matter
for the short name whether it is padded with spaces or whether it is
shorter than 8 characters, it is considered to be the exact same.
The period is the separator between file name and file extension, and
again, an empty extension consists just of spaces in 8.3 format. So
technically, we would need only take care of the equivalent of this
regex:
(\.git {0,4}|git~1 {0,3})\. {0,3}
However, there are indications that at least some Windows versions might
be more lenient and accept arbitrary combinations of trailing spaces and
periods and strip them out. So we're playing it real safe here. Besides,
there can be little doubt about the intention behind using file names
matching even the more lenient pattern specified above, therefore we
should be fine with disallowing such patterns.
Extra care is taken to catch names such as '.\\.git\\booh' because the
backslash is marked as a directory separator only on Windows, and we want
to use this new helper function also in fsck on other platforms.
A big thank you goes to Ed Thomson and an unnamed Microsoft engineer for
the detailed analysis performed to come up with the corresponding fixes
for libgit2.
This commit adds a function to detect whether a given file name can refer
to the Git directory by mistake.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-16 23:31:03 +01:00
|
|
|
extern int is_ntfs_dotgit(const char *name);
|
2005-07-06 10:11:52 +02:00
|
|
|
|
2011-05-15 21:54:53 +02:00
|
|
|
/* object replacement */
|
2013-12-11 08:46:04 +01:00
|
|
|
#define LOOKUP_REPLACE_OBJECT 1
|
2011-05-15 21:54:54 +02:00
|
|
|
extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
|
2009-01-23 10:07:01 +01:00
|
|
|
static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
|
|
|
|
{
|
2013-12-11 08:46:04 +01:00
|
|
|
return read_sha1_file_extended(sha1, type, size, LOOKUP_REPLACE_OBJECT);
|
2011-05-15 21:54:54 +02:00
|
|
|
}
|
2014-02-28 17:29:16 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This internal function is only declared here for the benefit of
|
|
|
|
* lookup_replace_object(). Please do not call it directly.
|
|
|
|
*/
|
2011-05-15 21:54:53 +02:00
|
|
|
extern const unsigned char *do_lookup_replace_object(const unsigned char *sha1);
|
2014-02-28 17:29:16 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If object sha1 should be replaced, return the replacement object's
|
|
|
|
* name (replaced recursively, if necessary). The return value is
|
|
|
|
* either sha1 or a pointer to a permanently-allocated value. When
|
|
|
|
* object replacement is suppressed, always return sha1.
|
|
|
|
*/
|
2011-05-15 21:54:53 +02:00
|
|
|
static inline const unsigned char *lookup_replace_object(const unsigned char *sha1)
|
|
|
|
{
|
2014-02-18 12:24:55 +01:00
|
|
|
if (!check_replace_refs)
|
2011-05-15 21:54:53 +02:00
|
|
|
return sha1;
|
|
|
|
return do_lookup_replace_object(sha1);
|
2009-01-23 10:07:01 +01:00
|
|
|
}
|
2014-02-28 17:29:16 +01:00
|
|
|
|
2013-12-11 08:46:06 +01:00
|
|
|
static inline const unsigned char *lookup_replace_object_extended(const unsigned char *sha1, unsigned flag)
|
|
|
|
{
|
|
|
|
if (!(flag & LOOKUP_REPLACE_OBJECT))
|
|
|
|
return sha1;
|
|
|
|
return lookup_replace_object(sha1);
|
|
|
|
}
|
2011-05-15 21:54:53 +02:00
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/* Read and unpack a sha1 file into memory, write memory to a sha1 file */
|
2007-02-26 20:55:59 +01:00
|
|
|
extern int sha1_object_info(const unsigned char *, unsigned long *);
|
2007-03-20 21:02:09 +01:00
|
|
|
extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1);
|
2010-04-02 02:03:18 +02:00
|
|
|
extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
|
2007-02-26 20:55:59 +01:00
|
|
|
extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
|
2008-05-14 07:32:48 +02:00
|
|
|
extern int force_object_loose(const unsigned char *sha1, time_t mtime);
|
2013-10-24 20:01:47 +02:00
|
|
|
extern int git_open_noatime(const char *name);
|
2011-05-15 04:42:10 +02:00
|
|
|
extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size);
|
2011-07-19 18:33:03 +02:00
|
|
|
extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz);
|
2011-05-15 04:42:10 +02:00
|
|
|
extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
|
2005-04-24 03:47:23 +02:00
|
|
|
|
close another possibility for propagating pack corruption
Abstract
--------
With index v2 we have a per object CRC to allow quick and safe reuse of
pack data when repacking. This, however, doesn't currently prevent a
stealth corruption from being propagated into a new pack when _not_
reusing pack data as demonstrated by the modification to t5302 included
here.
The Context
-----------
The Git database is all checksummed with SHA1 hashes. Any kind of
corruption can be confirmed by verifying this per object hash against
corresponding data. However this can be costly to perform systematically
and therefore this check is often not performed at run time when
accessing the object database.
First, the loose object format is entirely compressed with zlib which
already provides a CRC verification of its own when inflating data. Any
disk corruption would be caught already in this case.
Then, packed objects are also compressed with zlib but only for their
actual payload. The object headers and delta base references are not
deflated for obvious performance reasons, however this leave them
vulnerable to potentially undetected disk corruptions. Object types
are often validated against the expected type when they're requested,
and deflated size must always match the size recorded in the object header,
so those cases are pretty much covered as well.
Where corruptions could go unnoticed is in the delta base reference.
Of course, in the OBJ_REF_DELTA case, the odds for a SHA1 reference to
get corrupted so it actually matches the SHA1 of another object with the
same size (the delta header stores the expected size of the base object
to apply against) are virtually zero. In the OBJ_OFS_DELTA case, the
reference is a pack offset which would have to match the start boundary
of a different base object but still with the same size, and although this
is relatively much more "probable" than in the OBJ_REF_DELTA case, the
probability is also about zero in absolute terms. Still, the possibility
exists as demonstrated in t5302 and is certainly greater than a SHA1
collision, especially in the OBJ_OFS_DELTA case which is now the default
when repacking.
Again, repacking by reusing existing pack data is OK since the per object
CRC provided by index v2 guards against any such corruptions. What t5302
failed to test is a full repack in such case.
The Solution
------------
As unlikely as this kind of stealth corruption can be in practice, it
certainly isn't acceptable to propagate it into a freshly created pack.
But, because this is so unlikely, we don't want to pay the run time cost
associated with extra validation checks all the time either. Furthermore,
consequences of such corruption in anything but repacking should be rather
visible, and even if it could be quite unpleasant, it still has far less
severe consequences than actively creating bad packs.
So the best compromise is to check packed object CRC when unpacking
objects, and only during the compression/writing phase of a repack, and
only when not streaming the result. The cost of this is minimal (less
than 1% CPU time), and visible only with a full repack.
Someone with a stats background could provide an objective evaluation of
this, but I suspect that it's bad RAM that has more potential for data
corruptions at this point, even in those cases where this extra check
is not performed. Still, it is best to prevent a known hole for
corruption when recreating object data into a new pack.
What about the streamed pack case? Well, any client receiving a pack
must always consider that pack as untrusted and perform full validation
anyway, hence no such stealth corruption could be propagated to remote
repositories already. It is therefore worthless doing local validation
in that case.
Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-10-31 16:31:08 +01:00
|
|
|
/* global flag to enable extra checks when accessing packed objects */
|
|
|
|
extern int do_check_packed_object_crc;
|
|
|
|
|
2005-06-03 17:05:39 +02:00
|
|
|
extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type);
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2006-09-01 09:17:47 +02:00
|
|
|
extern int move_temp_to_file(const char *tmpfile, const char *filename);
|
2005-04-24 03:47:23 +02:00
|
|
|
|
2009-02-28 08:15:53 +01:00
|
|
|
extern int has_sha1_pack(const unsigned char *sha1);
|
2014-02-21 17:32:06 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Return true iff we have an object named sha1, whether local or in
|
|
|
|
* an alternate object database, and whether packed or loose. This
|
|
|
|
* function does not respect replace references.
|
|
|
|
*/
|
2005-04-24 03:47:23 +02:00
|
|
|
extern int has_sha1_file(const unsigned char *sha1);
|
2014-02-21 17:32:06 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Return true iff an alternate object database has a loose object
|
|
|
|
* with the specified name. This function does not respect replace
|
|
|
|
* references.
|
|
|
|
*/
|
2008-11-10 06:59:57 +01:00
|
|
|
extern int has_loose_object_nonlocal(const unsigned char *sha1);
|
2005-04-24 03:47:23 +02:00
|
|
|
|
2005-08-01 02:53:44 +02:00
|
|
|
extern int has_pack_index(const unsigned char *sha1);
|
|
|
|
|
make commit_tree a library function
Until now, this has been part of the commit-tree builtin.
However, it is already used by other builtins (like commit,
merge, and notes), and it would be useful to access it from
library code.
The check_valid helper has to come along, too, but is given
a more library-ish name of "assert_sha1_type".
Otherwise, the code is unchanged. There are still a few
rough edges for a library function, like printing the utf8
warning to stderr, but we can address those if and when they
come up as inappropriate.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-02 02:05:23 +02:00
|
|
|
extern void assert_sha1_type(const unsigned char *sha1, enum object_type expect);
|
|
|
|
|
2007-05-30 19:32:19 +02:00
|
|
|
extern const signed char hexval_table[256];
|
|
|
|
static inline unsigned int hexval(unsigned char c)
|
2006-09-21 01:04:46 +02:00
|
|
|
{
|
|
|
|
return hexval_table[c];
|
|
|
|
}
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/* Convert to/from hex/sha1 representation */
|
2010-10-28 20:28:04 +02:00
|
|
|
#define MINIMUM_ABBREV minimum_abbrev
|
|
|
|
#define DEFAULT_ABBREV default_abbrev
|
2006-01-25 10:03:18 +01:00
|
|
|
|
2010-06-09 19:02:06 +02:00
|
|
|
struct object_context {
|
|
|
|
unsigned char tree[20];
|
|
|
|
char path[PATH_MAX];
|
|
|
|
unsigned mode;
|
|
|
|
};
|
|
|
|
|
2012-07-03 08:35:05 +02:00
|
|
|
#define GET_SHA1_QUIETLY 01
|
|
|
|
#define GET_SHA1_COMMIT 02
|
|
|
|
#define GET_SHA1_COMMITTISH 04
|
|
|
|
#define GET_SHA1_TREE 010
|
|
|
|
#define GET_SHA1_TREEISH 020
|
|
|
|
#define GET_SHA1_BLOB 040
|
2012-07-02 19:32:11 +02:00
|
|
|
#define GET_SHA1_ONLY_TO_DIE 04000
|
2012-06-18 20:32:03 +02:00
|
|
|
|
2005-05-02 01:36:56 +02:00
|
|
|
extern int get_sha1(const char *str, unsigned char *sha1);
|
2012-07-03 08:35:05 +02:00
|
|
|
extern int get_sha1_commit(const char *str, unsigned char *sha1);
|
2012-07-02 21:04:52 +02:00
|
|
|
extern int get_sha1_committish(const char *str, unsigned char *sha1);
|
2012-07-03 08:35:05 +02:00
|
|
|
extern int get_sha1_tree(const char *str, unsigned char *sha1);
|
|
|
|
extern int get_sha1_treeish(const char *str, unsigned char *sha1);
|
|
|
|
extern int get_sha1_blob(const char *str, unsigned char *sha1);
|
2012-07-02 20:01:25 +02:00
|
|
|
extern void maybe_die_on_misspelt_object_name(const char *name, const char *prefix);
|
2012-07-02 19:32:11 +02:00
|
|
|
extern int get_sha1_with_context(const char *str, unsigned flags, unsigned char *sha1, struct object_context *orc);
|
2011-09-23 15:38:36 +02:00
|
|
|
|
2012-07-03 23:21:59 +02:00
|
|
|
typedef int each_abbrev_fn(const unsigned char *sha1, void *);
|
|
|
|
extern int for_each_abbrev(const char *prefix, each_abbrev_fn, void *);
|
2011-09-23 15:38:36 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Try to read a SHA1 in hexadecimal format from the 40 characters
|
|
|
|
* starting at hex. Write the 20-byte result to sha1 in binary form.
|
|
|
|
* Return 0 on success. Reading stops if a NUL is encountered in the
|
|
|
|
* input, so it is safe to pass this function an arbitrary
|
|
|
|
* null-terminated string.
|
|
|
|
*/
|
2005-04-09 21:09:27 +02:00
|
|
|
extern int get_sha1_hex(const char *hex, unsigned char *sha1);
|
2015-03-14 00:39:28 +01:00
|
|
|
extern int get_oid_hex(const char *hex, struct object_id *sha1);
|
2011-09-23 15:38:36 +02:00
|
|
|
|
2005-04-09 21:09:27 +02:00
|
|
|
extern char *sha1_to_hex(const unsigned char *sha1); /* static buffer result! */
|
2015-03-14 00:39:28 +01:00
|
|
|
extern char *oid_to_hex(const struct object_id *oid); /* same static buffer as sha1_to_hex */
|
2014-07-15 21:59:36 +02:00
|
|
|
extern int read_ref_full(const char *refname, int resolve_flags,
|
|
|
|
unsigned char *sha1, int *flags);
|
2011-12-12 06:38:09 +01:00
|
|
|
extern int read_ref(const char *refname, unsigned char *sha1);
|
2011-09-15 23:10:42 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Resolve a reference, recursively following symbolic references.
|
|
|
|
*
|
|
|
|
* Store the referred-to object's name in sha1 and return the name of
|
|
|
|
* the non-symbolic reference that ultimately pointed at it. The
|
|
|
|
* return value, if not NULL, is a pointer into either a static buffer
|
|
|
|
* or the input ref.
|
|
|
|
*
|
|
|
|
* If the reference cannot be resolved to an object, the behavior
|
2014-07-15 21:59:36 +02:00
|
|
|
* depends on the RESOLVE_REF_READING flag:
|
2011-09-15 23:10:42 +02:00
|
|
|
*
|
2014-07-15 21:59:36 +02:00
|
|
|
* - If RESOLVE_REF_READING is set, return NULL.
|
2011-09-15 23:10:42 +02:00
|
|
|
*
|
2014-07-15 21:59:36 +02:00
|
|
|
* - If RESOLVE_REF_READING is not set, clear sha1 and return the name of
|
|
|
|
* the last reference name in the chain, which will either be a non-symbolic
|
2011-09-15 23:10:42 +02:00
|
|
|
* reference or an undefined reference. If this is a prelude to
|
|
|
|
* "writing" to the ref, the return value is the name of the ref
|
|
|
|
* that will actually be created or changed.
|
|
|
|
*
|
2014-09-11 03:22:48 +02:00
|
|
|
* If the RESOLVE_REF_NO_RECURSE flag is passed, only resolves one
|
|
|
|
* level of symbolic reference. The value stored in sha1 for a symbolic
|
|
|
|
* reference will always be null_sha1 in this case, and the return
|
|
|
|
* value is the reference that the symref refers to directly.
|
|
|
|
*
|
2014-07-15 21:59:36 +02:00
|
|
|
* If flags is non-NULL, set the value that it points to to the
|
2011-09-15 23:10:42 +02:00
|
|
|
* combination of REF_ISPACKED (if the reference was found among the
|
2014-07-15 21:59:36 +02:00
|
|
|
* packed references), REF_ISSYMREF (if the initial reference was a
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
* symbolic reference), REF_BAD_NAME (if the reference name is ill
|
|
|
|
* formed --- see RESOLVE_REF_ALLOW_BAD_NAME below), and REF_ISBROKEN
|
|
|
|
* (if the ref is malformed or has a bad name). See refs.h for more detail
|
|
|
|
* on each flag.
|
2011-09-15 23:10:42 +02:00
|
|
|
*
|
|
|
|
* If ref is not a properly-formatted, normalized reference, return
|
|
|
|
* NULL. If more than MAXDEPTH recursive symbolic lookups are needed,
|
|
|
|
* give up and return NULL.
|
|
|
|
*
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
* RESOLVE_REF_ALLOW_BAD_NAME allows resolving refs even when their
|
|
|
|
* name is invalid according to git-check-ref-format(1). If the name
|
|
|
|
* is bad then the value stored in sha1 will be null_sha1 and the two
|
|
|
|
* flags REF_ISBROKEN and REF_BAD_NAME will be set.
|
|
|
|
*
|
|
|
|
* Even with RESOLVE_REF_ALLOW_BAD_NAME, names that escape the refs/
|
|
|
|
* directory and do not consist of all caps and underscores cannot be
|
|
|
|
* resolved. The function returns NULL for such ref names.
|
|
|
|
* Caps and underscores refers to the special refs, such as HEAD,
|
|
|
|
* FETCH_HEAD and friends, that all live outside of the refs/ directory.
|
2011-09-15 23:10:42 +02:00
|
|
|
*/
|
2014-07-15 21:59:36 +02:00
|
|
|
#define RESOLVE_REF_READING 0x01
|
2014-09-11 03:22:48 +02:00
|
|
|
#define RESOLVE_REF_NO_RECURSE 0x02
|
refs.c: allow listing and deleting badly named refs
We currently do not handle badly named refs well:
$ cp .git/refs/heads/master .git/refs/heads/master.....@\*@\\.
$ git branch
fatal: Reference has invalid format: 'refs/heads/master.....@*@\.'
$ git branch -D master.....@\*@\\.
error: branch 'master.....@*@\.' not found.
Users cannot recover from a badly named ref without manually finding
and deleting the loose ref file or appropriate line in packed-refs.
Making that easier will make it easier to tweak the ref naming rules
in the future, for example to forbid shell metacharacters like '`'
and '"', without putting people in a state that is hard to get out of.
So allow "branch --list" to show these refs and allow "branch -d/-D"
and "update-ref -d" to delete them. Other commands (for example to
rename refs) will continue to not handle these refs but can be changed
in later patches.
Details:
In resolving functions, refuse to resolve refs that don't pass the
git-check-ref-format(1) check unless the new RESOLVE_REF_ALLOW_BAD_NAME
flag is passed. Even with RESOLVE_REF_ALLOW_BAD_NAME, refuse to
resolve refs that escape the refs/ directory and do not match the
pattern [A-Z_]* (think "HEAD" and "MERGE_HEAD").
In locking functions, refuse to act on badly named refs unless they
are being deleted and either are in the refs/ directory or match [A-Z_]*.
Just like other invalid refs, flag resolved, badly named refs with the
REF_ISBROKEN flag, treat them as resolving to null_sha1, and skip them
in all iteration functions except for for_each_rawref.
Flag badly named refs (but not symrefs pointing to badly named refs)
with a REF_BAD_NAME flag to make it easier for future callers to
notice and handle them specially. For example, in a later patch
for-each-ref will use this flag to detect refs whose names can confuse
callers parsing for-each-ref output.
In the transaction API, refuse to create or update badly named refs,
but allow deleting them (unless they try to escape refs/ and don't match
[A-Z_]*).
Signed-off-by: Ronnie Sahlberg <sahlberg@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-03 20:45:43 +02:00
|
|
|
#define RESOLVE_REF_ALLOW_BAD_NAME 0x04
|
2014-07-15 21:59:36 +02:00
|
|
|
extern const char *resolve_ref_unsafe(const char *ref, int resolve_flags, unsigned char *sha1, int *flags);
|
|
|
|
extern char *resolve_refdup(const char *ref, int resolve_flags, unsigned char *sha1, int *flags);
|
2011-09-15 23:10:42 +02:00
|
|
|
|
2007-01-19 10:15:15 +01:00
|
|
|
extern int dwim_ref(const char *str, int len, unsigned char *sha1, char **ref);
|
2007-02-09 01:28:23 +01:00
|
|
|
extern int dwim_log(const char *str, int len, unsigned char *sha1, char **ref);
|
2013-09-02 08:34:29 +02:00
|
|
|
extern int interpret_branch_name(const char *str, int len, struct strbuf *);
|
2009-10-18 21:34:56 +02:00
|
|
|
extern int get_sha1_mb(const char *str, unsigned char *sha1);
|
2007-01-19 10:15:15 +01:00
|
|
|
|
2014-01-14 04:16:07 +01:00
|
|
|
/*
|
|
|
|
* Return true iff abbrev_name is a possible abbreviation for
|
|
|
|
* full_name according to the rules defined by ref_rev_parse_rules in
|
|
|
|
* refs.c.
|
|
|
|
*/
|
|
|
|
extern int refname_match(const char *abbrev_name, const char *full_name);
|
add refname_match()
We use at least two rulesets for matching abbreviated refnames with
full refnames (starting with 'refs/'). git-rev-parse and git-fetch
use slightly different rules.
This commit introduces a new function refname_match
(const char *abbrev_name, const char *full_name, const char **rules).
abbrev_name is expanded using the rules and matched against full_name.
If a match is found the function returns true. rules is a NULL-terminated
list of format patterns with "%.*s", for example:
const char *ref_rev_parse_rules[] = {
"%.*s",
"refs/%.*s",
"refs/tags/%.*s",
"refs/heads/%.*s",
"refs/remotes/%.*s",
"refs/remotes/%.*s/HEAD",
NULL
};
Asterisks are included in the format strings because this is the form
required in sha1_name.c. Sharing the list with the functions there is
a good idea to avoid duplicating the rules. Hopefully this
facilitates unified matching rules in the future.
This commit makes the rules used by rev-parse for resolving refs to
sha1s available for string comparison. Before this change, the rules
were buried in get_sha1*() and dwim_ref().
A follow-up commit will refactor the rules used by fetch.
refname_match() will be used for matching refspecs in git-send-pack.
Thanks to Daniel Barkalow <barkalow@iabervon.org> for pointing
out that ref_matches_abbrev in remote.c solves a similar problem
and care should be taken to avoid confusion.
Signed-off-by: Steffen Prohaska <prohaska@zib.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-11 15:01:46 +01:00
|
|
|
|
2007-01-26 23:26:10 +01:00
|
|
|
extern int create_symref(const char *ref, const char *refs_heads_master, const char *logmsg);
|
2007-01-02 08:31:08 +01:00
|
|
|
extern int validate_headref(const char *ref);
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2005-05-20 18:09:18 +02:00
|
|
|
extern int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
|
2008-03-06 03:25:10 +01:00
|
|
|
extern int df_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
|
2014-06-20 04:06:44 +02:00
|
|
|
extern int name_compare(const char *name1, size_t len1, const char *name2, size_t len2);
|
2012-07-11 11:22:37 +02:00
|
|
|
extern int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2);
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2005-04-29 01:42:27 +02:00
|
|
|
extern void *read_object_with_reference(const unsigned char *sha1,
|
2005-05-18 14:14:09 +02:00
|
|
|
const char *required_type,
|
2005-04-29 01:42:27 +02:00
|
|
|
unsigned long *size,
|
|
|
|
unsigned char *sha1_ret);
|
2005-04-21 03:06:49 +02:00
|
|
|
|
2007-12-24 09:51:01 +01:00
|
|
|
extern struct object *peel_to_type(const char *name, int namelen,
|
|
|
|
struct object *o, enum object_type);
|
|
|
|
|
2007-07-14 08:14:52 +02:00
|
|
|
/*
 * Selector passed as the "mode" argument of show_date() and
 * show_ident_date(), and returned by parse_date_format() (all declared
 * in this header).  DATE_NORMAL is pinned to 0; the remaining
 * enumerators take consecutive values in declaration order.
 */
enum date_mode {
|
|
|
|
DATE_NORMAL = 0,
|
|
|
|
DATE_RELATIVE,
|
|
|
|
DATE_SHORT,
|
|
|
|
DATE_LOCAL,
|
|
|
|
DATE_ISO8601,
|
2014-08-29 18:58:42 +02:00
|
|
|
DATE_ISO8601_STRICT,
|
2009-02-20 23:15:22 +01:00
|
|
|
DATE_RFC2822,
|
|
|
|
DATE_RAW
|
2007-07-14 08:14:52 +02:00
|
|
|
};
|
|
|
|
|
2007-02-27 16:21:04 +01:00
|
|
|
const char *show_date(unsigned long time, int timezone, enum date_mode mode);
|
2012-04-23 14:30:23 +02:00
|
|
|
void show_date_relative(unsigned long time, int tz, const struct timeval *now,
|
|
|
|
struct strbuf *timebuf);
|
2014-08-27 09:57:08 +02:00
|
|
|
int parse_date(const char *date, struct strbuf *out);
|
2010-07-15 18:22:57 +02:00
|
|
|
int parse_date_basic(const char *date, unsigned long *timestamp, int *offset);
|
2013-04-18 00:38:08 +02:00
|
|
|
int parse_expiry_date(const char *date, unsigned long *timestamp);
|
2014-08-27 09:57:08 +02:00
|
|
|
void datestamp(struct strbuf *out);
|
2010-01-26 20:58:00 +01:00
|
|
|
/* Shorthand for approxidate_careful() with a NULL second argument. */
#define approxidate(s) approxidate_careful((s), NULL)
|
|
|
|
unsigned long approxidate_careful(const char *, int *);
|
2009-08-31 04:26:05 +02:00
|
|
|
unsigned long approxidate_relative(const char *date, const struct timeval *now);
|
2007-09-28 16:17:31 +02:00
|
|
|
enum date_mode parse_date_format(const char *format);
|
2014-02-24 08:39:45 +01:00
|
|
|
int date_overflows(unsigned long date);
|
2005-04-30 18:46:49 +02:00
|
|
|
|
2012-05-25 01:28:40 +02:00
|
|
|
#define IDENT_STRICT 1
|
2012-05-22 01:10:11 +02:00
|
|
|
#define IDENT_NO_DATE 2
|
ident: let callers omit name with fmt_indent
Most callers want to see all of "$name <$email> $date", but
a few want only limited parts, omitting the date, or even
the name. We already have IDENT_NO_DATE to handle the date
part, but there's not a good option for getting just the
email. Callers have to do one of:
1. Call ident_default_email; this does not respect
environment variables, nor does it promise to trim
whitespace or other crud from the result.
2. Call git_{committer,author}_info; this returns the name
and email, leaving the caller to parse out the wanted
bits.
This patch adds IDENT_NO_NAME; it stops short of adding
IDENT_NO_EMAIL, as no callers want it (nor are likely to),
and it complicates the error handling of the function.
When no name is requested, the angle brackets (<>) around
the email address are also omitted.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-05-25 01:27:24 +02:00
|
|
|
#define IDENT_NO_NAME 4
|
2006-02-19 05:31:05 +01:00
|
|
|
extern const char *git_author_info(int);
|
|
|
|
extern const char *git_committer_info(int);
|
2007-02-05 02:50:14 +01:00
|
|
|
extern const char *fmt_ident(const char *name, const char *email, const char *date_str, int);
|
2007-12-02 22:43:34 +01:00
|
|
|
extern const char *fmt_name(const char *name, const char *email);
|
2014-07-25 21:11:34 +02:00
|
|
|
extern const char *ident_default_name(void);
|
2012-05-22 01:09:43 +02:00
|
|
|
extern const char *ident_default_email(void);
|
2009-11-12 01:01:27 +01:00
|
|
|
extern const char *git_editor(void);
|
2010-02-14 12:59:59 +01:00
|
|
|
extern const char *git_pager(int stdout_is_tty);
|
2012-05-22 01:09:54 +02:00
|
|
|
extern int git_ident_config(const char *, const char *, void *);
|
2005-07-12 20:49:27 +02:00
|
|
|
|
2012-03-11 10:25:43 +01:00
|
|
|
/*
 * Four begin/end pointer pairs (name, mail, date, tz); the structure is
 * passed by non-const pointer to split_ident_line() declared below.
 * NOTE(review): presumably each pair delimits a substring of the line
 * handed to split_ident_line() -- confirm against its implementation.
 */
struct ident_split {
|
|
|
|
const char *name_begin;
|
|
|
|
const char *name_end;
|
|
|
|
const char *mail_begin;
|
|
|
|
const char *mail_end;
|
|
|
|
const char *date_begin;
|
|
|
|
const char *date_end;
|
|
|
|
const char *tz_begin;
|
|
|
|
const char *tz_end;
|
|
|
|
};
|
|
|
|
/*
|
|
|
|
* Signals success with 0, but the time part of the result may be NULL
|
|
|
|
* if the input lacks timestamp and zone
|
|
|
|
*/
|
|
|
|
extern int split_ident_line(struct ident_split *, const char *, int);
|
|
|
|
|
2014-05-02 03:07:22 +02:00
|
|
|
/*
|
|
|
|
* Like show_date, but pull the timestamp and tz parameters from
|
|
|
|
* the ident_split. It will also sanity-check the values and produce
|
|
|
|
* a well-known sentinel date if they appear bogus.
|
|
|
|
*/
|
|
|
|
const char *show_ident_date(const struct ident_split *id, enum date_mode mode);
|
|
|
|
|
2013-09-20 12:16:28 +02:00
|
|
|
/*
|
|
|
|
* Compare split idents for equality or strict ordering. Note that we
|
|
|
|
* compare only the ident part of the line, ignoring any timestamp.
|
|
|
|
*
|
|
|
|
* Because there are two fields, we must choose one as the primary key; we
|
|
|
|
* currently arbitrarily pick the email.
|
|
|
|
*/
|
|
|
|
extern int ident_cmp(const struct ident_split *, const struct ident_split *);
|
|
|
|
|
2005-06-06 06:59:54 +02:00
|
|
|
struct checkout {
|
2014-06-13 14:19:34 +02:00
|
|
|
struct index_state *istate;
|
2005-06-06 06:59:54 +02:00
|
|
|
const char *base_dir;
|
|
|
|
int base_dir_len;
|
|
|
|
unsigned force:1,
|
|
|
|
quiet:1,
|
|
|
|
not_new:1,
|
|
|
|
refresh_cache:1;
|
|
|
|
};
|
|
|
|
|
2013-10-23 19:52:42 +02:00
|
|
|
#define TEMPORARY_FILENAME_LENGTH 25
|
2007-04-25 16:18:08 +02:00
|
|
|
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
|
2009-07-09 22:35:31 +02:00
|
|
|
|
|
|
|
struct cache_def {
|
2014-07-05 00:41:46 +02:00
|
|
|
struct strbuf path;
|
2009-07-09 22:35:31 +02:00
|
|
|
int flags;
|
|
|
|
int track_flags;
|
|
|
|
int prefix_len_stat_func;
|
|
|
|
};
|
2014-07-05 00:41:46 +02:00
|
|
|
/*
 * Static initializer for struct cache_def, in field order:
 * path = STRBUF_INIT, then flags, track_flags and
 * prefix_len_stat_func all set to 0.
 */
#define CACHE_DEF_INIT { STRBUF_INIT, 0, 0, 0 }
|
2014-07-12 01:02:34 +02:00
|
|
|
/*
 * Release the strbuf held in cache->path.  The remaining fields of
 * *cache are not touched.
 */
static inline void cache_def_clear(struct cache_def *cache)
|
2014-07-05 00:41:46 +02:00
|
|
|
{
|
|
|
|
strbuf_release(&cache->path);
|
|
|
|
}
|
2009-07-09 22:35:31 +02:00
|
|
|
|
2009-02-09 21:54:06 +01:00
|
|
|
extern int has_symlink_leading_path(const char *name, int len);
|
2009-07-09 22:35:31 +02:00
|
|
|
extern int threaded_has_symlink_leading_path(struct cache_def *, const char *, int);
|
2010-10-09 15:53:00 +02:00
|
|
|
extern int check_leading_path(const char *name, int len);
|
2009-02-09 21:54:06 +01:00
|
|
|
extern int has_dirs_only_path(const char *name, int len, int prefix_len);
|
2009-02-09 21:54:07 +01:00
|
|
|
extern void schedule_dir_for_removal(const char *name, int len);
|
|
|
|
extern void remove_scheduled_dirs(void);
|
2005-06-06 06:59:54 +02:00
|
|
|
|
2005-06-28 23:56:57 +02:00
|
|
|
extern struct alternate_object_database {
|
2005-08-15 02:25:57 +02:00
|
|
|
struct alternate_object_database *next;
|
2005-06-28 23:56:57 +02:00
|
|
|
char *name;
|
2006-01-07 10:33:54 +01:00
|
|
|
char base[FLEX_ARRAY]; /* more */
|
2005-08-15 02:25:57 +02:00
|
|
|
} *alt_odb_list;
|
2005-06-28 23:56:57 +02:00
|
|
|
extern void prepare_alt_odb(void);
|
2012-05-14 18:24:45 +02:00
|
|
|
extern void read_info_alternates(const char * relative_base, int depth);
|
2008-04-18 01:32:30 +02:00
|
|
|
extern void add_to_alternates_file(const char *reference);
|
push: receiver end advertises refs from alternate repositories
Earlier, when pushing into a repository that borrows from alternate object
stores, we followed the longstanding design decision not to trust refs in
the alternate repository that houses the object store we are borrowing
from. If your public repository is borrowing from Linus's public
repository, you pushed into it a long time ago, and now when you try to push
your updated history that is in sync with more recent history from Linus,
you will end up sending not just your own development, but also the
changes you acquired through Linus's tree, even though the objects needed
for the latter already exist at the receiving end. This is because the
receiving end does not advertise that the objects only reachable from the
borrowed repository (i.e. Linus's) are already available there.
This solves the issue by making the receiving end advertise refs from
borrowed repositories. They are not sent with their true names but with a
phoney name ".have" to make sure that the old senders will safely ignore
them (otherwise, the old senders will misbehave, trying to push matching
refs, and mirror push that deletes refs that only exist at the receiving
end).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-09 10:27:10 +02:00
|
|
|
typedef int alt_odb_fn(struct alternate_object_database *, void *);
|
2014-10-16 00:33:13 +02:00
|
|
|
extern int foreach_alt_odb(alt_odb_fn, void*);
|
2005-06-28 23:56:57 +02:00
|
|
|
|
2006-12-23 08:33:44 +01:00
|
|
|
struct pack_window {
|
|
|
|
struct pack_window *next;
|
|
|
|
unsigned char *base;
|
|
|
|
off_t offset;
|
|
|
|
size_t len;
|
|
|
|
unsigned int last_used;
|
|
|
|
unsigned int inuse_cnt;
|
|
|
|
};
|
|
|
|
|
2005-06-28 23:56:57 +02:00
|
|
|
extern struct packed_git {
|
|
|
|
struct packed_git *next;
|
2006-12-23 08:33:44 +01:00
|
|
|
struct pack_window *windows;
|
2006-12-23 08:33:47 +01:00
|
|
|
off_t pack_size;
|
2007-04-09 07:06:28 +02:00
|
|
|
const void *index_data;
|
|
|
|
size_t index_size;
|
|
|
|
uint32_t num_objects;
|
2008-06-24 03:23:39 +02:00
|
|
|
uint32_t num_bad_objects;
|
|
|
|
unsigned char *bad_object_sha1;
|
2007-03-16 21:42:50 +01:00
|
|
|
int index_version;
|
2007-04-09 07:06:28 +02:00
|
|
|
time_t mtime;
|
2006-12-23 08:34:01 +01:00
|
|
|
int pack_fd;
|
2008-11-12 18:59:03 +01:00
|
|
|
unsigned pack_local:1,
|
2011-03-02 19:01:54 +01:00
|
|
|
pack_keep:1,
|
2015-04-20 21:55:00 +02:00
|
|
|
freshened:1,
|
2011-03-02 19:01:54 +01:00
|
|
|
do_not_close:1;
|
2005-08-01 02:53:44 +02:00
|
|
|
unsigned char sha1[20];
|
2006-01-07 10:33:54 +01:00
|
|
|
/* something like ".git/objects/pack/xxxxx.pack" */
|
|
|
|
char pack_name[FLEX_ARRAY]; /* more */
|
2005-06-28 23:56:57 +02:00
|
|
|
} *packed_git;
|
2005-07-01 02:15:39 +02:00
|
|
|
|
|
|
|
struct pack_entry {
|
2007-03-07 02:44:30 +01:00
|
|
|
off_t offset;
|
2005-07-01 02:15:39 +02:00
|
|
|
unsigned char sha1[20];
|
|
|
|
struct packed_git *p;
|
|
|
|
};
|
|
|
|
|
2010-04-19 16:23:08 +02:00
|
|
|
extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
|
2005-08-01 02:53:44 +02:00
|
|
|
|
2013-02-15 13:07:10 +01:00
|
|
|
/* A hook for count-objects to report invalid files in pack directory */
|
|
|
|
extern void (*report_garbage)(const char *desc, const char *path);
|
|
|
|
|
2005-06-28 23:56:57 +02:00
|
|
|
extern void prepare_packed_git(void);
|
2006-11-01 23:06:21 +01:00
|
|
|
extern void reprepare_packed_git(void);
|
2005-08-01 02:53:44 +02:00
|
|
|
extern void install_packed_git(struct packed_git *pack);
|
|
|
|
|
2007-06-07 09:04:01 +02:00
|
|
|
extern struct packed_git *find_sha1_pack(const unsigned char *sha1,
|
2005-08-01 02:53:44 +02:00
|
|
|
struct packed_git *packs);
|
|
|
|
|
2007-01-17 07:28:02 +01:00
|
|
|
extern void pack_report(void);
|
2014-02-21 17:32:06 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* mmap the index file for the specified packfile (if it is not
|
|
|
|
* already mmapped). Return 0 on success.
|
|
|
|
*/
|
2007-05-26 07:24:19 +02:00
|
|
|
extern int open_pack_index(struct packed_git *);
|
2014-02-21 17:32:06 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* munmap the index file for the specified packfile (if it is
|
|
|
|
* currently mmapped).
|
|
|
|
*/
|
2010-04-19 16:23:06 +02:00
|
|
|
extern void close_pack_index(struct packed_git *);
|
2014-02-21 17:32:06 +01:00
|
|
|
|
2011-06-10 20:52:15 +02:00
|
|
|
extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
|
2008-01-18 04:57:00 +01:00
|
|
|
extern void close_pack_windows(struct packed_git *);
|
2006-12-23 08:34:08 +01:00
|
|
|
extern void unuse_pack(struct pack_window **);
|
2008-12-09 20:26:52 +01:00
|
|
|
extern void free_pack_by_name(const char *);
|
2009-02-10 22:36:12 +01:00
|
|
|
extern void clear_delta_base_cache(void);
|
2007-03-16 21:42:50 +01:00
|
|
|
extern struct packed_git *add_packed_git(const char *, int, int);
|
2014-02-21 17:32:06 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Return the SHA-1 of the nth object within the specified packfile.
|
|
|
|
* Open the index if it is not already open. The return value points
|
|
|
|
* at the SHA-1 within the mmapped index. Return NULL if there is an
|
|
|
|
* error.
|
|
|
|
*/
|
|
|
|
extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t n);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return the offset of the nth object within the specified packfile.
|
|
|
|
* The index must already be opened.
|
|
|
|
*/
|
|
|
|
extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t n);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the object named sha1 is present in the specified packfile,
|
|
|
|
* return its offset within the packfile; otherwise, return 0.
|
|
|
|
*/
|
|
|
|
extern off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *);
|
|
|
|
|
pack-objects: protect against disappearing packs
It's possible that while pack-objects is running, a
simultaneously running prune process might delete a pack
that we are interested in. Because we load the pack indices
early on, we know that the pack contains our item, but by
the time we try to open and map it, it is gone.
Since c715f78, we already protect against this in the normal
object access code path, but pack-objects accesses the packs
at a lower level. In the normal access path, we call
find_pack_entry, which will call find_pack_entry_one on each
pack index, which does the actual lookup. If it gets a hit,
we will actually open and verify the validity of the
matching packfile (using c715f78's is_pack_valid). If we
can't open it, we'll issue a warning and pretend that we
didn't find it, causing us to go on to the next pack (or on
to loose objects).
Furthermore, we will cache the descriptor to the opened
packfile. Which means that later, when we actually try to
access the object, we are likely to still have that packfile
opened, and won't care if it has been unlinked from the
filesystem.
Notice the "likely" above. If there is another pack access
in the interim, and we run out of descriptors, we could
close the pack. And then a later attempt to access the
closed pack could fail (we'll try to re-open it, of course,
but it may have been deleted). In practice, this doesn't
happen because we tend to look up items and then access them
immediately.
Pack-objects does not follow this code path. Instead, it
accesses the packs at a much lower level, using
find_pack_entry_one directly. This means we skip the
is_pack_valid check, and may end up with the name of a
packfile, but no open descriptor.
We can add the same is_pack_valid check here. Unfortunately,
the access patterns of pack-objects are not quite as nice
for keeping lookup and object access together. We look up
each object as we find out about it, and then only later when
writing the packfile do we necessarily access it. Which
means that the opened packfile may be closed in the interim.
In practice, however, adding this check still has value, for
three reasons.
1. If you have a reasonable number of packs and/or a
reasonable file descriptor limit, you can keep all of
your packs open simultaneously. If this is the case,
then the race is impossible to trigger.
2. Even if you can't keep all packs open at once, you
may end up keeping the deleted one open (i.e., you may
get lucky).
3. The race window is shortened. You may notice early that
the pack is gone, and not try to access it. Triggering
the problem without this check means deleting the pack
any time after we read the list of index files, but
before we access the looked-up objects. Triggering it
with this check means deleting
the pack after we do a lookup (and successfully access
the packfile), but before we access the object. Which
is a smaller window.
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-10-14 20:03:48 +02:00
|
|
|
extern int is_pack_valid(struct packed_git *);
|
2007-03-07 02:44:30 +01:00
|
|
|
extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
|
2008-10-30 00:02:46 +01:00
|
|
|
extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
|
2007-04-16 18:31:56 +02:00
|
|
|
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
|
2011-05-14 00:33:33 +02:00
|
|
|
extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
|
2005-06-28 23:56:57 +02:00
|
|
|
|
2014-10-16 00:38:55 +02:00
|
|
|
/*
|
|
|
|
* Iterate over the files in the loose-object parts of the object
|
|
|
|
* directory "path", triggering the following callbacks:
|
|
|
|
*
|
|
|
|
* - loose_object is called for each loose object we find.
|
|
|
|
*
|
|
|
|
* - loose_cruft is called for any files that do not appear to be
|
|
|
|
* loose objects. Note that we only look in the loose object
|
|
|
|
* directories "objects/[0-9a-f]{2}/", so we will not report
|
|
|
|
* "objects/foobar" as cruft.
|
|
|
|
*
|
|
|
|
* - loose_subdir is called for each top-level hashed subdirectory
|
|
|
|
* of the object directory (e.g., "$OBJDIR/f0"). It is called
|
|
|
|
* after the objects in the directory are processed.
|
|
|
|
*
|
|
|
|
* Any callback that is NULL will be ignored. Callbacks returning non-zero
|
|
|
|
* will end the iteration.
|
2015-02-09 02:13:22 +01:00
|
|
|
*
|
|
|
|
* In the "buf" variant, "path" is a strbuf which will also be used as a
|
|
|
|
* scratch buffer, but restored to its original contents before
|
|
|
|
* the function returns.
|
2014-10-16 00:38:55 +02:00
|
|
|
*/
|
|
|
|
typedef int each_loose_object_fn(const unsigned char *sha1,
|
|
|
|
const char *path,
|
|
|
|
void *data);
|
|
|
|
typedef int each_loose_cruft_fn(const char *basename,
|
|
|
|
const char *path,
|
|
|
|
void *data);
|
|
|
|
typedef int each_loose_subdir_fn(int nr,
|
|
|
|
const char *path,
|
|
|
|
void *data);
|
|
|
|
int for_each_loose_file_in_objdir(const char *path,
|
|
|
|
each_loose_object_fn obj_cb,
|
|
|
|
each_loose_cruft_fn cruft_cb,
|
|
|
|
each_loose_subdir_fn subdir_cb,
|
|
|
|
void *data);
|
2015-02-09 02:13:22 +01:00
|
|
|
int for_each_loose_file_in_objdir_buf(struct strbuf *path,
|
|
|
|
each_loose_object_fn obj_cb,
|
|
|
|
each_loose_cruft_fn cruft_cb,
|
|
|
|
each_loose_subdir_fn subdir_cb,
|
|
|
|
void *data);
|
2014-10-16 00:38:55 +02:00
|
|
|
|
2014-10-16 00:41:21 +02:00
|
|
|
/*
|
|
|
|
* Iterate over loose and packed objects in both the local
|
reachable: only mark local objects as recent
When pruning and repacking a repository that has an
alternate object store configured, we may traverse a large
number of objects in the alternate. This serves no purpose,
and may be expensive to do. A longer explanation is below.
Commits d3038d2 and abcb865 taught prune and pack-objects
(respectively) to treat "recent" objects as tips for
reachability, so that we keep whole chunks of history. They
built on the object traversal in 660c889 (sha1_file: add
for_each iterators for loose and packed objects,
2014-10-15), which covers both local and alternate objects.
In both cases, covering alternate objects is unnecessary, as
both commands can only drop objects from the local
repository. In the case of prune, we traverse only the local
object directory. And in the case of repacking, while we may
or may not include local objects in our pack, we will never
reach into the alternate with "repack -d". The "-l" option
is only a question of whether we are migrating objects from
the alternate into our repository, or leaving them
untouched.
It is possible that we may drop an object that is depended
upon by another object in the alternate. For example,
imagine two repositories, A and B, with A pointing to B as
an alternate. Now imagine a commit that is in B which
references a tree that is only in A. Traversing from recent
objects in B might prevent A from dropping that tree. But
this case isn't worth covering. Repo B should take
responsibility for its own objects. It would never have had
the commit in the first place if it did not also have the
tree, and assuming it is using the same "keep recent chunks
of history" scheme, then it would itself keep the tree, as
well.
So checking the alternate objects is not worth doing, and
comes with a significant performance impact. In both cases,
we skip any recent objects that have already been marked
SEEN (i.e., that we know are already reachable for prune, or
included in the pack for a repack). So there is a slight
waste of time in opening the alternate packs at all, only to
notice that we have already considered each object. But much
worse, the alternate repository may have a large number of
objects that are not reachable from the local repository at
all, and we end up adding them to the traversal.
We can fix this by considering only local unseen objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-27 12:32:41 +01:00
|
|
|
* repository and any alternate repositories (unless the
|
|
|
|
* LOCAL_ONLY flag is set).
|
2014-10-16 00:41:21 +02:00
|
|
|
*/
|
reachable: only mark local objects as recent
When pruning and repacking a repository that has an
alternate object store configured, we may traverse a large
number of objects in the alternate. This serves no purpose,
and may be expensive to do. A longer explanation is below.
Commits d3038d2 and abcb865 taught prune and pack-objects
(respectively) to treat "recent" objects as tips for
reachability, so that we keep whole chunks of history. They
built on the object traversal in 660c889 (sha1_file: add
for_each iterators for loose and packed objects,
2014-10-15), which covers both local and alternate objects.
In both cases, covering alternate objects is unnecessary, as
both commands can only drop objects from the local
repository. In the case of prune, we traverse only the local
object directory. And in the case of repacking, while we may
or may not include local objects in our pack, we will never
reach into the alternate with "repack -d". The "-l" option
is only a question of whether we are migrating objects from
the alternate into our repository, or leaving them
untouched.
It is possible that we may drop an object that is depended
upon by another object in the alternate. For example,
imagine two repositories, A and B, with A pointing to B as
an alternate. Now imagine a commit that is in B which
references a tree that is only in A. Traversing from recent
objects in B might prevent A from dropping that tree. But
this case isn't worth covering. Repo B should take
responsibility for its own objects. It would never have had
the commit in the first place if it did not also have the
tree, and assuming it is using the same "keep recent chunks
of history" scheme, then it would itself keep the tree, as
well.
So checking the alternate objects is not worth doing, and
comes with a significant performance impact. In both cases,
we skip any recent objects that have already been marked
SEEN (i.e., that we know are already reachable for prune, or
included in the pack for a repack). So there is a slight
waste of time in opening the alternate packs at all, only to
notice that we have already considered each object. But much
worse, the alternate repository may have a large number of
objects that are not reachable from the local repository at
all, and we end up adding them to the traversal.
We can fix this by considering only local unseen objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-27 12:32:41 +01:00
|
|
|
#define FOR_EACH_OBJECT_LOCAL_ONLY 0x1
|
2014-10-16 00:41:21 +02:00
|
|
|
typedef int each_packed_object_fn(const unsigned char *sha1,
|
|
|
|
struct packed_git *pack,
|
|
|
|
uint32_t pos,
|
|
|
|
void *data);
|
reachable: only mark local objects as recent
When pruning and repacking a repository that has an
alternate object store configured, we may traverse a large
number of objects in the alternate. This serves no purpose,
and may be expensive to do. A longer explanation is below.
Commits d3038d2 and abcb865 taught prune and pack-objects
(respectively) to treat "recent" objects as tips for
reachability, so that we keep whole chunks of history. They
built on the object traversal in 660c889 (sha1_file: add
for_each iterators for loose and packed objects,
2014-10-15), which covers both local and alternate objects.
In both cases, covering alternate objects is unnecessary, as
both commands can only drop objects from the local
repository. In the case of prune, we traverse only the local
object directory. And in the case of repacking, while we may
or may not include local objects in our pack, we will never
reach into the alternate with "repack -d". The "-l" option
is only a question of whether we are migrating objects from
the alternate into our repository, or leaving them
untouched.
It is possible that we may drop an object that is depended
upon by another object in the alternate. For example,
imagine two repositories, A and B, with A pointing to B as
an alternate. Now imagine a commit that is in B which
references a tree that is only in A. Traversing from recent
objects in B might prevent A from dropping that tree. But
this case isn't worth covering. Repo B should take
responsibility for its own objects. It would never have had
the commit in the first place if it did not also have the
tree, and assuming it is using the same "keep recent chunks
of history" scheme, then it would itself keep the tree, as
well.
So checking the alternate objects is not worth doing, and
comes with a significant performance impact. In both cases,
we skip any recent objects that have already been marked
SEEN (i.e., that we know are already reachable for prune, or
included in the pack for a repack). So there is a slight
waste of time in opening the alternate packs at all, only to
notice that we have already considered each object. But much
worse, the alternate repository may have a large number of
objects that are not reachable from the local repository at
all, and we end up adding them to the traversal.
We can fix this by considering only local unseen objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-27 12:32:41 +01:00
|
|
|
extern int for_each_loose_object(each_loose_object_fn, void *, unsigned flags);
|
|
|
|
extern int for_each_packed_object(each_packed_object_fn, void *, unsigned flags);
|
2014-10-16 00:41:21 +02:00
|
|
|
|
2011-05-13 00:51:38 +02:00
|
|
|
/*
 * Request/response record taken by sha1_object_info_extended().
 *
 * Request: four pointer members (typep, sizep, disk_sizep, delta_base_sha1).
 * NOTE(review): presumably the caller points each one at storage for the
 * answer it wants and leaves the others NULL -- confirm against the
 * implementation.
 *
 * Response: `whence` is one of the OI_* constants, and `u.packed` carries
 * the pack, offset and is_delta details (presumably meaningful only for the
 * packed case -- confirm); the cached and loose cases expose nothing extra.
 */
struct object_info {
|
|
|
|
/* Request */
|
2013-07-12 08:34:57 +02:00
|
|
|
enum object_type *typep;
|
2011-05-13 00:51:38 +02:00
|
|
|
unsigned long *sizep;
|
2013-07-07 12:04:00 +02:00
|
|
|
unsigned long *disk_sizep;
|
2013-12-21 15:24:20 +01:00
|
|
|
unsigned char *delta_base_sha1;
|
2011-05-13 00:51:38 +02:00
|
|
|
|
|
|
|
|
|
/* Response */
|
|
|
|
enum {
|
|
|
|
OI_CACHED,
|
|
|
|
OI_LOOSE,
|
2011-05-13 22:20:43 +02:00
|
|
|
OI_PACKED,
|
|
|
|
OI_DBCACHED
|
2011-05-13 00:51:38 +02:00
|
|
|
} whence;
|
|
|
|
union {
|
|
|
|
/*
|
|
|
|
* struct {
|
|
|
|
* ... Nothing to expose in this case
|
|
|
|
* } cached;
|
|
|
|
* struct {
|
|
|
|
* ... Nothing to expose in this case
|
|
|
|
* } loose;
|
|
|
|
*/
|
|
|
|
struct {
|
|
|
|
struct packed_git *pack;
|
|
|
|
off_t offset;
|
|
|
|
unsigned int is_delta;
|
|
|
|
} packed;
|
|
|
|
} u;
|
|
|
|
};
|
2013-12-11 08:46:07 +01:00
|
|
|
/*
 * Query information about an object.  Takes a const unsigned char buffer
 * (presumably the object's SHA-1 -- confirm), a struct object_info
 * request/response record, and a flags word.
 * NOTE(review): the return-value convention and the flag values are not
 * visible in this chunk.
 */
extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags);
|
2005-06-28 23:56:57 +02:00
|
|
|
|
[PATCH] Add update-server-info.
The git-update-server-info command prepares informational files
to help clients discover the contents of a repository, and pull
from it via dumb transport protocols. Currently, the
following files are produced.
- The $repo/info/refs file lists the name of heads and tags
available in the $repo/refs/ directory, along with their
SHA1. This can be used by git-ls-remote command running on
the client side.
- The $repo/info/rev-cache file describes the commit ancestry
reachable from references in the $repo/refs/ directory. This
file is in an append-only binary format to make the server
side friendly to rsync mirroring scheme, and can be read by
git-show-rev-cache command.
- The $repo/objects/info/pack file lists the name of the packs
available, the interdependencies among them, and the head
commits and tags contained in them. Along with the other two
files, this is designed to help clients to make smart pull
decisions.
The git-receive-pack command is changed to invoke it at the end,
so just after a push to a public repository finishes via "git
push", the server info is automatically updated.
In addition, building of the rev-cache file can be done by a
standalone git-build-rev-cache command separately.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-07-24 02:54:41 +02:00
|
|
|
/* Dumb servers support */
|
|
|
|
extern int update_server_info(int);
|
|
|
|
|
2011-05-17 17:38:52 +02:00
|
|
|
/* git_config_parse_key() returns these negated: */
|
|
|
|
#define CONFIG_INVALID_KEY 1
|
|
|
|
#define CONFIG_NO_SECTION_OR_NAME 2
|
|
|
|
/* git_config_set(), git_config_set_multivar() return the above or these: */
|
|
|
|
/*
 * Parenthesized so the negative constant stays a single operand wherever
 * the macro is expanded (for example next to a postfix or unary operator).
 */
#define CONFIG_NO_LOCK (-1)
|
|
|
|
#define CONFIG_INVALID_FILE 3
|
|
|
|
#define CONFIG_NO_WRITE 4
|
|
|
|
#define CONFIG_NOTHING_SET 5
|
|
|
|
#define CONFIG_INVALID_PATTERN 6
|
2012-07-29 22:43:21 +02:00
|
|
|
#define CONFIG_GENERIC_ERROR 7
|
2011-05-17 17:38:52 +02:00
|
|
|
|
2014-08-19 08:20:00 +02:00
|
|
|
/*
 * Non-NULL sentinel pointer constant.
 * NOTE(review): presumably passed where a value-regex argument is expected
 * to mean "no regex"; the consumer is outside this chunk -- confirm.
 */
#define CONFIG_REGEX_NONE ((void *)1)
|
|
|
|
|
2014-02-18 23:58:54 +01:00
|
|
|
/*
 * Describes where configuration should be read from; passed to
 * git_config_with_options() as its config_source argument.
 * NOTE(review): presumably at most one of the members is meant to be set
 * at a time -- confirm against the callers.
 */
struct git_config_source {
|
2014-02-18 23:58:55 +01:00
|
|
|
unsigned int use_stdin:1; /* one-bit flag; presumably "read from stdin" -- confirm */
|
2014-02-18 23:58:54 +01:00
|
|
|
const char *file; /* presumably a path to a config file -- confirm */
|
|
|
|
const char *blob; /* presumably names a blob holding config -- confirm */
|
|
|
|
};
|
|
|
|
|
2008-05-14 19:46:53 +02:00
|
|
|
/*
 * Callback type used by the config readers in this header.  Functions of
 * this shape declared here name their arguments (var or name, value,
 * data or cb); the callback returns int.
 */
typedef int (*config_fn_t)(const char *, const char *, void *);
|
|
|
|
extern int git_default_config(const char *, const char *, void *);
|
|
|
|
extern int git_config_from_file(config_fn_t fn, const char *, void *);
|
2013-07-12 00:46:47 +02:00
|
|
|
extern int git_config_from_buf(config_fn_t fn, const char *name,
|
|
|
|
const char *buf, size_t len, void *data);
|
2010-08-23 21:16:00 +02:00
|
|
|
extern void git_config_push_parameter(const char *text);
|
2010-05-21 12:07:47 +02:00
|
|
|
extern int git_config_from_parameters(config_fn_t fn, void *data);
|
2014-08-07 13:59:15 +02:00
|
|
|
extern void git_config(config_fn_t fn, void *);
|
config: add include directive
It can be useful to split your ~/.gitconfig across multiple
files. For example, you might have a "main" file which is
used on many machines, but a small set of per-machine
tweaks. Or you may want to make some of your config public
(e.g., clever aliases) while keeping other data back (e.g.,
your name or other identifying information). Or you may want
to include a number of config options in some subset of your
repos without copying and pasting (e.g., you want to
reference them from the .git/config of participating repos).
This patch introduces an include directive for config files.
It looks like:
[include]
path = /path/to/file
This is syntactically backwards-compatible with existing git
config parsers (i.e., they will see it as another config
entry and ignore it unless you are looking up include.path).
The implementation provides a "git_config_include" callback
which wraps regular config callbacks. Callers can pass it to
git_config_from_file, and it will transparently follow any
include directives, passing all of the discovered options to
the real callback.
Include directives are turned on automatically for "regular"
git config parsing. This includes calls to git_config, as
well as calls to the "git config" program that do not
specify a single file (e.g., using "-f", "--global", etc).
They are not turned on in other cases, including:
1. Parsing of other config-like files, like .gitmodules.
There isn't a real need, and I'd rather be conservative
and avoid unnecessary incompatibility or confusion.
2. Reading single files via "git config". This is for two
reasons:
a. backwards compatibility with scripts looking at
config-like files.
b. inspection of a specific file probably means you
care about just what's in that file, not a general
lookup for "do we have this value anywhere at
all". If that is not the case, the caller can
always specify "--includes".
3. Writing files via "git config"; we want to treat
include.* variables as literal items to be copied (or
modified), and not expand them. So "git config
--unset-all foo.bar" would operate _only_ on
.git/config, not any of its included files (just as it
also does not operate on ~/.gitconfig).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-02-06 10:54:04 +01:00
|
|
|
/*
 * Run the config callback `fn` (with the caller's void pointer) against an
 * explicitly described config source.
 * NOTE(review): per the accompanying history text, include directives are
 * followed for "regular" config parsing but not for single-file reads;
 * presumably respect_includes selects that behaviour -- confirm.
 */
extern int git_config_with_options(config_fn_t fn, void *,
|
2014-02-18 23:58:54 +01:00
|
|
|
struct git_config_source *config_source,
|
2013-07-12 00:46:47 +02:00
|
|
|
int respect_includes);
|
2010-11-26 16:32:33 +01:00
|
|
|
extern int git_config_early(config_fn_t fn, void *, const char *repo_config);
|
2007-07-12 15:32:26 +02:00
|
|
|
extern int git_parse_ulong(const char *, unsigned long *);
|
2005-10-11 01:31:08 +02:00
|
|
|
extern int git_config_int(const char *, const char *);
|
git-config: always treat --int as 64-bit internally
When you run "git config --int", the maximum size of integer
you get depends on how git was compiled, and what it
considers to be an "int".
This is almost useful, because your scripts calling "git
config" will behave similarly to git internally. But relying
on this is dubious; you have to actually know how git treats
each value internally (e.g., int versus unsigned long),
which is not documented and is subject to change. And even
if you know it is "unsigned long", we do not have a
git-config option to match that behavior.
Furthermore, you may simply be asking git to store a value
on your behalf (e.g., configuration for a hook). In that
case, the relevant range check has nothing at all to do with
git, but rather with whatever scripting tools you are using
(and git has no way of knowing what the appropriate range is
there).
Not only is the range check useless, but it is actively
harmful, as there is no way at all for scripts to look
at config variables with large values. For instance, one
cannot reliably get the value of pack.packSizeLimit via
git-config. On an LP64 system, git happily uses a 64-bit
"unsigned long" internally to represent the value, but the
script cannot read any value over 2G.
Ideally, the "--int" option would simply represent an
arbitrarily large integer. For practical purposes, however,
a 64-bit integer is large enough, and is much easier to
implement (and if somebody overflows it, we will still
notice the problem, and not simply return garbage).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-09-08 10:40:02 +02:00
|
|
|
/*
 * 64-bit counterpart of git_config_int(): same argument list, int64_t
 * result.  The accompanying history text explains the motivation: "git
 * config --int" is treated as 64-bit internally so large values can be read.
 * NOTE(review): the two arguments are presumably (name, value) -- confirm.
 */
extern int64_t git_config_int64(const char *, const char *);
|
2007-07-12 15:32:26 +02:00
|
|
|
extern unsigned long git_config_ulong(const char *, const char *);
|
2008-04-13 03:33:31 +02:00
|
|
|
extern int git_config_bool_or_int(const char *, const char *, int *);
|
2005-10-11 01:31:08 +02:00
|
|
|
extern int git_config_bool(const char *, const char *);
|
2010-02-17 08:59:46 +01:00
|
|
|
extern int git_config_maybe_bool(const char *, const char *);
|
2008-02-16 06:00:24 +01:00
|
|
|
extern int git_config_string(const char **, const char *, const char *);
|
2009-11-17 18:24:25 +01:00
|
|
|
extern int git_config_pathname(const char **, const char *, const char *);
|
2011-08-04 12:39:00 +02:00
|
|
|
extern int git_config_set_in_file(const char *, const char *, const char *);
|
2005-11-17 22:32:36 +01:00
|
|
|
extern int git_config_set(const char *, const char *);
|
2011-01-30 20:40:41 +01:00
|
|
|
extern int git_config_parse_key(const char *, char **, int *);
|
2005-11-20 06:52:22 +01:00
|
|
|
extern int git_config_set_multivar(const char *, const char *, const char *, int);
|
2011-08-04 12:39:00 +02:00
|
|
|
extern int git_config_set_multivar_in_file(const char *, const char *, const char *, const char *, int);
|
2006-12-16 15:14:14 +01:00
|
|
|
extern int git_config_rename_section(const char *, const char *);
|
2012-02-16 09:04:25 +01:00
|
|
|
extern int git_config_rename_section_in_file(const char *, const char *, const char *);
|
2007-11-13 21:05:05 +01:00
|
|
|
extern const char *git_etc_gitconfig(void);
|
2008-05-14 19:46:53 +02:00
|
|
|
extern int check_repository_format_version(const char *var, const char *value, void *cb);
|
2010-03-17 20:55:51 +01:00
|
|
|
extern int git_env_bool(const char *, int);
|
2014-08-26 17:23:21 +02:00
|
|
|
extern unsigned long git_env_ulong(const char *, unsigned long);
|
2008-02-06 11:11:18 +01:00
|
|
|
extern int git_config_system(void);
|
2008-02-11 19:41:18 +01:00
|
|
|
/*
 * config_error_nonbool() takes a const char * and returns int.
 *
 * When __GNUC__ is defined, a same-named function-like macro wraps it: the
 * expansion calls the real function (a macro is not re-expanded inside its
 * own replacement list) and then, via the comma operator, yields
 * const_error() as the value of the whole expression.
 * NOTE(review): const_error() is defined outside this chunk; presumably it
 * is a compile-time-constant error value so the compiler can see what
 * callers return -- confirm.
 */
extern int config_error_nonbool(const char *);
|
2014-05-06 17:17:50 +02:00
|
|
|
#if defined(__GNUC__)
|
2014-05-06 17:14:42 +02:00
|
|
|
#define config_error_nonbool(s) (config_error_nonbool(s), const_error())
|
2012-12-15 18:42:10 +01:00
|
|
|
#endif
|
2010-11-02 20:59:07 +01:00
|
|
|
extern const char *get_log_output_encoding(void);
|
|
|
|
extern const char *get_commit_output_encoding(void);
|
|
|
|
|
2011-06-09 17:56:42 +02:00
|
|
|
extern int git_config_parse_parameter(const char *, config_fn_t fn, void *data);
|
|
|
|
|
config: add include directive
It can be useful to split your ~/.gitconfig across multiple
files. For example, you might have a "main" file which is
used on many machines, but a small set of per-machine
tweaks. Or you may want to make some of your config public
(e.g., clever aliases) while keeping other data back (e.g.,
your name or other identifying information). Or you may want
to include a number of config options in some subset of your
repos without copying and pasting (e.g., you want to
reference them from the .git/config of participating repos).
This patch introduces an include directive for config files.
It looks like:
[include]
path = /path/to/file
This is syntactically backwards-compatible with existing git
config parsers (i.e., they will see it as another config
entry and ignore it unless you are looking up include.path).
The implementation provides a "git_config_include" callback
which wraps regular config callbacks. Callers can pass it to
git_config_from_file, and it will transparently follow any
include directives, passing all of the discovered options to
the real callback.
Include directives are turned on automatically for "regular"
git config parsing. This includes calls to git_config, as
well as calls to the "git config" program that do not
specify a single file (e.g., using "-f", "--global", etc).
They are not turned on in other cases, including:
1. Parsing of other config-like files, like .gitmodules.
There isn't a real need, and I'd rather be conservative
and avoid unnecessary incompatibility or confusion.
2. Reading single files via "git config". This is for two
reasons:
a. backwards compatibility with scripts looking at
config-like files.
b. inspection of a specific file probably means you
care about just what's in that file, not a general
lookup for "do we have this value anywhere at
all". If that is not the case, the caller can
always specify "--includes".
3. Writing files via "git config"; we want to treat
include.* variables as literal items to be copied (or
modified), and not expand them. So "git config
--unset-all foo.bar" would operate _only_ on
.git/config, not any of its included files (just as it
also does not operate on ~/.gitconfig).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-02-06 10:54:04 +01:00
|
|
|
/*
 * State for the git_config_include() wrapper callback.  Per the
 * accompanying history text, that callback wraps a regular config callback
 * and passes every discovered option on to it.
 */
struct config_include_data {
|
|
|
|
int depth; /* presumably the current include nesting level -- confirm */
|
|
|
|
config_fn_t fn; /* the wrapped ("real") callback */
|
|
|
|
void *data; /* presumably handed to fn as its void pointer -- confirm */
|
|
|
|
};
|
|
|
|
/*
 * Brace initializer whose first element is 0; any remaining members of the
 * initialized aggregate are zero-initialized by C's rules.
 * NOTE(review): intended for struct config_include_data, judging by the
 * name -- confirm.
 */
#define CONFIG_INCLUDE_INIT { 0 }
|
|
|
|
/*
 * Callback with the config_fn_t signature.  Per the accompanying history
 * text, callers can pass it to git_config_from_file() and it follows
 * include directives, forwarding the discovered options to the real
 * callback.
 * NOTE(review): `data` presumably points at a struct config_include_data
 * -- confirm.
 */
extern int git_config_include(const char *name, const char *value, void *data);
|
2005-10-11 01:31:08 +02:00
|
|
|
|
2013-01-23 07:23:05 +01:00
|
|
|
/*
|
|
|
|
* Match and parse a config key of the form:
|
|
|
|
*
|
|
|
|
* section.(subsection.)?key
|
|
|
|
*
|
|
|
|
* (i.e., what gets handed to a config_fn_t). The caller provides the section;
|
|
|
|
* we return -1 if it does not match, 0 otherwise. The subsection and key
|
|
|
|
* out-parameters are filled by the function (and subsection is NULL if it is
|
|
|
|
* missing).
|
|
|
|
*/
|
|
|
|
extern int parse_config_key(const char *var,
|
|
|
|
const char *section,
|
|
|
|
const char **subsection, int *subsection_len,
|
|
|
|
const char **key);
|
|
|
|
|
2014-08-07 13:59:17 +02:00
|
|
|
/*
 * A hashmap-linked record pairing a key string with a string_list.
 * NOTE(review): presumably one element per config key, with value_list
 * holding every value seen for that key -- confirm.
 */
struct config_set_element {
|
|
|
|
struct hashmap_entry ent; /* embedded hashmap linkage */
|
|
|
|
char *key;
|
|
|
|
struct string_list value_list;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * One entry of a configset_list: a pointer to a config_set_element plus an
 * integer index.
 * NOTE(review): value_index presumably selects one entry of
 * e->value_list -- confirm.
 */
struct configset_list_item {
|
|
|
|
struct config_set_element *e;
|
|
|
|
int value_index;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* the contents of the list are ordered according to their
|
|
|
|
* position in the config files and order of parsing the files.
|
|
|
|
* (i.e. key-value pair at the last position of .git/config will
|
|
|
|
* be at the last item of the list)
|
|
|
|
*/
|
|
|
|
struct configset_list {
|
|
|
|
struct configset_list_item *items;
|
|
|
|
unsigned int nr, alloc;
|
|
|
|
};
|
|
|
|
|
2014-07-28 12:10:38 +02:00
|
|
|
/*
 * A set of configuration values, handled through the git_configset_*()
 * functions declared in this header.
 */
struct config_set {
|
|
|
|
struct hashmap config_hash; /* presumably keyed by config key -- confirm */
|
|
|
|
int hash_initialized; /* presumably non-zero once config_hash is set up -- confirm */
|
2014-08-07 13:59:17 +02:00
|
|
|
struct configset_list list; /* items kept in config-file/parse order */
|
2014-07-28 12:10:38 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
extern void git_configset_init(struct config_set *cs);
|
|
|
|
extern int git_configset_add_file(struct config_set *cs, const char *filename);
|
|
|
|
extern int git_configset_get_value(struct config_set *cs, const char *key, const char **value);
|
|
|
|
extern const struct string_list *git_configset_get_value_multi(struct config_set *cs, const char *key);
|
|
|
|
extern void git_configset_clear(struct config_set *cs);
|
|
|
|
extern int git_configset_get_string_const(struct config_set *cs, const char *key, const char **dest);
|
|
|
|
extern int git_configset_get_string(struct config_set *cs, const char *key, char **dest);
|
|
|
|
extern int git_configset_get_int(struct config_set *cs, const char *key, int *dest);
|
|
|
|
extern int git_configset_get_ulong(struct config_set *cs, const char *key, unsigned long *dest);
|
|
|
|
extern int git_configset_get_bool(struct config_set *cs, const char *key, int *dest);
|
|
|
|
extern int git_configset_get_bool_or_int(struct config_set *cs, const char *key, int *is_bool, int *dest);
|
|
|
|
extern int git_configset_get_maybe_bool(struct config_set *cs, const char *key, int *dest);
|
|
|
|
extern int git_configset_get_pathname(struct config_set *cs, const char *key, const char **dest);
|
|
|
|
|
|
|
|
extern int git_config_get_value(const char *key, const char **value);
|
|
|
|
extern const struct string_list *git_config_get_value_multi(const char *key);
|
|
|
|
extern void git_config_clear(void);
|
|
|
|
extern void git_config_iter(config_fn_t fn, void *data);
|
|
|
|
extern int git_config_get_string_const(const char *key, const char **dest);
|
|
|
|
extern int git_config_get_string(const char *key, char **dest);
|
|
|
|
extern int git_config_get_int(const char *key, int *dest);
|
|
|
|
extern int git_config_get_ulong(const char *key, unsigned long *dest);
|
|
|
|
extern int git_config_get_bool(const char *key, int *dest);
|
|
|
|
extern int git_config_get_bool_or_int(const char *key, int *is_bool, int *dest);
|
|
|
|
extern int git_config_get_maybe_bool(const char *key, int *dest);
|
|
|
|
extern int git_config_get_pathname(const char *key, const char **dest);
|
|
|
|
|
2014-08-07 13:59:14 +02:00
|
|
|
/*
 * A file name paired with a line number.
 * NOTE(review): presumably records where a config key/value pair was read
 * from -- confirm against the users of this struct.
 */
struct key_value_info {
|
|
|
|
const char *filename;
|
|
|
|
int linenr;
|
|
|
|
};
|
|
|
|
|
2014-08-07 13:59:16 +02:00
|
|
|
/*
 * Neither function returns (both are marked NORETURN).
 *
 * git_die_config(): `err` is a printf-style format string consumed with
 * the trailing variadic arguments (format attribute positions 2 and 3).
 * git_die_config_linenr(): takes a key plus a file name and line number.
 * NOTE(review): the message text each one emits is not visible here.
 */
extern NORETURN void git_die_config(const char *key, const char *err, ...) __attribute__((format(printf, 2, 3)));
|
|
|
|
extern NORETURN void git_die_config_linenr(const char *key, const char *filename, int linenr);
|
|
|
|
|
ident: keep separate "explicit" flags for author and committer
We keep track of whether the user ident was given to us
explicitly, or if we guessed at it from system parameters
like username and hostname. However, we kept only a single
variable. This covers the common cases (because the author
and committer will usually come from the same explicit
source), but can miss two cases:
1. GIT_COMMITTER_* is set explicitly, but we fall back for
GIT_AUTHOR. We claim the ident is explicit, even though
the author is not.
2. GIT_AUTHOR_* is set and we ask for author ident, but
not committer ident. We will claim the ident is
implicit, even though it is explicit.
This patch uses two variables instead of one, updates both
when we set the "fallback" values, and updates them
individually when we read from the environment.
Rather than keep user_ident_sufficiently_given as a
compatibility wrapper, we update the only two callers to
check the committer_ident, which matches their intent and
what was happening already.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-11-15 01:34:13 +01:00
|
|
|
extern int committer_ident_sufficiently_given(void);
|
|
|
|
extern int author_ident_sufficiently_given(void);
|
2005-10-12 03:47:34 +02:00
|
|
|
|
2007-03-12 20:33:18 +01:00
|
|
|
extern const char *git_commit_encoding;
|
2007-03-07 02:44:17 +01:00
|
|
|
extern const char *git_log_output_encoding;
|
2009-02-08 15:34:27 +01:00
|
|
|
extern const char *git_mailmap_file;
|
2012-12-12 12:04:04 +01:00
|
|
|
extern const char *git_mailmap_blob;
|
2005-11-28 01:09:40 +01:00
|
|
|
|
2007-06-29 19:40:46 +02:00
|
|
|
/* IO helper functions */
|
|
|
|
extern void maybe_flush_or_die(FILE *, const char *);
|
2014-09-10 12:03:52 +02:00
|
|
|
/*
 * printf-style output to a FILE *: `fmt` (argument 2) is checked against
 * the variadic arguments by the format attribute.
 * NOTE(review): presumably terminates the program when the write fails, as
 * the name suggests -- confirm.
 */
__attribute__((format (printf, 2, 3)))
|
|
|
|
extern void fprintf_or_die(FILE *, const char *fmt, ...);
|
2005-10-22 10:28:13 +02:00
|
|
|
extern int copy_fd(int ifd, int ofd);
|
2008-02-25 20:24:48 +01:00
|
|
|
extern int copy_file(const char *dst, const char *src, int mode);
|
2009-09-12 11:03:48 +02:00
|
|
|
extern int copy_file_with_time(const char *dst, const char *src, int mode);
|
2006-08-21 20:43:43 +02:00
|
|
|
extern void write_or_die(int fd, const void *buf, size_t count);
|
2006-09-02 18:23:48 +02:00
|
|
|
extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
|
2007-01-08 16:57:52 +01:00
|
|
|
extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg);
|
2008-05-30 17:42:16 +02:00
|
|
|
extern void fsync_or_die(int fd, const char *);
|
2005-12-15 07:17:38 +01:00
|
|
|
|
use write_str_in_full helper to avoid literal string lengths
In 2d14d65 (Use a clearer style to issue commands to remote helpers,
2009-09-03) I happened to notice two changes like this:
- write_in_full(helper->in, "list\n", 5);
+
+ strbuf_addstr(&buf, "list\n");
+ write_in_full(helper->in, buf.buf, buf.len);
+ strbuf_reset(&buf);
IMHO, it would be better to define a new function,
static inline ssize_t write_str_in_full(int fd, const char *str)
{
return write_in_full(fd, str, strlen(str));
}
and then use it like this:
- strbuf_addstr(&buf, "list\n");
- write_in_full(helper->in, buf.buf, buf.len);
- strbuf_reset(&buf);
+ write_str_in_full(helper->in, "list\n");
Thus not requiring the added allocation, and still avoiding
the maintenance risk of literal string lengths.
These days, compilers are good enough that strlen("literal")
imposes no run-time cost.
Transformed via this:
perl -pi -e \
's/write_in_full\((.*?), (".*?"), \d+\)/write_str_in_full($1, $2)/'\
$(git grep -l 'write_in_full.*"')
Signed-off-by: Jim Meyering <meyering@redhat.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-12 10:54:32 +02:00
|
|
|
extern ssize_t read_in_full(int fd, void *buf, size_t count);
|
|
|
|
extern ssize_t write_in_full(int fd, const void *buf, size_t count);
|
2014-04-10 20:31:21 +02:00
|
|
|
extern ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset);
|
|
|
|
|
use write_str_in_full helper to avoid literal string lengths
In 2d14d65 (Use a clearer style to issue commands to remote helpers,
2009-09-03) I happened to notice two changes like this:
- write_in_full(helper->in, "list\n", 5);
+
+ strbuf_addstr(&buf, "list\n");
+ write_in_full(helper->in, buf.buf, buf.len);
+ strbuf_reset(&buf);
IMHO, it would be better to define a new function,
static inline ssize_t write_str_in_full(int fd, const char *str)
{
return write_in_full(fd, str, strlen(str));
}
and then use it like this:
- strbuf_addstr(&buf, "list\n");
- write_in_full(helper->in, buf.buf, buf.len);
- strbuf_reset(&buf);
+ write_str_in_full(helper->in, "list\n");
Thus not requiring the added allocation, and still avoiding
the maintenance risk of literal string lengths.
These days, compilers are good enough that strlen("literal")
imposes no run-time cost.
Transformed via this:
perl -pi -e \
's/write_in_full\((.*?), (".*?"), \d+\)/write_str_in_full($1, $2)/'\
$(git grep -l 'write_in_full.*"')
Signed-off-by: Jim Meyering <meyering@redhat.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-12 10:54:32 +02:00
|
|
|
/*
 * Convenience wrapper around write_in_full() for NUL-terminated strings:
 * the byte count comes from strlen(str), so callers need not spell out
 * literal string lengths.  Returns whatever write_in_full() returns.
 */
static inline ssize_t write_str_in_full(int fd, const char *str)
{
	const size_t nbytes = strlen(str);

	return write_in_full(fd, str, nbytes);
}
|
2014-11-30 09:24:45 +01:00
|
|
|
/*
 * `fmt` (argument 3) is a printf-style format string checked against the
 * variadic arguments by the format attribute.
 * NOTE(review): presumably writes the formatted text to `path`, with
 * `fatal` choosing between dying and returning an error on failure --
 * confirm against the implementation.
 */
__attribute__((format (printf, 3, 4)))
|
|
|
|
extern int write_file(const char *path, int fatal, const char *fmt, ...);
|
use write_str_in_full helper to avoid literal string lengths
In 2d14d65 (Use a clearer style to issue commands to remote helpers,
2009-09-03) I happened to notice two changes like this:
- write_in_full(helper->in, "list\n", 5);
+
+ strbuf_addstr(&buf, "list\n");
+ write_in_full(helper->in, buf.buf, buf.len);
+ strbuf_reset(&buf);
IMHO, it would be better to define a new function,
static inline ssize_t write_str_in_full(int fd, const char *str)
{
return write_in_full(fd, str, strlen(str));
}
and then use it like this:
- strbuf_addstr(&buf, "list\n");
- write_in_full(helper->in, buf.buf, buf.len);
- strbuf_reset(&buf);
+ write_str_in_full(helper->in, "list\n");
Thus not requiring the added allocation, and still avoiding
the maintenance risk of literal string lengths.
These days, compilers are good enough that strlen("literal")
imposes no run-time cost.
Transformed via this:
perl -pi -e \
's/write_in_full\((.*?), (".*?"), \d+\)/write_str_in_full($1, $2)/'\
$(git grep -l 'write_in_full.*"')
Signed-off-by: Jim Meyering <meyering@redhat.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-12 10:54:32 +02:00
|
|
|
|
2006-02-28 20:26:21 +01:00
|
|
|
/* pager.c */
|
|
|
|
extern void setup_pager(void);
|
2008-02-16 06:01:11 +01:00
|
|
|
extern const char *pager_program;
|
2007-12-11 07:27:33 +01:00
|
|
|
extern int pager_in_use(void);
|
2006-07-30 00:27:43 +02:00
|
|
|
extern int pager_use_color;
|
2012-02-12 15:12:32 +01:00
|
|
|
extern int term_columns(void);
|
decimal_width: avoid integer overflow
The decimal_width function originally appeared in blame.c as
"lineno_width", and was designed for calculating the
print-width of small-ish integer values (line numbers in
text files). In ec7ff5b, it was made into a reusable
function, and in dc801e7, we started using it to align
diffstats.
Binary files in a diffstat show byte counts rather than line
numbers, meaning they can be quite large (e.g., consider
adding or removing a 2GB file). decimal_width is not up to
the challenge for two reasons:
1. It takes the value as an "int", whereas large files may
easily surpass this. The value may be truncated, in
which case we will produce an incorrect value.
2. It counts "up" by repeatedly multiplying another
integer by 10 until it surpasses the value. This can
cause an infinite loop when the value is close to the
largest representable integer.
For example, consider using a 32-bit signed integer,
and a value of 2,140,000,000 (just shy of 2^31-1).
We will count up and eventually see that 1,000,000,000
is smaller than our value. The next step would be to
multiply by 10 and see that 10,000,000,000 is too
large, ending the loop. But we can't represent that
value, and we have signed overflow.
This is technically undefined behavior, but a common
behavior is to lose the high bits, in which case our
iterator will certainly be less than the number. So
we'll keep multiplying, overflow again, and so on.
This patch changes the argument to a uintmax_t (the same
type we use to store the diffstat information for binary
files), and counts "down" by repeatedly dividing our value
by 10.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-02-05 09:14:19 +01:00
|
|
|
/*
 * Print-width (number of decimal digits) of a value.  The argument is a
 * uintmax_t so large byte counts, such as diffstat figures for binary
 * files, are not truncated; per the accompanying history text the
 * implementation counts down by repeated division by 10 to avoid overflow.
 */
extern int decimal_width(uintmax_t);
|
2012-10-26 17:53:52 +02:00
|
|
|
extern int check_pager_config(const char *cmd);
|
2006-02-28 20:26:21 +01:00
|
|
|
|
2008-02-16 06:01:41 +01:00
|
|
|
extern const char *editor_program;
|
2010-08-30 15:38:38 +02:00
|
|
|
extern const char *askpass_program;
|
2008-02-16 06:01:59 +01:00
|
|
|
extern const char *excludes_file;
|
2007-07-20 14:06:09 +02:00
|
|
|
|
binary patch.
This adds "binary patch" to the diff output and teaches apply
what to do with them.
On the diff generation side, traditionally, we said "Binary
files differ\n" without giving anything other than the preimage
and postimage object name on the index line. This was good
enough for applying a patch generated from your own repository
(very useful while rebasing), because the postimage would be
available in such a case. However, this was not useful when the
recipient of such a patch via e-mail were to apply it, even if
the preimage was available.
This patch allows the diff to generate "binary" patch when
operating under --full-index option. The binary patch follows
the usual extended git diff headers, and looks like this:
"GIT binary patch\n"
<length byte><data>"\n"
...
"\n"
Each line is prefixed with a "length-byte", whose value is upper
or lowercase alphabet that encodes number of bytes that the data
on the line decodes to (1..52 -- 'A' means 1, 'B' means 2, ...,
'Z' means 26, 'a' means 27, ...). <data> is 1 or more groups of
5-byte sequence, each of which encodes up to 4 bytes in base85
encoding. Because 52 / 4 * 5 = 65 and we have the length byte,
an output line is capped to 66 characters. The payload is the
same diff-delta as we use in the packfiles.
On the consumption side, git-apply now can decode and apply the
binary patch when --allow-binary-replacement is given, the diff
was generated with --full-index, and the receiving repository
has the preimage blob, which is the same condition as it always
required when accepting a "Binary files differ\n" patch.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-05 01:51:44 +02:00
|
|
|
/* base85 */
|
2007-04-10 00:56:33 +02:00
|
|
|
int decode_85(char *dst, const char *line, int linelen);
|
|
|
|
/*
 * Encode `bytes` bytes from `data` as base85 text into `buf`.
 * Base85 emits one 5-character group for each group of up to 4 input bytes.
 * NOTE(review): the caller presumably must size `buf` for all groups plus a
 * terminator -- confirm against base85.c.
 */
void encode_85(char *buf, const unsigned char *data, int bytes);
|
binary patch.
This adds "binary patch" to the diff output and teaches apply
what to do with them.
On the diff generation side, traditionally, we said "Binary
files differ\n" without giving anything other than the preimage
and postimage object name on the index line. This was good
enough for applying a patch generated from your own repository
(very useful while rebasing), because the postimage would be
available in such a case. However, this was not useful when the
recipient of such a patch via e-mail were to apply it, even if
the preimage was available.
This patch allows the diff to generate "binary" patch when
operating under --full-index option. The binary patch follows
the usual extended git diff headers, and looks like this:
"GIT binary patch\n"
<length byte><data>"\n"
...
"\n"
Each line is prefixed with a "length-byte", whose value is upper
or lowercase alphabet that encodes number of bytes that the data
on the line decodes to (1..52 -- 'A' means 1, 'B' means 2, ...,
'Z' means 26, 'a' means 27, ...). <data> is 1 or more groups of
5-byte sequence, each of which encodes up to 4 bytes in base85
encoding. Because 52 / 4 * 5 = 65 and we have the length byte,
an output line is capped to 66 characters. The payload is the
same diff-delta as we use in the packfiles.
On the consumption side, git-apply now can decode and apply the
binary patch when --allow-binary-replacement is given, the diff
was generated with --full-index, and the receiving repository
has the preimage blob, which is the same condition as it always
required when accepting a "Binary files differ\n" patch.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-05 01:51:44 +02:00
|
|
|
|
Add specialized object allocator
This creates a simple specialized object allocator for basic
objects.
This avoids wasting space with malloc overhead (metadata and
extra alignment), since the specialized allocator knows the
alignment, and that objects, once allocated, are never freed.
It also allows us to track some basic statistics about object
allocations. For example, for the mozilla import, it shows
object usage as follows:
blobs: 627629 (14710 kB)
trees: 1119035 (34969 kB)
commits: 196423 (8440 kB)
tags: 1336 (46 kB)
and the simpler allocator shaves about 2.5% off the memory
footprint of a "git-rev-list --all --objects", and is a bit
faster too.
[ Side note: this concludes the series of "save memory in object storage".
The thing is, there simply isn't much more to be saved on the objects.
Doing "git-rev-list --all --objects" on the mozilla archive has a final
total RSS of 131498 pages for me: that's about 513MB. Of that, the
object overhead is now just 56MB, the rest is going somewhere else (put
another way: the fact that this patch shaves off 2.5% of the total
memory overhead, considering that objects are now not much more than 10%
of the total shows how big the wasted space really was: this makes
object allocations much more memory- and time-efficient).
I haven't looked at where the rest is, but I suspect the bulk of it is
just the pack-file loading. It may be that we should pack the tree
objects separately from the blob objects: for git-rev-list --objects, we
don't actually ever need to even look at the blobs, but since trees and
blobs are interspersed in the pack-file, we end up not being dense in
the tree accesses, so we end up looking at more pages than we strictly
need to.
So with a 535MB pack-file, it's entirely possible - even likely - that
most of the remaining RSS is just the mmap of the pack-file itself. We
don't need to map in _all_ of it, but we do end up mapping a fair
amount. ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-19 19:44:15 +02:00
|
|
|
/* alloc.c */
|
2007-04-17 07:11:43 +02:00
|
|
|
/*
 * Per-type node allocators from alloc.c: each takes no arguments and returns
 * an untyped pointer for one blob, tree, commit, tag or generic object node.
 * NOTE(review): nodes handed out here are presumably never freed
 * individually (the allocator is specialized for that) -- confirm in alloc.c.
 */
extern void *alloc_blob_node(void);
|
|
|
|
extern void *alloc_tree_node(void);
|
|
|
|
extern void *alloc_commit_node(void);
|
|
|
|
extern void *alloc_tag_node(void);
|
|
|
|
extern void *alloc_object_node(void);
|
Add specialized object allocator
This creates a simple specialized object allocator for basic
objects.
This avoids wasting space with malloc overhead (metadata and
extra alignment), since the specialized allocator knows the
alignment, and that objects, once allocated, are never freed.
It also allows us to track some basic statistics about object
allocations. For example, for the mozilla import, it shows
object usage as follows:
blobs: 627629 (14710 kB)
trees: 1119035 (34969 kB)
commits: 196423 (8440 kB)
tags: 1336 (46 kB)
and the simpler allocator shaves about 2.5% off the memory
footprint of a "git-rev-list --all --objects", and is a bit
faster too.
[ Side note: this concludes the series of "save memory in object storage".
The thing is, there simply isn't much more to be saved on the objects.
Doing "git-rev-list --all --objects" on the mozilla archive has a final
total RSS of 131498 pages for me: that's about 513MB. Of that, the
object overhead is now just 56MB, the rest is going somewhere else (put
another way: the fact that this patch shaves off 2.5% of the total
memory overhead, considering that objects are now not much more than 10%
of the total shows how big the wasted space really was: this makes
object allocations much more memory- and time-efficient).
I haven't looked at where the rest is, but I suspect the bulk of it is
just the pack-file loading. It may be that we should pack the tree
objects separately from the blob objects: for git-rev-list --objects, we
don't actually ever need to even look at the blobs, but since trees and
blobs are interspersed in the pack-file, we end up not being dense in
the tree accesses, so we end up looking at more pages than we strictly
need to.
So with a 535MB pack-file, it's entirely possible - even likely - that
most of the remaining RSS is just the mmap of the pack-file itself. We
don't need to map in _all_ of it, but we do end up mapping a fair
amount. ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-19 19:44:15 +02:00
|
|
|
/* NOTE(review): presumably prints the per-type allocation statistics tracked by alloc.c -- confirm. */
extern void alloc_report(void);
|
2014-07-13 08:42:08 +02:00
|
|
|
extern unsigned int alloc_commit_index(void);
|
Add specialized object allocator
This creates a simple specialized object allocator for basic
objects.
This avoids wasting space with malloc overhead (metadata and
extra alignment), since the specialized allocator knows the
alignment, and that objects, once allocated, are never freed.
It also allows us to track some basic statistics about object
allocations. For example, for the mozilla import, it shows
object usage as follows:
blobs: 627629 (14710 kB)
trees: 1119035 (34969 kB)
commits: 196423 (8440 kB)
tags: 1336 (46 kB)
and the simpler allocator shaves about 2.5% off the memory
footprint of a "git-rev-list --all --objects", and is a bit
faster too.
[ Side note: this concludes the series of "save memory in object storage".
The thing is, there simply isn't much more to be saved on the objects.
Doing "git-rev-list --all --objects" on the mozilla archive has a final
total RSS of 131498 pages for me: that's about 513MB. Of that, the
object overhead is now just 56MB, the rest is going somewhere else (put
another way: the fact that this patch shaves off 2.5% of the total
memory overhead, considering that objects are now not much more than 10%
of the total shows how big the wasted space really was: this makes
object allocations much more memory- and time-efficient).
I haven't looked at where the rest is, but I suspect the bulk of it is
just the pack-file loading. It may be that we should pack the tree
objects separately from the blob objects: for git-rev-list --objects, we
don't actually ever need to even look at the blobs, but since trees and
blobs are interspersed in the pack-file, we end up not being dense in
the tree accesses, so we end up looking at more pages than we strictly
need to.
So with a 535MB pack-file, it's entirely possible - even likely - that
most of the remaining RSS is just the mmap of the pack-file itself. We
don't need to map in _all_ of it, but we do end up mapping a fair
amount. ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-19 19:44:15 +02:00
|
|
|
|
2014-06-11 09:56:49 +02:00
|
|
|
/* pkt-line.c */
|
2011-02-24 15:30:19 +01:00
|
|
|
void packet_trace_identity(const char *prog);
|
2006-09-02 18:23:48 +02:00
|
|
|
|
2007-11-18 10:12:04 +01:00
|
|
|
/* add */
|
2008-05-12 19:58:10 +02:00
|
|
|
/*
|
|
|
|
* return 0 on success, or 1 if addition of a file failed and
|
|
|
|
* ADD_FILES_IGNORE_ERRORS was specified in flags
|
|
|
|
*/
|
2013-07-14 10:35:56 +02:00
|
|
|
int add_files_to_cache(const char *prefix, const struct pathspec *pathspec, int flags);
|
2007-11-18 10:12:04 +01:00
|
|
|
|
2007-08-31 22:13:42 +02:00
|
|
|
/* diff.c */
|
|
|
|
extern int diff_auto_refresh_index;
|
|
|
|
|
2007-02-16 01:32:45 +01:00
|
|
|
/* match-trees.c */
|
|
|
|
void shift_tree(const unsigned char *, const unsigned char *, unsigned char *, int);
|
2008-07-01 07:18:57 +02:00
|
|
|
void shift_tree_by(const unsigned char *, const unsigned char *, unsigned char *, const char *);
|
2007-02-16 01:32:45 +01:00
|
|
|
|
2007-11-02 08:24:27 +01:00
|
|
|
/*
|
|
|
|
* whitespace rules.
|
|
|
|
* used by both diff and apply
|
2010-11-30 09:29:11 +01:00
|
|
|
* last two digits are tab width
|
2007-11-02 08:24:27 +01:00
|
|
|
*/
|
2010-11-30 09:29:11 +01:00
|
|
|
#define WS_BLANK_AT_EOL 0100
|
|
|
|
#define WS_SPACE_BEFORE_TAB 0200
|
|
|
|
#define WS_INDENT_WITH_NON_TAB 0400
|
|
|
|
#define WS_CR_AT_EOL 01000
|
|
|
|
#define WS_BLANK_AT_EOF 02000
|
|
|
|
#define WS_TAB_IN_INDENT 04000
|
2009-09-06 07:21:17 +02:00
|
|
|
#define WS_TRAILING_SPACE (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF)
|
2010-11-30 09:29:11 +01:00
|
|
|
/*
 * Default rule: the WS_TRAILING_SPACE and WS_SPACE_BEFORE_TAB bits, with a
 * tab width of 8 stored in the low (tab width) bits.
 */
#define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB|8)
|
|
|
|
/* Low two octal digits (six bits) of a rule word hold the tab width. */
#define WS_TAB_WIDTH_MASK 077
|
2007-12-06 09:14:14 +01:00
|
|
|
extern unsigned whitespace_rule_cfg;
|
|
|
|
extern unsigned whitespace_rule(const char *);
|
|
|
|
extern unsigned parse_whitespace_rule(const char *);
|
2008-06-27 00:35:21 +02:00
|
|
|
extern unsigned ws_check(const char *line, int len, unsigned ws_rule);
|
|
|
|
extern void ws_check_emit(const char *line, int len, unsigned ws_rule, FILE *stream, const char *set, const char *reset, const char *ws);
|
2007-12-13 14:32:29 +01:00
|
|
|
extern char *whitespace_error_string(unsigned ws);
|
2010-04-03 01:37:23 +02:00
|
|
|
extern void ws_fix_copy(struct strbuf *, const char *, int, unsigned, int *);
|
2008-06-27 00:36:59 +02:00
|
|
|
extern int ws_blank_line(const char *line, int len, unsigned ws_rule);
|
2010-11-30 09:29:11 +01:00
|
|
|
/* Extract the tab width from a whitespace rule by masking with WS_TAB_WIDTH_MASK. */
#define ws_tab_width(rule) ((rule) & WS_TAB_WIDTH_MASK)
|
2007-11-02 08:24:27 +01:00
|
|
|
|
2007-11-18 10:13:32 +01:00
|
|
|
/* ls-files */
|
|
|
|
void overlay_tree_on_cache(const char *tree_name, const char *prefix);
|
|
|
|
|
2008-02-24 23:17:14 +01:00
|
|
|
char *alias_lookup(const char *alias);
|
2008-06-27 18:21:54 +02:00
|
|
|
/*
 * Split `cmdline` into an argument vector returned through `argv`.
 * A negative return value is an error code that can be passed to
 * split_cmdline_strerror().
 * NOTE(review): `cmdline` is non-const, so it is presumably modified in
 * place, and a non-negative return is presumably the argument count --
 * confirm against the implementation.
 */
int split_cmdline(char *cmdline, const char ***argv);
|
2010-08-07 07:13:39 +02:00
|
|
|
/* Takes a negative value returned by split_cmdline */
|
|
|
|
const char *split_cmdline_strerror(int cmdline_errno);
|
2008-02-24 23:17:14 +01:00
|
|
|
|
2010-08-06 04:40:35 +02:00
|
|
|
/* git.c */
|
|
|
|
/* Startup state; a global pointer named `startup_info` to this struct is declared in this header. */
struct startup_info {
|
2010-08-06 04:46:33 +02:00
|
|
|
/* NOTE(review): presumably nonzero once a repository has been found -- confirm in the setup code. */
int have_repository;
|
2010-12-02 00:33:22 +01:00
|
|
|
/* NOTE(review): presumably the path prefix of the starting directory inside the work tree -- confirm. */
const char *prefix;
|
2010-08-06 04:40:35 +02:00
|
|
|
};
|
|
|
|
extern struct startup_info *startup_info;
|
|
|
|
|
2012-10-26 17:53:49 +02:00
|
|
|
/* merge.c */
|
|
|
|
struct commit_list;
|
|
|
|
int try_merge_command(const char *strategy, size_t xopts_nr,
|
|
|
|
const char **xopts, struct commit_list *common,
|
|
|
|
const char *head_arg, struct commit_list *remotes);
|
|
|
|
int checkout_fast_forward(const unsigned char *from,
|
|
|
|
const unsigned char *to,
|
|
|
|
int overwrite_ignore);
|
|
|
|
|
2010-03-06 21:34:41 +01:00
|
|
|
|
2012-03-30 09:52:18 +02:00
|
|
|
int sane_execvp(const char *file, char *const argv[]);
|
|
|
|
|
2013-06-20 10:37:51 +02:00
|
|
|
/*
|
|
|
|
* A struct to encapsulate the concept of whether a file has changed
|
|
|
|
* since we last checked it. This uses criteria similar to those used
|
|
|
|
* for the index.
|
|
|
|
*/
|
|
|
|
struct stat_validity {
|
|
|
|
/*
 * Saved stat data used for the comparison.
 * NOTE(review): presumably NULL when nothing valid has been recorded
 * (freshly initialized, or last update saw no regular file) -- confirm.
 */
struct stat_data *sd;
|
|
|
|
};
|
|
|
|
|
|
|
|
void stat_validity_clear(struct stat_validity *sv);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns 1 if the path is a regular file (or a symlink to a regular
|
|
|
|
* file) and matches the saved stat_validity, 0 otherwise. A missing
|
|
|
|
* or inaccessible file is considered a match if the struct was just
|
|
|
|
* initialized, or if the previous update found an inaccessible file.
|
|
|
|
*/
|
|
|
|
int stat_validity_check(struct stat_validity *sv, const char *path);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update the stat_validity from a file opened at descriptor fd. If
|
|
|
|
* the file is missing, inaccessible, or not a regular file, then
|
|
|
|
* future calls to stat_validity_check will match iff one of those
|
|
|
|
* conditions continues to be true.
|
|
|
|
*/
|
|
|
|
void stat_validity_update(struct stat_validity *sv, int fd);
|
|
|
|
|
2014-02-27 13:56:52 +01:00
|
|
|
int versioncmp(const char *s1, const char *s2);
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
#endif /* CACHE_H */
|