2005-04-08 00:13:13 +02:00
|
|
|
#ifndef CACHE_H
|
|
|
|
#define CACHE_H
|
|
|
|
|
2005-12-05 20:54:29 +01:00
|
|
|
#include "git-compat-util.h"
|
Rewrite convert_to_{git,working_tree} to use strbuf's.
* Now, those functions take an "out" strbuf argument, where they store their
result if any. In that case, it also returns 1, else it returns 0.
* those functions support "in place" editing, in the sense that it's OK to
call them this way:
convert_to_git(path, sb->buf, sb->len, sb);
When doable, conversions are done in place for real, else the strbuf
content is just replaced with the new one, transparently for the caller.
If you want to create a new filter working this way, being the accumulation
of filter1, filter2, ... filtern, then your meta_filter would be:
int meta_filter(..., const char *src, size_t len, struct strbuf *sb)
{
int ret = 0;
ret |= filter1(...., src, len, sb);
if (ret) {
src = sb->buf;
len = sb->len;
}
ret |= filter2(...., src, len, sb);
if (ret) {
src = sb->buf;
len = sb->len;
}
....
return ret | filtern(..., src, len, sb);
}
That's why subfilters the convert_to_* functions called were also rewritten
to work this way.
Signed-off-by: Pierre Habouzit <madcoder@debian.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-09-16 15:51:04 +02:00
|
|
|
#include "strbuf.h"
|
2013-11-14 20:20:58 +01:00
|
|
|
#include "hashmap.h"
|
2018-01-24 00:46:51 +01:00
|
|
|
#include "list.h"
|
2009-09-09 13:38:58 +02:00
|
|
|
#include "advice.h"
|
2011-02-23 00:41:20 +01:00
|
|
|
#include "gettext.h"
|
2011-05-20 21:59:01 +02:00
|
|
|
#include "convert.h"
|
2014-06-11 09:56:49 +02:00
|
|
|
#include "trace.h"
|
2014-08-07 13:59:17 +02:00
|
|
|
#include "string-list.h"
|
pack-revindex: drop hash table
The main entry point to the pack-revindex code is
find_pack_revindex(). This calls revindex_for_pack(), which
lazily computes and caches the revindex for the pack.
We store the cache in a very simple hash table. It's created
by init_pack_revindex(), which inserts an entry for every
packfile we know about, and we never grow or shrink the
hash. If we ever need the revindex for a pack that isn't in
the hash, we die() with an internal error.
This can lead to a race, because we may load more packs
after having called init_pack_revindex(). For example,
imagine we have one process which needs to look at the
revindex for a variety of objects (e.g., cat-file's
"%(objectsize:disk)" format). Simultaneously, git-gc is
running, which is doing a `git repack -ad`. We might hit a
sequence like:
1. We need the revidx for some packed object. We call
find_pack_revindex() and end up in init_pack_revindex()
to create the hash table for all packs we know about.
2. We look up another object and can't find it, because
the repack has removed the pack it's in. We re-scan the
pack directory and find a new pack containing the
object. It gets added to our packed_git list.
3. We call find_pack_revindex() for the new object, which
hits revindex_for_pack() for our new pack. It can't
find the packed_git in the revindex hash, and dies.
You could also replace the `repack` above with a push or
fetch to create a new pack, though these are less likely
(you would have to somehow learn about the new objects to
look them up).
Prior to 1a6d8b9 (do not discard revindex when re-preparing
packfiles, 2014-01-15), this was safe, as we threw away the
revindex whenever we re-scanned the pack directory (and thus
re-created the revindex hash on the fly). However, we don't
want to simply revert that commit, as it was solving a
different race.
So we have a few options:
- We can fix the race in 1a6d8b9 differently, by having
the bitmap code look in the revindex hash instead of
caching the pointer. But this would introduce a lot of
extra hash lookups for common bitmap operations.
- We could teach the revindex to dynamically add new packs
to the hash table. This would perform the same, but
would mean adding extra code to the revindex hash (which
currently cannot be resized at all).
- We can get rid of the hash table entirely. There is
exactly one revindex per pack, so we can just store it
in the packed_git struct. Since it's initialized lazily,
it does not add to the startup cost.
This is the best of both worlds: less code and fewer
hash table lookups. The original code likely avoided
this in the name of encapsulation. But the packed_git
and reverse_index code are fairly intimate already, so
it's not much of a loss.
This patch implements the final option. It's a minimal
conversion that retains the pack_revindex struct. No callers
need to change, and we can do further cleanup in a follow-on
patch.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-12-21 07:19:49 +01:00
|
|
|
#include "pack-revindex.h"
|
2017-03-11 23:28:18 +01:00
|
|
|
#include "hash.h"
|
2017-06-22 20:43:35 +02:00
|
|
|
#include "path.h"
|
2017-06-22 20:19:48 +02:00
|
|
|
#include "sha1-array.h"
|
2017-11-12 22:28:53 +01:00
|
|
|
#include "repository.h"
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2008-10-01 20:05:20 +02:00
|
|
|
#include <zlib.h>
|
2011-06-10 20:52:15 +02:00
|
|
|
/*
 * A zlib stream (the z_stream from <zlib.h>) bundled with a second set of
 * counters and buffer pointers declared with git's own types:
 * unsigned long for avail_in, avail_out, total_in and total_out, and
 * unsigned char * for next_in and next_out.
 *
 * This is the handle taken by the git_inflate*() and git_deflate*()
 * functions declared below.
 *
 * NOTE(review): the extra members carry the same names as z_stream's own
 * members; presumably the wrappers copy between the two sets around each
 * zlib call - confirm in the implementation, which is not visible here.
 */
typedef struct git_zstream {
|
|
|
|
z_stream z;
|
|
|
|
unsigned long avail_in;
|
|
|
|
unsigned long avail_out;
|
|
|
|
unsigned long total_in;
|
|
|
|
unsigned long total_out;
|
|
|
|
unsigned char *next_in;
|
|
|
|
unsigned char *next_out;
|
|
|
|
} git_zstream;
|
|
|
|
|
|
|
|
void git_inflate_init(git_zstream *);
|
|
|
|
void git_inflate_init_gzip_only(git_zstream *);
|
|
|
|
void git_inflate_end(git_zstream *);
|
|
|
|
int git_inflate(git_zstream *, int flush);
|
|
|
|
|
|
|
|
void git_deflate_init(git_zstream *, int level);
|
|
|
|
void git_deflate_init_gzip(git_zstream *, int level);
|
2013-03-15 23:21:51 +01:00
|
|
|
void git_deflate_init_raw(git_zstream *, int level);
|
2011-06-10 20:52:15 +02:00
|
|
|
void git_deflate_end(git_zstream *);
|
2011-10-28 23:48:40 +02:00
|
|
|
int git_deflate_abort(git_zstream *);
|
2011-06-10 20:52:15 +02:00
|
|
|
int git_deflate_end_gently(git_zstream *);
|
|
|
|
int git_deflate(git_zstream *, int flush);
|
|
|
|
unsigned long git_deflate_bound(git_zstream *, unsigned long);
|
2009-01-08 04:54:47 +01:00
|
|
|
|
2015-03-14 00:39:27 +01:00
|
|
|
/* The length in bytes and in hex digits of an object name (SHA-1 value). */
|
|
|
|
#define GIT_SHA1_RAWSZ 20
|
|
|
|
#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ)
|
|
|
|
|
2017-03-26 18:01:23 +02:00
|
|
|
/* The length in bytes and in hex digits of the largest possible hash value. */
|
|
|
|
#define GIT_MAX_RAWSZ GIT_SHA1_RAWSZ
|
|
|
|
#define GIT_MAX_HEXSZ GIT_SHA1_HEXSZ
|
|
|
|
|
2015-03-14 00:39:27 +01:00
|
|
|
/*
 * An object name held in raw (binary, not hex) form.  The buffer is
 * GIT_MAX_RAWSZ bytes, i.e. sized for the largest possible hash value.
 */
struct object_id {
|
2017-03-26 18:01:23 +02:00
|
|
|
unsigned char hash[GIT_MAX_RAWSZ];
|
2015-03-14 00:39:27 +01:00
|
|
|
};
|
|
|
|
|
2017-11-12 22:28:53 +01:00
|
|
|
#define the_hash_algo the_repository->hash_algo
|
|
|
|
|
2006-02-26 16:13:46 +01:00
|
|
|
#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
|
2005-04-30 18:51:03 +02:00
|
|
|
#define DTYPE(de) ((de)->d_type)
|
|
|
|
#else
|
2006-01-20 22:33:20 +01:00
|
|
|
#undef DT_UNKNOWN
|
|
|
|
#undef DT_DIR
|
|
|
|
#undef DT_REG
|
|
|
|
#undef DT_LNK
|
2005-04-30 18:51:03 +02:00
|
|
|
#define DT_UNKNOWN 0
|
|
|
|
#define DT_DIR 1
|
|
|
|
#define DT_REG 2
|
2005-05-13 02:16:04 +02:00
|
|
|
#define DT_LNK 3
|
2005-04-30 18:51:03 +02:00
|
|
|
#define DTYPE(de) DT_UNKNOWN
|
|
|
|
#endif
|
|
|
|
|
2007-04-22 18:43:56 +02:00
|
|
|
/* unknown mode (impossible combination S_IFIFO|S_IFCHR) */
|
|
|
|
#define S_IFINVALID 0030000
|
|
|
|
|
2007-04-10 06:14:58 +02:00
|
|
|
/*
|
|
|
|
* A "directory link" is a link to another git directory.
|
|
|
|
*
|
|
|
|
* The value 0160000 is not normally a valid mode, and
|
|
|
|
* also just happens to be S_IFDIR + S_IFLNK
|
|
|
|
*/
|
2007-05-21 22:08:28 +02:00
|
|
|
#define S_IFGITLINK 0160000
|
|
|
|
#define S_ISGITLINK(m) (((m) & S_IFMT) == S_IFGITLINK)
|
2007-04-10 06:14:58 +02:00
|
|
|
|
tree-diff: rework diff_tree() to generate diffs for multiparent cases as well
Previously diff_tree(), which is now named ll_diff_tree_sha1(), was
generating diff_filepair(s) for two trees t1 and t2, and that was
usually used for a commit as t1=HEAD~, and t2=HEAD - i.e. to see changes
a commit introduces.
In Git, however, we have fundamentally built flexibility in that a
commit can have many parents - 1 for a plain commit, 2 for a simple merge,
but also more than 2 for merging several heads at once.
For merges there is a so called combine-diff, which shows diff, a merge
introduces by itself, omitting changes done by any parent. That works
through first finding paths, that are different to all parents, and then
showing generalized diff, with separate columns for +/- for each parent.
The code lives in combine-diff.c .
There is an impedance mismatch, however, in that a commit could
generally have any number of parents, and that while diffing trees, we
divide cases for 2-tree diffs and more-than-2-tree diffs. I mean there
is no special casing for multiple parents commits in e.g.
revision-walker .
That impedance mismatch *hurts* *performance* *badly* for generating
combined diffs - in "combine-diff: optimize combine_diff_path
sets intersection" I've already removed some slowness from it, but from
the timings provided there, it could be seen, that combined diffs still
cost more than an order of magnitude more cpu time, compared to diff for
usual commits, and that would only be an optimistic estimate, if we take
into account that for e.g. linux.git there is only one merge for several
dozens of plain commits.
That slowness comes from the fact that currently, while generating
combined diff, a lot of time is spent computing diff(commit,commit^2)
just to only then intersect that huge diff to almost small set of files
from diff(commit,commit^1).
That's because at present, to compute combine-diff, for first finding
paths, that "every parent touches", we use the following combine-diff
property/definition:
D(A,P1...Pn) = D(A,P1) ^ ... ^ D(A,Pn) (w.r.t. paths)
where
D(A,P1...Pn) is combined diff between commit A, and parents Pi
and
D(A,Pi) is usual two-tree diff Pi..A
So if any of that D(A,Pi) is huge, treating 1 n-parent combine-diff as n
1-parent diffs and intersecting results will be slow.
And usually, for linux.git and other topic-based workflows, that
D(A,P2) is huge, because, if merge-base of A and P2, is several dozens
of merges (from A, via first parent) below, that D(A,P2) will be diffing
sum of merges from several subsystems to 1 subsystem.
The solution is to avoid computing n 1-parent diffs, and to find
changed-to-all-parents paths via scanning A's and all Pi's trees
simultaneously, at each step comparing their entries, and based on that
comparison, populate paths result, and deduce we could *skip*
*recursing* into subdirectories, if at least for 1 parent, sha1 of that
dir tree is the same as in A. That would save us from doing significant
amount of needless work.
Such approach is very similar to what diff_tree() does, only there we
deal with scanning only 2 trees simultaneously, and for n+1 tree, the
logic is a bit more complex:
D(T,P1...Pn) calculation scheme
-------------------------------
D(T,P1...Pn) = D(T,P1) ^ ... ^ D(T,Pn) (regarding resulting paths set)
D(T,Pj) - diff between T..Pj
D(T,P1...Pn) - combined diff from T to parents P1,...,Pn
We start from all trees, which are sorted, and compare their entries in
lock-step:
T P1 Pn
- - -
|t| |p1| |pn|
|-| |--| ... |--| imin = argmin(p1...pn)
| | | | | |
|-| |--| |--|
|.| |. | |. |
. . .
. . .
at any time there could be 3 cases:
1) t < p[imin];
2) t > p[imin];
3) t = p[imin].
Schematic deduction of what every case means, and what to do, follows:
1) t < p[imin] -> ∀j t ∉ Pj -> "+t" ∈ D(T,Pj) -> D += "+t"; t↓
2) t > p[imin]
2.1) ∃j: pj > p[imin] -> "-p[imin]" ∉ D(T,Pj) -> D += ø; ∀ pi=p[imin] pi↓
2.2) ∀i pi = p[imin] -> pi ∉ T -> "-pi" ∈ D(T,Pi) -> D += "-p[imin]"; ∀i pi↓
3) t = p[imin]
3.1) ∃j: pj > p[imin] -> "+t" ∈ D(T,Pj) -> only pi=p[imin] remains to investigate
3.2) pi = p[imin] -> investigate δ(t,pi)
|
|
v
3.1+3.2) looking at δ(t,pi) ∀i: pi=p[imin] - if all != ø ->
⎧δ(t,pi) - if pi=p[imin]
-> D += ⎨
⎩"+t" - if pi>p[imin]
in any case t↓ ∀ pi=p[imin] pi↓
~
For comparison, here is how diff_tree() works:
D(A,B) calculation scheme
-------------------------
A B
- -
|a| |b| a < b -> a ∉ B -> D(A,B) += +a a↓
|-| |-| a > b -> b ∉ A -> D(A,B) += -b b↓
| | | | a = b -> investigate δ(a,b) a↓ b↓
|-| |-|
|.| |.|
. .
. .
~~~~~~~~
This patch generalizes diff tree-walker to work with arbitrary number of
parents as described above - i.e. now there is a resulting tree t, and
some parents trees tp[i] i=[0..nparent). The generalization builds on
the fact that usual diff
D(A,B)
is by definition the same as combined diff
D(A,[B]),
so if we could rework the code for common case and make it be not slower
for nparent=1 case, usual diff(t1,t2) generation will not be slower, and
multiparent diff tree-walker would greatly benefit generating
combine-diff.
What we do is as follows:
1) diff tree-walker ll_diff_tree_sha1() is internally reworked to be
a paths generator (new name diff_tree_paths()), with each generated path
being `struct combine_diff_path` with info for path, new sha1,mode and for
every parent which sha1,mode it was in it.
2) From that info, we can still generate usual diff queue with
struct diff_filepairs, via "exporting" generated
combine_diff_path, if we know we run for nparent=1 case.
(see emit_diff() which is now named emit_diff_first_parent_only())
3) In order for diff_can_quit_early(), which checks
DIFF_OPT_TST(opt, HAS_CHANGES))
to work, that exporting have to be happening not in bulk, but
incrementally, one diff path at a time.
For such consumers, there is a new callback in diff_options
introduced:
->pathchange(opt, struct combine_diff_path *)
which, if set to !NULL, is called for every generated path.
(see new compat ll_diff_tree_sha1() wrapper around new paths
generator for setup)
4) The paths generation itself, is reworked from previous
ll_diff_tree_sha1() code according to "D(A,P1...Pn) calculation
scheme" provided above:
On the start we allocate [nparent] arrays in place what was
earlier just for one parent tree.
then we just generalize loops, and comparison according to the
algorithm.
Some notes(*):
1) alloca(), for small arrays, is used for "runs not slower for
nparent=1 case than before" goal - if we change it to xmalloc()/free()
the timings get ~1% worse. For alloca() we use just-introduced
xalloca/xalloca_free compatibility wrappers, so it should not be a
portability problem.
2) For every parent tree, we need to keep a tag, whether entry from that
parent equals to entry from minimal parent. For performance reasons I'm
keeping that tag in entry's mode field in unused bit - see S_IFXMIN_NEQ.
Not doing so, we'd need to alloca another [nparent] array, which hurts
performance.
3) For emitted paths, memory could be reused, if we know the path was
processed via callback and will not be needed later. We use efficient
hand-made realloc-style path_appendnew(), that saves us from ~1-1.5%
of potential additional slowdown.
4) goto(s) are used in several places, as the code executes a little bit
faster with lowered register pressure.
Also
- we should now check for FIND_COPIES_HARDER not only when two entries
names are the same, and their hashes are equal, but also for a case,
when a path was removed from some of all parents having it.
The reason is, if we don't, that path won't be emitted at all (see
"a > xi" case), and we'll just skip it, and FIND_COPIES_HARDER wants
all paths - with diff or without - to be emitted, to be later analyzed
for being copies sources.
The new check is only necessary for nparent >1, as for nparent=1 case
xmin_eqtotal always =1 =nparent, and a path is always added to diff as
removal.
~~~~~~~~
Timings for
# without -c, i.e. testing only nparent=1 case
`git log --raw --no-abbrev --no-renames`
before and after the patch are as follows:
navy.git linux.git v3.10..v3.11
before 0.611s 1.889s
after 0.619s 1.907s
slowdown 1.3% 0.9%
These timings show we did no harm to usual diff(tree1,tree2) generation.
From the table we can see that we actually did ~1% slowdown, but I think
I've "earned" that 1% in the previous patch ("tree-diff: reuse base
str(buf) memory on sub-tree recursion", HEAD~~) so for nparent=1 case,
net timings stays approximately the same.
The output also stayed the same.
(*) If we revert 1)-4) to more usual techniques, for nparent=1 case,
we'll get ~2-2.5% of additional slowdown, which I've tried to avoid, as
"do no harm for nparent=1 case" rule.
For linux.git, combined diff will run an order of magnitude faster and
appropriate timings will be provided in the next commit, as we'll be
taking advantage of the new diff tree-walker for combined-diff
generation there.
P.S. and combined diff is not some exotic/for-play-only stuff - for
example for a program I write to represent Git archives as readonly
filesystem, there is initial scan with
`git log --reverse --raw --no-abbrev --no-renames -c`
to extract log of what was created/changed when, as a result building a
map
{} sha1 -> in which commit (and date) a content was added
that `-c` means also show combined diff for merges, and without them, if
a merge is non-trivial (merges changes from two parents with both having
separate changes to a file), or an evil one, the map will not be full,
i.e. some valid sha1 would be absent from it.
That case was my initial motivation for combined diffs speedup.
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-04-06 23:46:26 +02:00
|
|
|
/*
|
|
|
|
* Some mode bits are also used internally for computations.
|
|
|
|
*
|
|
|
|
* They *must* not overlap with any valid modes, and they *must* not be emitted
|
|
|
|
* to the outside world - i.e. appear on disk or network. In other words, these are just
|
|
|
|
* temporary fields, which we internally use, but they have to stay in-house.
|
|
|
|
*
|
|
|
|
* ( such approach is valid, as standard S_IF* fits into 16 bits, and in Git
|
|
|
|
* codebase mode is `unsigned int` which is assumed to be at least 32 bits )
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* used internally in tree-diff */
|
|
|
|
#define S_DIFFTREE_IFXMIN_NEQ 0x80000000
|
|
|
|
|
|
|
|
|
2005-07-14 03:46:20 +02:00
|
|
|
/*
|
|
|
|
* Intensive research over the course of many years has shown that
|
|
|
|
* port 9418 is totally unused by anything else. Or
|
|
|
|
*
|
|
|
|
* Your search - "port 9418" - did not match any documents.
|
|
|
|
*
|
|
|
|
* as www.google.com puts it.
|
2005-09-12 20:23:00 +02:00
|
|
|
*
|
|
|
|
* This port has been properly assigned for git use by IANA:
|
|
|
|
* git (Assigned-9418) [I06-050728-0001].
|
|
|
|
*
|
|
|
|
* git 9418/tcp git pack transfer service
|
|
|
|
* git 9418/udp git pack transfer service
|
|
|
|
*
|
|
|
|
* with Linus Torvalds <torvalds@osdl.org> as the point of
|
|
|
|
* contact. September 2005.
|
|
|
|
*
|
|
|
|
* See http://www.iana.org/assignments/port-numbers
|
2005-07-14 03:46:20 +02:00
|
|
|
*/
|
|
|
|
#define DEFAULT_GIT_PORT 9418
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/*
|
|
|
|
* Basic data structures for the directory cache
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define CACHE_SIGNATURE 0x44495243 /* "DIRC" */
|
|
|
|
/*
 * Header of the directory cache: three 32-bit fields.
 *
 *   hdr_signature - NOTE(review): presumably checked against
 *                   CACHE_SIGNATURE ("DIRC"); confirm in the reader.
 *   hdr_version   - NOTE(review): presumably a value in the range
 *                   INDEX_FORMAT_LB..INDEX_FORMAT_UB; confirm in the reader.
 *   hdr_entries   - NOTE(review): presumably the number of entries that
 *                   follow; confirm in the reader.
 */
struct cache_header {
|
2013-08-18 21:41:51 +02:00
|
|
|
uint32_t hdr_signature;
|
|
|
|
uint32_t hdr_version;
|
|
|
|
uint32_t hdr_entries;
|
2005-04-08 00:13:13 +02:00
|
|
|
};
|
|
|
|
|
2012-04-04 18:12:43 +02:00
|
|
|
#define INDEX_FORMAT_LB 2
|
|
|
|
#define INDEX_FORMAT_UB 4
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/*
|
|
|
|
* The "cache_time" is just the low 32 bits of the
|
|
|
|
* time. It doesn't matter if it overflows - we only
|
|
|
|
* check it for equality in the 32 bits we save.
|
|
|
|
*/
|
|
|
|
/*
 * A timestamp stored as two 32-bit fields.  Both members are uint32_t, so
 * only 32 bits of each value can be represented.
 *
 * NOTE(review): presumably sec is whole seconds and nsec the nanosecond
 * part - confirm against the code that fills these in.
 */
struct cache_time {
|
2013-08-18 21:41:51 +02:00
|
|
|
uint32_t sec;
|
|
|
|
uint32_t nsec;
|
2005-04-08 00:13:13 +02:00
|
|
|
};
|
|
|
|
|
2013-06-20 10:37:50 +02:00
|
|
|
/*
 * File metadata recorded for a cache entry (embedded in struct cache_entry
 * as ce_stat_data).
 *
 * sd_ctime and sd_mtime are struct cache_time values; the remaining
 * members are plain unsigned int.
 *
 * NOTE(review): presumably these are filled from the matching st_* members
 * of struct stat, truncated to unsigned int - confirm against the code
 * that populates this struct, which is not visible here.
 */
struct stat_data {
|
|
|
|
struct cache_time sd_ctime;
|
|
|
|
struct cache_time sd_mtime;
|
|
|
|
unsigned int sd_dev;
|
|
|
|
unsigned int sd_ino;
|
|
|
|
unsigned int sd_uid;
|
|
|
|
unsigned int sd_gid;
|
|
|
|
unsigned int sd_size;
|
|
|
|
};
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/*
 * One entry of the directory cache.
 *
 * Member order is significant: copy_cache_entry() copies the byte range
 * that starts at ce_stat_data and ends just before name, so ent and name
 * are never copied while every member in between is.
 *
 *   ent          - hashmap linkage
 *   ce_stat_data - recorded file metadata
 *   ce_mode      - file mode, as produced by create_ce_mode()
 *   ce_flags     - CE_* flag bits; the stage is kept in the CE_STAGEMASK
 *                  bits and read back by ce_stage()
 *   ce_namelen   - value returned by the ce_namelen() macro
 *   index        - see the inline comment on the member
 *   oid          - object name of the entry
 *   name         - flexible array; cache_entry_size(len) sizes an entry as
 *                  offsetof(name) + len + 1 bytes
 */
struct cache_entry {
|
2013-11-14 20:21:58 +01:00
|
|
|
struct hashmap_entry ent;
|
2013-06-20 10:37:50 +02:00
|
|
|
struct stat_data ce_stat_data;
|
2005-04-15 19:44:27 +02:00
|
|
|
unsigned int ce_mode;
|
2008-01-15 01:03:17 +01:00
|
|
|
unsigned int ce_flags;
|
2012-07-11 11:22:37 +02:00
|
|
|
unsigned int ce_namelen;
|
2014-06-13 14:19:36 +02:00
|
|
|
unsigned int index; /* for link extension */
|
2016-09-05 22:07:52 +02:00
|
|
|
struct object_id oid;
|
2006-01-07 10:33:54 +01:00
|
|
|
char name[FLEX_ARRAY]; /* more */
|
2005-04-08 00:13:13 +02:00
|
|
|
};
|
|
|
|
|
2005-04-16 07:51:44 +02:00
|
|
|
#define CE_STAGEMASK (0x3000)
|
2008-08-17 08:02:08 +02:00
|
|
|
#define CE_EXTENDED (0x4000)
|
2006-02-09 06:15:24 +01:00
|
|
|
#define CE_VALID (0x8000)
|
2005-04-16 17:33:23 +02:00
|
|
|
#define CE_STAGESHIFT 12
|
2005-04-16 07:51:44 +02:00
|
|
|
|
2008-10-01 06:04:01 +02:00
|
|
|
/*
|
2014-06-13 14:19:25 +02:00
|
|
|
* Range 0xFFFF0FFF in ce_flags is divided into
|
2008-10-01 06:04:01 +02:00
|
|
|
* two parts: in-memory flags and on-disk ones.
|
|
|
|
* Flags in CE_EXTENDED_FLAGS will get saved on-disk
|
|
|
|
* if you want to save a new flag, add it in
|
|
|
|
* CE_EXTENDED_FLAGS
|
|
|
|
*
|
|
|
|
* In-memory only flags
|
|
|
|
*/
|
2010-11-27 07:22:16 +01:00
|
|
|
#define CE_UPDATE (1 << 16)
|
|
|
|
#define CE_REMOVE (1 << 17)
|
|
|
|
#define CE_UPTODATE (1 << 18)
|
|
|
|
#define CE_ADDED (1 << 19)
|
Fix name re-hashing semantics
We handled the case of removing and re-inserting cache entries badly,
which is something that merging commonly needs to do (removing the
different stages, and then re-inserting one of them as the merged
state).
We even had a rather ugly special case for this failure case, where
replace_index_entry() basically turned itself into a no-op if the new
and the old entries were the same, exactly because the hash routines
didn't handle it on their own.
So what this patch does is to not just have the UNHASHED bit, but a
HASHED bit too, and when you insert an entry into the name hash, that
involves:
- clear the UNHASHED bit, because now it's valid again for lookup
(which is really all that UNHASHED meant)
- if we're being lazy, we're done here (but we still want to clear the
UNHASHED bit regardless of lazy mode, since we can become unlazy
later, and so we need the UNHASHED bit to always be set correctly,
even if we never actually insert the entry into the hash list)
- if it was already hashed, we just leave it on the list
- otherwise mark it HASHED and insert it into the list
this all means that unhashing and rehashing a name all just works
automatically. Obviously, you cannot change the name of an entry (that
would be a serious bug), but nothing can validly do that anyway (you'd
have to allocate a new struct cache_entry anyway since the name length
could change), so that's not a new limitation.
The code actually gets simpler in many ways, although the lazy hashing
does mean that there are a few odd cases (ie something can be marked
unhashed even though it was never on the hash in the first place, and
isn't actually marked hashed!).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-23 05:37:40 +01:00
|
|
|
|
2010-11-27 07:22:16 +01:00
|
|
|
#define CE_HASHED (1 << 20)
|
2017-09-22 18:35:40 +02:00
|
|
|
#define CE_FSMONITOR_VALID (1 << 21)
|
2010-11-27 07:22:16 +01:00
|
|
|
#define CE_WT_REMOVE (1 << 22) /* remove in work directory */
|
|
|
|
#define CE_CONFLICTED (1 << 23)
|
2008-01-15 01:03:17 +01:00
|
|
|
|
2010-11-27 07:22:16 +01:00
|
|
|
#define CE_UNPACKED (1 << 24)
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check are delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 07:24:04 +01:00
|
|
|
#define CE_NEW_SKIP_WORKTREE (1 << 25)
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-07 23:59:54 +01:00
|
|
|
|
checkout: avoid unnecessary match_pathspec calls
In checkout_paths() we do this
- for all updated items, call match_pathspec
- for all items, call match_pathspec (inside unmerge_cache)
- for all items, call match_pathspec (for showing "path .. is unmerged)
- for updated items, call match_pathspec and update paths
That's a lot of duplicate match_pathspec(s) and the function is not
exactly cheap to be called so many times, especially on large indexes.
This patch makes it call match_pathspec once per updated index entry,
save the result in ce_flags and reuse the results in the following
loops.
The changes in 0a1283b (checkout $tree $path: do not clobber local
changes in $path not in $tree - 2011-09-30) limit the affected paths
to ones we read from $tree. We do not do anything to other modified
entries in this case, so the "for all items" above could be modified
to "for all updated items". But..
The command's behavior now is modified slightly: unmerged entries that
match $path, but not updated by $tree, are now NOT touched. Although
this should be considered a bug fix, not a regression. A new test is
added for this change.
And while at it, free ps_matched after use.
The following command is tested on webkit, 215k entries. The pattern
is chosen mainly to make match_pathspec sweat:
git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*"
before after
real 0m3.493s 0m2.737s
user 0m2.239s 0m1.586s
sys 0m1.252s 0m1.151s
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-27 06:58:21 +01:00
|
|
|
/* used to temporarily mark paths matched by pathspecs */
|
|
|
|
#define CE_MATCHED (1 << 26)
|
|
|
|
|
2014-06-13 14:19:39 +02:00
|
|
|
#define CE_UPDATE_IN_BASE (1 << 27)
|
2014-06-13 14:19:43 +02:00
|
|
|
#define CE_STRIP_NAME (1 << 28)
|
2014-06-13 14:19:39 +02:00
|
|
|
|
2008-10-01 06:04:01 +02:00
|
|
|
/*
|
|
|
|
* Extended on-disk flags
|
|
|
|
*/
|
2010-11-27 07:22:16 +01:00
|
|
|
#define CE_INTENT_TO_ADD (1 << 29)
|
|
|
|
#define CE_SKIP_WORKTREE (1 << 30)
|
2008-10-01 06:04:01 +02:00
|
|
|
/* CE_EXTENDED2 is for future extension */
|
2015-12-29 07:35:46 +01:00
|
|
|
#define CE_EXTENDED2 (1U << 31)
|
2008-10-01 06:04:01 +02:00
|
|
|
|
2009-08-20 15:46:57 +02:00
|
|
|
#define CE_EXTENDED_FLAGS (CE_INTENT_TO_ADD | CE_SKIP_WORKTREE)
|
2008-10-01 06:04:01 +02:00
|
|
|
|
|
|
|
/*
 * Safeguard to avoid saving wrong flags.  CE_EXTENDED_FLAGS (the set of
 * flags written to disk as extended flags) must not contain any of:
 *  - CE_EXTENDED2 (0x80000000), which won't get saved until its
 *    semantics are known
 *  - Bits in 0x0000FFFF, which have been saved in ce_flags already
 *  - Bits in 0x1FFF0000, which are currently in-memory flags
 *    (bit 16, CE_UPDATE, through bit 28, CE_STRIP_NAME)
 *
 * The mask below is the union of those three ranges.
 */
#if CE_EXTENDED_FLAGS & 0x9FFFFFFF
#error "CE_EXTENDED_FLAGS out of range"
#endif
|
|
|
|
|
2016-02-16 23:34:44 +01:00
|
|
|
/* Forward structure decls */
|
2013-07-14 10:35:25 +02:00
|
|
|
struct pathspec;
|
2016-02-16 23:34:44 +01:00
|
|
|
struct child_process;
|
2013-07-14 10:35:25 +02:00
|
|
|
|
2008-02-23 05:41:17 +01:00
|
|
|
/*
|
|
|
|
* Copy the sha1 and stat state of a cache entry from one to
|
|
|
|
* another. But we never change the name, or the hash state!
|
|
|
|
*/
|
2013-06-02 17:46:51 +02:00
|
|
|
static inline void copy_cache_entry(struct cache_entry *dst,
|
|
|
|
const struct cache_entry *src)
|
2008-02-23 05:41:17 +01:00
|
|
|
{
|
2013-11-14 20:22:27 +01:00
|
|
|
unsigned int state = dst->ce_flags & CE_HASHED;
|
2008-02-23 05:41:17 +01:00
|
|
|
|
|
|
|
/* Don't copy hash chain and name */
|
2013-11-14 20:21:58 +01:00
|
|
|
memcpy(&dst->ce_stat_data, &src->ce_stat_data,
|
|
|
|
offsetof(struct cache_entry, name) -
|
|
|
|
offsetof(struct cache_entry, ce_stat_data));
|
2008-02-23 05:41:17 +01:00
|
|
|
|
|
|
|
/* Restore the hash state */
|
2013-11-14 20:22:27 +01:00
|
|
|
dst->ce_flags = (dst->ce_flags & ~CE_HASHED) | state;
|
2008-02-23 05:41:17 +01:00
|
|
|
}
|
|
|
|
|
2012-07-11 11:22:37 +02:00
|
|
|
static inline unsigned create_ce_flags(unsigned stage)
|
2008-01-19 08:42:00 +01:00
|
|
|
{
|
2012-07-11 11:22:37 +02:00
|
|
|
return (stage << CE_STAGESHIFT);
|
2008-01-19 08:42:00 +01:00
|
|
|
}
|
|
|
|
|
2012-07-11 11:22:37 +02:00
|
|
|
#define ce_namelen(ce) ((ce)->ce_namelen)
|
2005-04-16 17:33:23 +02:00
|
|
|
#define ce_size(ce) cache_entry_size(ce_namelen(ce))
|
2008-01-15 01:03:17 +01:00
|
|
|
#define ce_stage(ce) ((CE_STAGEMASK & (ce)->ce_flags) >> CE_STAGESHIFT)
|
2008-01-19 08:45:24 +01:00
|
|
|
#define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE)
|
2009-08-20 15:46:57 +02:00
|
|
|
#define ce_skip_worktree(ce) ((ce)->ce_flags & CE_SKIP_WORKTREE)
|
2008-01-19 08:45:24 +01:00
|
|
|
#define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE)
|
2015-08-22 03:08:05 +02:00
|
|
|
#define ce_intent_to_add(ce) ((ce)->ce_flags & CE_INTENT_TO_ADD)
|
2005-04-16 17:33:23 +02:00
|
|
|
|
2005-04-17 07:26:31 +02:00
|
|
|
/* Collapse permission bits to 0755 when bit 0100 (owner execute) is set, otherwise 0644. */
#define ce_permissions(mode) (((mode) & 0100) ? 0755 : 0644)
|
2005-05-05 14:38:25 +02:00
|
|
|
static inline unsigned int create_ce_mode(unsigned int mode)
|
|
|
|
{
|
|
|
|
if (S_ISLNK(mode))
|
2008-01-15 01:03:17 +01:00
|
|
|
return S_IFLNK;
|
2007-05-21 22:08:28 +02:00
|
|
|
if (S_ISDIR(mode) || S_ISGITLINK(mode))
|
2008-01-15 01:03:17 +01:00
|
|
|
return S_IFGITLINK;
|
|
|
|
return S_IFREG | ce_permissions(mode);
|
2005-05-05 14:38:25 +02:00
|
|
|
}
|
2013-06-02 17:46:51 +02:00
|
|
|
static inline unsigned int ce_mode_from_stat(const struct cache_entry *ce,
|
|
|
|
unsigned int mode)
|
2007-02-17 07:43:48 +01:00
|
|
|
{
|
2007-03-02 22:11:30 +01:00
|
|
|
extern int trust_executable_bit, has_symlinks;
|
|
|
|
if (!has_symlinks && S_ISREG(mode) &&
|
2008-01-15 01:03:17 +01:00
|
|
|
ce && S_ISLNK(ce->ce_mode))
|
2007-03-02 22:11:30 +01:00
|
|
|
return ce->ce_mode;
|
2007-02-17 07:43:48 +01:00
|
|
|
if (!trust_executable_bit && S_ISREG(mode)) {
|
2008-01-15 01:03:17 +01:00
|
|
|
if (ce && S_ISREG(ce->ce_mode))
|
2007-02-17 07:43:48 +01:00
|
|
|
return ce->ce_mode;
|
|
|
|
return create_ce_mode(0666);
|
|
|
|
}
|
|
|
|
return create_ce_mode(mode);
|
|
|
|
}
|
2008-01-31 10:17:48 +01:00
|
|
|
static inline int ce_to_dtype(const struct cache_entry *ce)
|
|
|
|
{
|
|
|
|
unsigned ce_mode = ntohl(ce->ce_mode);
|
|
|
|
if (S_ISREG(ce_mode))
|
|
|
|
return DT_REG;
|
|
|
|
else if (S_ISDIR(ce_mode) || S_ISGITLINK(ce_mode))
|
|
|
|
return DT_DIR;
|
|
|
|
else if (S_ISLNK(ce_mode))
|
|
|
|
return DT_LNK;
|
|
|
|
else
|
|
|
|
return DT_UNKNOWN;
|
|
|
|
}
|
2010-10-04 12:53:11 +02:00
|
|
|
static inline unsigned int canon_mode(unsigned int mode)
|
|
|
|
{
|
|
|
|
if (S_ISREG(mode))
|
|
|
|
return S_IFREG | ce_permissions(mode);
|
|
|
|
if (S_ISLNK(mode))
|
|
|
|
return S_IFLNK;
|
|
|
|
if (S_ISDIR(mode))
|
|
|
|
return S_IFDIR;
|
|
|
|
return S_IFGITLINK;
|
|
|
|
}
|
2005-04-17 07:26:31 +02:00
|
|
|
|
2011-10-25 20:00:04 +02:00
|
|
|
/*
 * Byte count from the start of struct cache_entry through a name of `len`
 * bytes plus one extra byte (presumably the terminating NUL).
 */
#define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
|
2005-04-16 06:45:38 +02:00
|
|
|
|
2014-06-13 14:19:27 +02:00
|
|
|
/*
 * Change flags.  Each one is a distinct single bit, (1 << 0) through
 * (1 << 8), so any combination can be OR-ed into one word.
 * NOTE(review): presumably these are the values kept in
 * index_state.cache_changed -- confirm against the callers.
 */
#define SOMETHING_CHANGED	(1 << 0) /* unclassified changes go here */
#define CE_ENTRY_CHANGED	(1 << 1)
#define CE_ENTRY_REMOVED	(1 << 2)
#define CE_ENTRY_ADDED		(1 << 3)
#define RESOLVE_UNDO_CHANGED	(1 << 4)
#define CACHE_TREE_CHANGED	(1 << 5)
#define SPLIT_INDEX_ORDERED	(1 << 6)
#define UNTRACKED_CHANGED	(1 << 7)
#define FSMONITOR_CHANGED	(1 << 8)
|
2014-06-13 14:19:27 +02:00
|
|
|
|
2014-06-13 14:19:36 +02:00
|
|
|
/* Forward declaration; struct index_state only holds a pointer to it. */
struct split_index;
|
2015-03-08 11:12:33 +01:00
|
|
|
/* Forward declaration; struct index_state only holds a pointer to it. */
struct untracked_cache;
|
|
|
|
|
2007-04-02 03:14:06 +02:00
|
|
|
struct index_state {
|
|
|
|
struct cache_entry **cache;
|
2012-04-04 18:12:43 +02:00
|
|
|
unsigned int version;
|
2007-04-02 03:14:06 +02:00
|
|
|
unsigned int cache_nr, cache_alloc, cache_changed;
|
2009-12-25 09:30:51 +01:00
|
|
|
struct string_list *resolve_undo;
|
2007-04-02 03:14:06 +02:00
|
|
|
struct cache_tree *cache_tree;
|
2014-06-13 14:19:36 +02:00
|
|
|
struct split_index *split_index;
|
make USE_NSEC work as expected
Since the filesystem ext4 is now defined as stable in Linux v2.6.28,
and ext4 supports nanonsecond resolution timestamps natively, it is
time to make USE_NSEC work as expected.
This will make racy git situations less likely to happen. For 'git
checkout' this means it will be less likely that we have to open, read
the contents of the file into RAM, and check if file is really
modified or not. The result sould be a litle less used CPU time, less
pagefaults and a litle faster program, at least for 'git checkout'.
Since the number of possible racy git situations would increase when
disks gets faster, this patch would be more and more helpfull as times
go by. For a fast Solid State Disk, this patch should be helpfull.
Note that, when file operations starts to take less than 1 nanosecond,
one would again start to get more racy git situations.
For more info on racy git, see Documentation/technical/racy-git.txt
For more info on ext4, see http://kernelnewbies.org/Ext4
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-02-19 21:08:29 +01:00
|
|
|
struct cache_time timestamp;
|
unpack_trees(): protect the handcrafted in-core index from read_cache()
unpack_trees() rebuilds the in-core index from scratch by allocating a new
structure and finishing it off by copying the built one to the final
index.
The resulting in-core index is Ok for most use, but read_cache() does not
recognize it as such. The function is meant to be no-op if you already
have loaded the index, until you call discard_cache().
This change the way read_cache() detects an already initialized in-core
index, by introducing an extra bit, and marks the handcrafted in-core
index as initialized, to avoid this problem.
A better fix in the longer term would be to change the read_cache() API so
that it will always discard and re-read from the on-disk index to avoid
confusion. But there are higher level API that have relied on the current
semantics, and they and their users all need to get converted, which is
outside the scope of 'maint' track.
An example of such a higher level API is write_cache_as_tree(), which is
used by git-write-tree as well as later Porcelains like git-merge, revert
and cherry-pick. In the longer term, we should remove read_cache() from
there and add one to cmd_write_tree(); other callers expect that the
in-core index they prepared is what gets written as a tree so no other
change is necessary for this particular codepath.
The original version of this patch marked the index by pointing an
otherwise wasted malloc'ed memory with o->result.alloc, but this version
uses Linus's idea to use a new "initialized" bit, which is conceptually
much cleaner.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-08-23 21:57:30 +02:00
|
|
|
unsigned name_hash_initialized : 1,
|
2018-01-07 23:30:14 +01:00
|
|
|
initialized : 1,
|
|
|
|
drop_cache_tree : 1;
|
2013-11-14 20:21:58 +01:00
|
|
|
struct hashmap name_hash;
|
2013-11-14 20:20:58 +01:00
|
|
|
struct hashmap dir_hash;
|
2014-04-10 20:31:21 +02:00
|
|
|
unsigned char sha1[20];
|
2015-03-08 11:12:33 +01:00
|
|
|
struct untracked_cache *untracked;
|
2017-09-22 18:35:40 +02:00
|
|
|
uint64_t fsmonitor_last_update;
|
2017-10-28 01:26:37 +02:00
|
|
|
struct ewah_bitmap *fsmonitor_dirty;
|
2007-04-02 03:14:06 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Global index instance; the *_cache() compatibility macros in this header all operate on it. */
extern struct index_state the_index;
|
|
|
|
|
2008-03-21 21:16:24 +01:00
|
|
|
/*
 * Name hashing
 *
 * NOTE(review): these presumably maintain index_state.name_hash and
 * index_state.dir_hash; the implementations are not visible from this
 * header, so confirm before relying on that.
 */
extern int test_lazy_init_name_hash(struct index_state *istate, int try_threaded);
extern void add_name_hash(struct index_state *istate, struct cache_entry *ce);
extern void remove_name_hash(struct index_state *istate, struct cache_entry *ce);
extern void free_name_hash(struct index_state *istate);
|
2008-03-21 21:16:24 +01:00
|
|
|
|
|
|
|
|
2007-04-02 08:26:07 +02:00
|
|
|
#ifndef NO_THE_INDEX_COMPATIBILITY_MACROS
/*
 * Convenience wrappers that apply the index API to the global the_index.
 * Define NO_THE_INDEX_COMPATIBILITY_MACROS before including this header
 * to leave them out.  Every macro argument is parenthesized in the
 * expansion.
 */
#define active_cache (the_index.cache)
#define active_nr (the_index.cache_nr)
#define active_alloc (the_index.cache_alloc)
#define active_cache_changed (the_index.cache_changed)
#define active_cache_tree (the_index.cache_tree)

#define read_cache() read_index(&the_index)
/*
 * Reads the shared index from get_git_dir(), i.e. the gitdir of the
 * current repository.  Callers that need a different gitdir should call
 * read_index_from() directly.
 */
#define read_cache_from(path) read_index_from(&the_index, (path), (get_git_dir()))
#define read_cache_preload(pathspec) read_index_preload(&the_index, (pathspec))
/*
 * Wrapper for is_index_unborn(), which was added to detect a truly
 * un-checked-out state by checking that $GIT_INDEX_FILE did not exist
 * when the in-core index was populated.
 */
#define is_cache_unborn() is_index_unborn(&the_index)
#define read_cache_unmerged() read_index_unmerged(&the_index)
#define discard_cache() discard_index(&the_index)
#define unmerged_cache() unmerged_index(&the_index)
#define cache_name_pos(name, namelen) index_name_pos(&the_index,(name),(namelen))
#define add_cache_entry(ce, option) add_index_entry(&the_index, (ce), (option))
#define rename_cache_entry_at(pos, new_name) rename_index_entry_at(&the_index, (pos), (new_name))
#define remove_cache_entry_at(pos) remove_index_entry_at(&the_index, (pos))
#define remove_file_from_cache(path) remove_file_from_index(&the_index, (path))
#define add_to_cache(path, st, flags) add_to_index(&the_index, (path), (st), (flags))
#define add_file_to_cache(path, flags) add_file_to_index(&the_index, (path), (flags))
#define chmod_cache_entry(ce, flip) chmod_index_entry(&the_index, (ce), (flip))
#define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL, NULL)
#define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options))
#define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options))
#define cache_dir_exists(name, namelen) index_dir_exists(&the_index, (name), (namelen))
#define cache_file_exists(name, namelen, igncase) index_file_exists(&the_index, (name), (namelen), (igncase))
#define cache_name_is_other(name, namelen) index_name_is_other(&the_index, (name), (namelen))
#define resolve_undo_clear() resolve_undo_clear_index(&the_index)
#define unmerge_cache_entry_at(at) unmerge_index_entry_at(&the_index, (at))
#define unmerge_cache(pathspec) unmerge_index(&the_index, (pathspec))
#define read_blob_data_from_cache(path, sz) read_blob_data_from_index(&the_index, (path), (sz))
#endif
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2007-02-28 20:45:56 +01:00
|
|
|
/*
 * Object type codes.  OBJ_BAD through OBJ_REF_DELTA have explicit values
 * (5 is deliberately left unused); OBJ_ANY and OBJ_MAX simply take the
 * next two values after OBJ_REF_DELTA, i.e. 8 and 9.
 */
enum object_type {
	OBJ_BAD = -1,
	OBJ_NONE = 0,
	OBJ_COMMIT = 1,
	OBJ_TREE = 2,
	OBJ_BLOB = 3,
	OBJ_TAG = 4,
	/* 5 for future expansion */
	OBJ_OFS_DELTA = 6,
	OBJ_REF_DELTA = 7,
	OBJ_ANY,
	OBJ_MAX
};
|
|
|
|
|
2007-12-01 07:22:38 +01:00
|
|
|
static inline enum object_type object_type(unsigned int mode)
|
|
|
|
{
|
|
|
|
return S_ISDIR(mode) ? OBJ_TREE :
|
|
|
|
S_ISGITLINK(mode) ? OBJ_COMMIT :
|
|
|
|
OBJ_BLOB;
|
|
|
|
}
|
|
|
|
|
2013-03-08 10:29:08 +01:00
|
|
|
/* Double-check local_repo_env below if you add to this list. */
#define GIT_DIR_ENVIRONMENT "GIT_DIR"
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-30 09:24:36 +01:00
|
|
|
/*
 * In a linked working directory this variable points at the real
 * repository directory shared with the main working directory.
 */
#define GIT_COMMON_DIR_ENVIRONMENT "GIT_COMMON_DIR"
|
ref namespaces: infrastructure
Add support for dividing the refs of a single repository into multiple
namespaces, each of which can have its own branches, tags, and HEAD.
Git can expose each namespace as an independent repository to pull from
and push to, while sharing the object store, and exposing all the refs
to operations such as git-gc.
Storing multiple repositories as namespaces of a single repository
avoids storing duplicate copies of the same objects, such as when
storing multiple branches of the same source. The alternates mechanism
provides similar support for avoiding duplicates, but alternates do not
prevent duplication between new objects added to the repositories
without ongoing maintenance, while namespaces do.
To specify a namespace, set the GIT_NAMESPACE environment variable to
the namespace. For each ref namespace, git stores the corresponding
refs in a directory under refs/namespaces/. For example,
GIT_NAMESPACE=foo will store refs under refs/namespaces/foo/. You can
also specify namespaces via the --namespace option to git.
Note that namespaces which include a / will expand to a hierarchy of
namespaces; for example, GIT_NAMESPACE=foo/bar will store refs under
refs/namespaces/foo/refs/namespaces/bar/. This makes paths in
GIT_NAMESPACE behave hierarchically, so that cloning with
GIT_NAMESPACE=foo/bar produces the same result as cloning with
GIT_NAMESPACE=foo and cloning from that repo with GIT_NAMESPACE=bar. It
also avoids ambiguity with strange namespace paths such as
foo/refs/heads/, which could otherwise generate directory/file conflicts
within the refs directory.
Add the infrastructure for ref namespaces: handle the GIT_NAMESPACE
environment variable and --namespace option, and support iterating over
refs in a namespace.
Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Jamey Sharp <jamey@minilop.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-07-05 19:54:44 +02:00
|
|
|
/*
 * GIT_NAMESPACE selects a ref namespace; its refs are kept under
 * refs/namespaces/<name>/.
 */
#define GIT_NAMESPACE_ENVIRONMENT "GIT_NAMESPACE"
#define GIT_WORK_TREE_ENVIRONMENT "GIT_WORK_TREE"
#define GIT_PREFIX_ENVIRONMENT "GIT_PREFIX"
#define GIT_SUPER_PREFIX_ENVIRONMENT "GIT_INTERNAL_SUPER_PREFIX"
#define DEFAULT_GIT_DIR_ENVIRONMENT ".git"
#define DB_ENVIRONMENT "GIT_OBJECT_DIRECTORY"
#define INDEX_ENVIRONMENT "GIT_INDEX_FILE"
#define GRAFT_ENVIRONMENT "GIT_GRAFT_FILE"
#define GIT_SHALLOW_FILE_ENVIRONMENT "GIT_SHALLOW_FILE"
#define TEMPLATE_DIR_ENVIRONMENT "GIT_TEMPLATE_DIR"
#define CONFIG_ENVIRONMENT "GIT_CONFIG"
#define CONFIG_DATA_ENVIRONMENT "GIT_CONFIG_PARAMETERS"
#define EXEC_PATH_ENVIRONMENT "GIT_EXEC_PATH"
#define CEILING_DIRECTORIES_ENVIRONMENT "GIT_CEILING_DIRECTORIES"
#define NO_REPLACE_OBJECTS_ENVIRONMENT "GIT_NO_REPLACE_OBJECTS"
#define GIT_REPLACE_REF_BASE_ENVIRONMENT "GIT_REPLACE_REF_BASE"
|
Add basic infrastructure to assign attributes to paths
This adds the basic infrastructure to assign attributes to
paths, in a way similar to what the exclusion mechanism does
based on $GIT_DIR/info/exclude and .gitignore files.
An attribute is just a simple string that does not contain any
whitespace. They can be specified in $GIT_DIR/info/attributes
file, and .gitattributes file in each directory.
Each line in these files defines a pattern matching rule.
Similar to the exclusion mechanism, a later match overrides an
earlier match in the same file, and entries from .gitattributes
file in the same directory take precedence over the ones from
parent directories. Lines in $GIT_DIR/info/attributes file are
used as the lowest precedence default rules.
A line is either a comment (an empty line, or a line that begins
with a '#'), or a rule, which is a whitespace separated list of
tokens. The first token on the line is a shell glob pattern.
The rest are names of attributes, each of which can optionally
be prefixed with '!'. Such a line means "if a path matches this
glob, this attribute is set (or unset -- if the attribute name
is prefixed with '!')". For glob matching, the same "if the
pattern does not have a slash in it, the basename of the path is
matched with fnmatch(3) against the pattern, otherwise, the path
is matched with the pattern with FNM_PATHNAME" rule as the
exclusion mechanism is used.
This does not define what an attribute means. Tying an
attribute to various effects it has on git operation for paths
that have it will be specified separately.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-12 10:07:32 +02:00
|
|
|
/*
 * Attribute sources: a .gitattributes file in each directory, and
 * info/attributes under $GIT_DIR.
 */
#define GITATTRIBUTES_FILE ".gitattributes"
#define INFOATTRIBUTES_FILE "info/attributes"
|
attribute macro support
This adds "attribute macros" (for lack of better name). So far,
we have low-level attributes such as crlf and diff, which are
defined in operational terms --- setting or unsetting them on a
particular path directly affects what is done to the path. For
example, in order to decline diffs or crlf conversions on a
binary blob, no diffs on PostScript files, and treat all other
files normally, you would have something like these:
* diff crlf
*.ps !diff
proprietary.o !diff !crlf
That is fine as the operation goes, but gets unwieldy rather
rapidly, when we start adding more low-level attributes that are
defined in operational terms. A near-term example of such an
attribute would be 'merge-3way' which would control if git
should attempt the usual 3-way file-level merge internally, or
leave merging to a specialized external program of user's
choice. When it is added, we do _not_ want to force the users
to update the above to:
* diff crlf merge-3way
*.ps !diff
proprietary.o !diff !crlf !merge-3way
The way this patch solves this issue is to realize that the
attributes the user is assigning to paths are not defined in
terms of operations but in terms of what they are.
All of the three low-level attributes usually make sense for
most of the files that sane SCM users have git operate on (these
files are typically called "text"). Only a few cases, such as
binary blob, need exception to decline the "usual treatment
given to text files" -- and people mark them as "binary".
So this allows the $GIT_DIR/info/attributes and .gitattributes
at the toplevel of the project to also specify attributes that
assigns other attributes. The syntax is '[attr]' followed by an
attribute name followed by a list of attribute names:
[attr] binary !diff !crlf !merge-3way
When "binary" attribute is set to a path, if the path has not
got diff/crlf/merge-3way attribute set or unset by other rules,
this rule unsets the three low-level attributes.
It is expected that the user level .gitattributes will be
expressed mostly in terms of attributes based on what the files
are, and the above sample would become like this:
(built-in attribute configuration)
[attr] binary !diff !crlf !merge-3way
* diff crlf merge-3way
(project specific .gitattributes)
proprietary.o binary
(user preference $GIT_DIR/info/attributes)
*.ps !diff
There are a few caveats.
* As described above, you can define these macros only in
$GIT_DIR/info/attributes and toplevel .gitattributes.
* There is no attempt to detect circular definition of macro
attributes, and definitions are evaluated from bottom to top
as usual to fill in other attributes that have not yet got
values. The following would work as expected:
[attr] text diff crlf
[attr] ps text !diff
*.ps ps
while this would most likely not (I haven't tried):
[attr] ps text !diff
[attr] text diff crlf
*.ps ps
* When a macro says "[attr] A B !C", saying that a path does
not have attribute A does not let you tell anything about
attributes B or C. That is, given this:
[attr] text diff crlf
[attr] ps text !diff
*.txt !ps
path hello.txt, which would match "*.txt" pattern, would have
"ps" attribute set to zero, but that does not make text
attribute of hello.txt set to false (nor diff attribute set to
true).
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-14 17:54:37 +02:00
|
|
|
/* Token that starts an attribute macro definition line, e.g. "[attr] binary !diff !crlf". */
#define ATTRIBUTE_MACRO_PREFIX "[attr]"
#define GITMODULES_FILE ".gitmodules"
#define GIT_NOTES_REF_ENVIRONMENT "GIT_NOTES_REF"
#define GIT_NOTES_DEFAULT_REF "refs/notes/commits"
#define GIT_NOTES_DISPLAY_REF_ENVIRONMENT "GIT_NOTES_DISPLAY_REF"
#define GIT_NOTES_REWRITE_REF_ENVIRONMENT "GIT_NOTES_REWRITE_REF"
#define GIT_NOTES_REWRITE_MODE_ENVIRONMENT "GIT_NOTES_REWRITE_MODE"
|
add global --literal-pathspecs option
Git takes pathspec arguments in many places to limit the
scope of an operation. These pathspecs are treated not as
literal paths, but as glob patterns that can be fed to
fnmatch. When a user is giving a specific pattern, this is a
nice feature.
However, when programmatically providing pathspecs, it can be
a nuisance. For example, to find the latest revision which
modified "$foo", one can use "git rev-list -- $foo". But if
"$foo" contains glob characters (e.g., "f*"), it will
erroneously match more entries than desired. The caller
needs to quote the characters in $foo, and even then, the
results may not be exactly the same as with a literal
pathspec. For instance, the depth checks in
match_pathspec_depth do not kick in if we match via fnmatch.
This patch introduces a global command-line option (i.e.,
one for "git" itself, not for specific commands) to turn
this behavior off. It also has a matching environment
variable, which can make it easier if you are a script or
porcelain interface that is going to issue many such
commands.
This option cannot turn off globbing for particular
pathspecs. That could eventually be done with a ":(noglob)"
magic pathspec prefix. However, that level of granularity is
more cumbersome to use for many cases, and doing ":(noglob)"
right would mean converting the whole codebase to use
"struct pathspec", as the usual "const char **pathspec"
cannot represent extra per-item flags.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-12-19 23:37:30 +01:00
|
|
|
/*
 * GIT_LITERAL_PATHSPECS is the environment counterpart of the global
 * --literal-pathspecs option, which makes git treat pathspecs as literal
 * paths instead of glob patterns.
 */
#define GIT_LITERAL_PATHSPECS_ENVIRONMENT "GIT_LITERAL_PATHSPECS"
#define GIT_GLOB_PATHSPECS_ENVIRONMENT "GIT_GLOB_PATHSPECS"
#define GIT_NOGLOB_PATHSPECS_ENVIRONMENT "GIT_NOGLOB_PATHSPECS"
#define GIT_ICASE_PATHSPECS_ENVIRONMENT "GIT_ICASE_PATHSPECS"
#define GIT_QUARANTINE_ENVIRONMENT "GIT_QUARANTINE_PATH"
|
git: add --no-optional-locks option
Some tools like IDEs or fancy editors may periodically run
commands like "git status" in the background to keep track
of the state of the repository. Some of these commands may
refresh the index and write out the result in an
opportunistic way: if they can get the index lock, then they
update the on-disk index with any updates they find. And if
not, then their in-core refresh is lost and just has to be
recomputed by the next caller.
But taking the index lock may conflict with other operations
in the repository. Especially ones that the user is doing
themselves, which _aren't_ opportunistic. In other words,
"git status" knows how to back off when somebody else is
holding the lock, but other commands don't know that status
would be happy to drop the lock if somebody else wanted it.
There are a couple possible solutions:
1. Have some kind of "pseudo-lock" that allows other
commands to tell status that they want the lock.
This is likely to be complicated and error-prone to
implement (and maybe even impossible with just
dotlocks to work from, as it requires some
inter-process communication).
2. Avoid background runs of commands like "git status"
that want to do opportunistic updates, preferring
instead plumbing like diff-files, etc.
This is awkward for a couple of reasons. One is that
"status --porcelain" reports a lot more about the
repository state than is available from individual
plumbing commands. And two is that we actually _do_
want to see the refreshed index. We just don't want to
take a lock or write out the result. Whereas commands
like diff-files expect us to refresh the index
separately and write it to disk so that they can depend
on the result. But that write is exactly what we're
trying to avoid.
3. Ask "status" not to lock or write the index.
This is easy to implement. The big downside is that any
work done in refreshing the index for such a call is
lost when the process exits. So a background process
may end up re-hashing a changed file multiple times
until the user runs a command that does an index
refresh themselves.
This patch implements the option 3. The idea (and the test)
is largely stolen from a Git for Windows patch by Johannes
Schindelin, 67e5ce7f63 (status: offer *not* to lock the
index and update it, 2016-08-12). The twist here is that
instead of making this an option to "git status", it becomes
a "git" option and matching environment variable.
The reason there is two-fold:
1. An environment variable is carried through to
sub-processes. And whether an invocation is a
background process or not should apply to the whole
process tree. So you could do "git --no-optional-locks
foo", and if "foo" is a script or alias that calls
"status", you'll still get the effect.
2. There may be other programs that want the same
treatment.
I've punted here on finding more callers to convert,
since "status" is the obvious one to call as a repeated
background job. But "git diff"'s opportunistic refresh
of the index may be a good candidate.
The test is taken from 67e5ce7f63, and it's worth repeating
Johannes's explanation:
Note that the regression test added in this commit does
not *really* verify that no index.lock file was written;
that test is not possible in a portable way. Instead, we
verify that .git/index is rewritten *only* when `git
status` is run without `--no-optional-locks`.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-27 08:54:30 +02:00
|
|
|
/* Environment variable that matches git's --no-optional-locks option. */
#define GIT_OPTIONAL_LOCKS_ENVIRONMENT "GIT_OPTIONAL_LOCKS"
|
2005-04-21 19:55:18 +02:00
|
|
|
|
2017-10-16 19:55:24 +02:00
|
|
|
/*
 * Environment variable used in handshaking the wire protocol.
 * Contains a colon ':' separated list of keys with optional values
 * 'key[=value]'.  Presence of unknown keys and values must be
 * ignored.
 */
#define GIT_PROTOCOL_ENVIRONMENT "GIT_PROTOCOL"
/* HTTP header used to handshake the wire protocol */
#define GIT_PROTOCOL_HEADER "Git-Protocol"
|
2017-10-16 19:55:24 +02:00
|
|
|
|
2010-02-25 00:34:14 +01:00
|
|
|
/*
|
setup: suppress implicit "." work-tree for bare repos
If an explicit GIT_DIR is given without a working tree, we
implicitly assume that the current working directory should
be used as the working tree. E.g.,:
GIT_DIR=/some/repo.git git status
would compare against the cwd.
Unfortunately, we fool this rule for sub-invocations of git
by setting GIT_DIR internally ourselves. For example:
git init foo
cd foo/.git
git status ;# fails, as we expect
git config alias.st status
git status ;# does not fail, but should
What happens is that we run setup_git_directory when doing
alias lookup (since we need to see the config), set GIT_DIR
as a result, and then leave GIT_WORK_TREE blank (because we
do not have one). Then when we actually run the status
command, we do setup_git_directory again, which sees our
explicit GIT_DIR and uses the cwd as an implicit worktree.
It's tempting to argue that we should be suppressing that
second invocation of setup_git_directory, as it could use
the values we already found in memory. However, the problem
still exists for sub-processes (e.g., if "git status" were
an external command).
You can see another example with the "--bare" option, which
sets GIT_DIR explicitly. For example:
git init foo
cd foo/.git
git status ;# fails
git --bare status ;# does NOT fail
We need some way of telling sub-processes "even though
GIT_DIR is set, do not use cwd as an implicit working tree".
We could do it by putting a special token into
GIT_WORK_TREE, but the obvious choice (an empty string) has
some portability problems.
Instead, we add a new boolean variable, GIT_IMPLICIT_WORK_TREE,
which suppresses the use of cwd as a working tree when
GIT_DIR is set. We trigger the new variable when we know we
are in a bare setting.
The variable is left intentionally undocumented, as this is
an internal detail (for now, anyway). If somebody comes up
with a good alternate use for it, and once we are confident
we have shaken any bugs out of it, we can consider promoting
it further.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-08 10:32:22 +01:00
|
|
|
* This environment variable is expected to contain a boolean indicating
|
|
|
|
* whether we should or should not treat:
|
|
|
|
*
|
|
|
|
* GIT_DIR=foo.git git ...
|
|
|
|
*
|
|
|
|
* as if GIT_WORK_TREE=. was given. It's not expected that users will make use
|
|
|
|
* of this, but we use it internally to communicate to sub-processes that we
|
|
|
|
* are in a bare repo. If not set, defaults to true.
|
|
|
|
*/
|
|
|
|
#define GIT_IMPLICIT_WORK_TREE_ENVIRONMENT "GIT_IMPLICIT_WORK_TREE"
|
|
|
|
|
2010-02-25 00:34:14 +01:00
|
|
|
/*
|
2013-03-08 10:29:08 +01:00
|
|
|
* Repository-local GIT_* environment variables; these will be cleared
|
|
|
|
* when git spawns a sub-process that runs inside another repository.
|
|
|
|
* The array is NULL-terminated, which makes it easy to pass in the "env"
|
|
|
|
* parameter of a run-command invocation, or to do a simple walk.
|
2010-02-25 00:34:14 +01:00
|
|
|
*/
|
2013-03-08 10:29:08 +01:00
|
|
|
extern const char * const local_repo_env[];
|
2010-02-25 00:34:14 +01:00
|
|
|
|
2017-06-20 21:19:32 +02:00
|
|
|
extern void setup_git_env(void);
|
|
|
|
|
config: only read .git/config from configured repos
When git_config() runs, it looks in the system, user-wide,
and repo-level config files. It gets the latter by calling
git_pathdup(), which in turn calls get_git_dir(). If we
haven't set up the git repository yet, this may simply
return ".git", and we will look at ".git/config". This
seems like it would be helpful (presumably we haven't set up
the repository yet, so it tries to find it), but it turns
out to be a bad idea for a few reasons:
- it's not sufficient, and therefore hides bugs in a
confusing way. Config will be respected if commands are
run from the top-level of the working tree, but not from
a subdirectory.
- it's not always true that we haven't set up the
repository _yet_; we may not want to do it at all. For
instance, if you run "git init /some/path" from inside
another repository, it should not load config from the
existing repository.
- there might be a path ".git/config", but it is not the
actual repository we would find via setup_git_directory().
This may happen, e.g., if you are storing a git
repository inside another git repository, but have
munged one of the files in such a way that the
inner repository is not valid (e.g., by removing HEAD).
We have at least two bugs of the second type in git-init,
introduced by ae5f677 (lazily load core.sharedrepository,
2016-03-11). It causes init to use git_configset(), which
loads all of the config, including values from the current
repo (if any). This shows up in two ways:
1. If we happen to be in an existing repository directory,
we'll read and respect core.sharedrepository from it,
even though it should have no bearing on the new
repository. A new test in t1301 covers this.
2. Similarly, if we're in an existing repo that sets
core.logallrefupdates, that will cause init to fail to
set it in a newly created repository (because it thinks
that the user's templates already did so). A new test
in t0001 covers this.
We also need to adjust an existing test in t1302, which
gives another example of why this patch is an improvement.
That test creates an embedded repository with a bogus
core.repositoryformatversion of "99". It wants to make sure
that we actually stop at the bogus repo rather than
continuing upward to find the outer repo. So it checks that
"git config core.repositoryformatversion" returns 99. But
that only works because we blindly read ".git/config", even
though we _know_ we're in a repository whose vintage we do
not understand.
After this patch, we avoid reading config from the unknown
vintage repository at all, which is a safer choice. But we
need to tweak the test, since core.repositoryformatversion
will not return 99; it will claim that it could not find the
variable at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-13 05:24:15 +02:00
|
|
|
/*
|
|
|
|
* Returns true iff we have a configured git repository (either via
|
|
|
|
* setup_git_directory, or in the environment via $GIT_DIR).
|
|
|
|
*/
|
|
|
|
int have_git_dir(void);
|
|
|
|
|
2007-01-07 11:00:28 +01:00
|
|
|
extern int is_bare_repository_cfg;
|
|
|
|
extern int is_bare_repository(void);
|
2007-01-20 03:09:34 +01:00
|
|
|
extern int is_inside_git_dir(void);
|
Clean up work-tree handling
The old version of work-tree support was an unholy mess, barely readable,
and not to the point.
For example, why do you have to provide a worktree, when it is not used?
As in "git status". Now it works.
Another riddle was: if you can have work trees inside the git dir, why
are some programs complaining that they need a work tree?
IOW it is allowed to call
$ git --git-dir=../ --work-tree=. bla
when you really want to. In this case, you are both in the git directory
and in the working tree. So, programs have to actually test for the right
thing, namely if they are inside a working tree, and not if they are
inside a git directory.
Also, GIT_DIR=../.git should behave the same as if no GIT_DIR was
specified, unless there is a repository in the current working directory.
It does now.
The logic to determine if a repository is bare, or has a work tree
(tertium non datur), is this:
--work-tree=bla overrides GIT_WORK_TREE, which overrides core.bare = true,
which overrides core.worktree, which overrides GIT_DIR/.. when GIT_DIR
ends in /.git, which overrides the directory in which .git/ was found.
In related news, a long standing bug was fixed: when in .git/bla/x.git/,
which is a bare repository, git formerly assumed ../.. to be the
appropriate git dir. This problem was reported by Shawn Pearce to have
caused much pain, where a colleague mistakenly ran "git init" in "/" a
long time ago, and bare repositories just would not work.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-08-01 02:30:14 +02:00
|
|
|
extern char *git_work_tree_cfg;
|
2007-06-06 09:10:42 +02:00
|
|
|
extern int is_inside_work_tree(void);
|
2006-08-23 12:39:11 +02:00
|
|
|
extern const char *get_git_dir(void);
|
$GIT_COMMON_DIR: a new environment variable
This variable is intended to support multiple working directories
attached to a repository. Such a repository may have a main working
directory, created by either "git init" or "git clone" and one or more
linked working directories. These working directories and the main
repository share the same repository directory.
In linked working directories, $GIT_COMMON_DIR must be defined to point
to the real repository directory and $GIT_DIR points to an unused
subdirectory inside $GIT_COMMON_DIR. File locations inside the
repository are reorganized from the linked worktree view point:
- worktree-specific such as HEAD, logs/HEAD, index, other top-level
refs and unrecognized files are from $GIT_DIR.
- the rest like objects, refs, info, hooks, packed-refs, shallow...
are from $GIT_COMMON_DIR (except info/sparse-checkout, but that's
a separate patch)
Scripts are supposed to retrieve paths in $GIT_DIR with "git rev-parse
--git-path", which will take care of "$GIT_DIR vs $GIT_COMMON_DIR"
business.
The redirection is done by git_path(), git_pathdup() and
strbuf_git_path(). The selected list of paths goes to $GIT_COMMON_DIR,
not the other way around in case a developer adds a new
worktree-specific file and it's accidentally promoted to be shared
across repositories (this includes unknown files added by third party
commands)
The list of known files that belong to $GIT_DIR are:
ADD_EDIT.patch BISECT_ANCESTORS_OK BISECT_EXPECTED_REV BISECT_LOG
BISECT_NAMES CHERRY_PICK_HEAD COMMIT_MSG FETCH_HEAD HEAD MERGE_HEAD
MERGE_MODE MERGE_RR NOTES_EDITMSG NOTES_MERGE_WORKTREE ORIG_HEAD
REVERT_HEAD SQUASH_MSG TAG_EDITMSG fast_import_crash_* logs/HEAD
next-index-* rebase-apply rebase-merge rsync-refs-* sequencer/*
shallow_*
Path mapping is NOT done for git_path_submodule(). Multi-checkouts are
not supported as submodules.
Helped-by: Jens Lehmann <Jens.Lehmann@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-11-30 09:24:36 +01:00
|
|
|
extern const char *get_git_common_dir(void);
|
2005-05-10 07:57:58 +02:00
|
|
|
extern char *get_object_directory(void);
|
|
|
|
extern char *get_index_file(void);
|
2005-07-30 09:58:28 +02:00
|
|
|
extern char *get_graft_file(void);
|
2007-08-01 02:29:38 +02:00
|
|
|
extern int set_git_dir(const char *path);
|
2015-09-14 00:17:42 +02:00
|
|
|
extern int get_common_dir_noenv(struct strbuf *sb, const char *gitdir);
|
2014-11-30 09:24:44 +01:00
|
|
|
extern int get_common_dir(struct strbuf *sb, const char *gitdir);
|
ref namespaces: infrastructure
Add support for dividing the refs of a single repository into multiple
namespaces, each of which can have its own branches, tags, and HEAD.
Git can expose each namespace as an independent repository to pull from
and push to, while sharing the object store, and exposing all the refs
to operations such as git-gc.
Storing multiple repositories as namespaces of a single repository
avoids storing duplicate copies of the same objects, such as when
storing multiple branches of the same source. The alternates mechanism
provides similar support for avoiding duplicates, but alternates do not
prevent duplication between new objects added to the repositories
without ongoing maintenance, while namespaces do.
To specify a namespace, set the GIT_NAMESPACE environment variable to
the namespace. For each ref namespace, git stores the corresponding
refs in a directory under refs/namespaces/. For example,
GIT_NAMESPACE=foo will store refs under refs/namespaces/foo/. You can
also specify namespaces via the --namespace option to git.
Note that namespaces which include a / will expand to a hierarchy of
namespaces; for example, GIT_NAMESPACE=foo/bar will store refs under
refs/namespaces/foo/refs/namespaces/bar/. This makes paths in
GIT_NAMESPACE behave hierarchically, so that cloning with
GIT_NAMESPACE=foo/bar produces the same result as cloning with
GIT_NAMESPACE=foo and cloning from that repo with GIT_NAMESPACE=bar. It
also avoids ambiguity with strange namespace paths such as
foo/refs/heads/, which could otherwise generate directory/file conflicts
within the refs directory.
Add the infrastructure for ref namespaces: handle the GIT_NAMESPACE
environment variable and --namespace option, and support iterating over
refs in a namespace.
Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Jamey Sharp <jamey@minilop.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-07-05 19:54:44 +02:00
|
|
|
extern const char *get_git_namespace(void);
|
|
|
|
extern const char *strip_namespace(const char *namespaced_ref);
|
2016-10-07 20:18:48 +02:00
|
|
|
extern const char *get_super_prefix(void);
|
Clean up work-tree handling
The old version of work-tree support was an unholy mess, barely readable,
and not to the point.
For example, why do you have to provide a worktree, when it is not used?
As in "git status". Now it works.
Another riddle was: if you can have work trees inside the git dir, why
are some programs complaining that they need a work tree?
IOW it is allowed to call
$ git --git-dir=../ --work-tree=. bla
when you really want to. In this case, you are both in the git directory
and in the working tree. So, programs have to actually test for the right
thing, namely if they are inside a working tree, and not if they are
inside a git directory.
Also, GIT_DIR=../.git should behave the same as if no GIT_DIR was
specified, unless there is a repository in the current working directory.
It does now.
The logic to determine if a repository is bare, or has a work tree
(tertium non datur), is this:
--work-tree=bla overrides GIT_WORK_TREE, which overrides core.bare = true,
which overrides core.worktree, which overrides GIT_DIR/.. when GIT_DIR
ends in /.git, which overrides the directory in which .git/ was found.
In related news, a long standing bug was fixed: when in .git/bla/x.git/,
which is a bare repository, git formerly assumed ../.. to be the
appropriate git dir. This problem was reported by Shawn Pearce to have
caused much pain, where a colleague mistakenly ran "git init" in "/" a
long time ago, and bare repositories just would not work.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-08-01 02:30:14 +02:00
|
|
|
extern const char *get_git_work_tree(void);
|
2015-06-09 20:24:35 +02:00
|
|
|
|
2016-01-22 23:27:33 +01:00
|
|
|
/*
|
|
|
|
* Return true if the given path is a git directory; note that this _just_
|
|
|
|
* looks at the directory itself. If you want to know whether "foo/.git"
|
|
|
|
* is a repository, you must feed that path, not just "foo".
|
|
|
|
*/
|
|
|
|
extern int is_git_directory(const char *path);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return 1 if the given path is the root of a git repository or
|
|
|
|
* submodule, else 0. Will not return 1 for bare repositories with the
|
|
|
|
* exception of creating a bare repository in "foo/.git" and calling
|
|
|
|
* is_nonbare_repository_dir() on "foo".
|
|
|
|
*
|
|
|
|
* If we run into read errors, we err on the side of saying "yes, it is",
|
|
|
|
* as we usually consider sub-repos precious, and would prefer to err on the
|
|
|
|
* side of not disrupting or deleting them.
|
|
|
|
*/
|
|
|
|
extern int is_nonbare_repository_dir(struct strbuf *path);
|
|
|
|
|
2015-06-09 20:24:35 +02:00
|
|
|
#define READ_GITFILE_ERR_STAT_FAILED 1
|
|
|
|
#define READ_GITFILE_ERR_NOT_A_FILE 2
|
|
|
|
#define READ_GITFILE_ERR_OPEN_FAILED 3
|
|
|
|
#define READ_GITFILE_ERR_READ_FAILED 4
|
|
|
|
#define READ_GITFILE_ERR_INVALID_FORMAT 5
|
|
|
|
#define READ_GITFILE_ERR_NO_PATH 6
|
|
|
|
#define READ_GITFILE_ERR_NOT_A_REPO 7
|
2015-06-15 21:39:52 +02:00
|
|
|
#define READ_GITFILE_ERR_TOO_LARGE 8
|
2017-01-25 00:56:50 +01:00
|
|
|
extern void read_gitfile_error_die(int error_code, const char *path, const char *dir);
|
2015-06-09 20:24:35 +02:00
|
|
|
extern const char *read_gitfile_gently(const char *path, int *return_error_code);
|
|
|
|
#define read_gitfile(path) read_gitfile_gently((path), NULL)
|
2017-01-25 00:56:49 +01:00
|
|
|
extern const char *resolve_gitdir_gently(const char *suspect, int *return_error_code);
|
|
|
|
#define resolve_gitdir(path) resolve_gitdir_gently((path), NULL)
|
|
|
|
|
2008-04-27 19:39:21 +02:00
|
|
|
extern void set_git_work_tree(const char *tree);
|
2005-05-10 07:57:58 +02:00
|
|
|
|
|
|
|
#define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES"
|
2005-04-21 19:55:18 +02:00
|
|
|
|
2007-11-03 12:23:11 +01:00
|
|
|
extern void setup_work_tree(void);
|
2017-03-13 21:10:45 +01:00
|
|
|
/*
|
2017-06-14 20:07:37 +02:00
|
|
|
* Find the commondir and gitdir of the repository that contains the current
|
|
|
|
* working directory, without changing the working directory or other global
|
|
|
|
* state. The result is appended to commondir and gitdir. If the discovered
|
|
|
|
* gitdir does not correspond to a worktree, then 'commondir' and 'gitdir' will
|
|
|
|
* both have the same result appended to the buffer. The return value is
|
|
|
|
* 0 upon success and non-zero if no repository was found.
|
2017-03-13 21:10:45 +01:00
|
|
|
*/
|
2017-06-14 20:07:37 +02:00
|
|
|
extern int discover_git_directory(struct strbuf *commondir,
|
|
|
|
struct strbuf *gitdir);
|
2005-11-26 08:14:15 +01:00
|
|
|
extern const char *setup_git_directory_gently(int *);
|
2005-08-17 03:06:34 +02:00
|
|
|
extern const char *setup_git_directory(void);
|
2010-11-11 15:08:03 +01:00
|
|
|
extern char *prefix_path(const char *prefix, int len, const char *path);
|
2013-07-14 10:36:03 +02:00
|
|
|
extern char *prefix_path_gently(const char *prefix, int len, int *remaining, const char *path);
|
2017-03-21 02:21:27 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Concatenate "prefix" (if len is non-zero) and "path", with no
|
|
|
|
* connecting characters (so "prefix" should end with a "/").
|
|
|
|
* Unlike prefix_path, this should be used if the named file does
|
|
|
|
* not have to interact with an index entry; i.e. the name of a random file
|
|
|
|
* on the filesystem.
|
|
|
|
*
|
2017-03-21 02:28:49 +01:00
|
|
|
* The return value is always a newly allocated string (even if the
|
|
|
|
* prefix was empty).
|
2017-03-21 02:21:27 +01:00
|
|
|
*/
|
2017-03-21 02:28:49 +01:00
|
|
|
extern char *prefix_filename(const char *prefix, const char *path);
|
2017-03-21 02:21:27 +01:00
|
|
|
|
2009-10-18 09:27:24 +02:00
|
|
|
extern int check_filename(const char *prefix, const char *name);
|
2012-06-18 20:18:21 +02:00
|
|
|
extern void verify_filename(const char *prefix,
|
|
|
|
const char *name,
|
|
|
|
int diagnose_misspelt_rev);
|
2006-04-27 00:09:27 +02:00
|
|
|
extern void verify_non_filename(const char *prefix, const char *name);
|
2012-06-21 20:09:50 +02:00
|
|
|
extern int path_inside_repo(const char *prefix, const char *path);
|
2005-08-17 03:06:34 +02:00
|
|
|
|
2008-04-27 19:39:27 +02:00
|
|
|
#define INIT_DB_QUIET 0x0001
|
2016-09-25 05:14:37 +02:00
|
|
|
#define INIT_DB_EXIST_OK 0x0002
|
2008-04-27 19:39:27 +02:00
|
|
|
|
2016-09-25 05:14:37 +02:00
|
|
|
extern int init_db(const char *git_dir, const char *real_git_dir,
|
|
|
|
const char *template_dir, unsigned int flags);
|
2008-04-27 19:39:27 +02:00
|
|
|
|
2013-07-16 11:27:36 +02:00
|
|
|
extern void sanitize_stdfds(void);
|
2014-02-08 08:08:51 +01:00
|
|
|
extern int daemonize(void);
|
2013-07-16 11:27:36 +02:00
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/* Growth policy for dynamic arrays: add 16 to x, then scale by 3/2 (integer math). */
#define alloc_nr(x) ((((x) + 16) * 3) / 2)
|
|
|
|
|
2007-06-11 15:39:44 +02:00
|
|
|
/*
|
|
|
|
* Realloc the buffer pointed at by variable 'x' so that it can hold
|
|
|
|
* at least 'nr' entries; the number of entries currently allocated
|
|
|
|
* is 'alloc', using the standard growing factor alloc_nr() macro.
|
|
|
|
*
|
2010-10-08 18:46:59 +02:00
|
|
|
* DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
|
2007-06-11 15:39:44 +02:00
|
|
|
*/
|
|
|
|
#define ALLOC_GROW(x, nr, alloc) \
|
|
|
|
do { \
|
2007-06-17 00:37:39 +02:00
|
|
|
if ((nr) > alloc) { \
|
Extend --pretty=oneline to cover the first paragraph,
so that an ugly commit message like this can be
handled sanely.
Currently, --pretty=oneline and --pretty=email (hence
format-patch) take and use only the first line of the commit log
message. This changes them to:
- Take the first paragraph, where the definition of the first
paragraph is "skip all blank lines from the beginning, and
then grab everything up to the next empty line".
- Replace all line breaks with a whitespace.
This change would not affect a well-behaved commit message that
adheres to the convention of "single line summary, a blank line,
and then body of message", as its first paragraph always
consists of a single line. Commit messages from different
cultures, such as the ones imported from CVS/SVN, can however get
chomped with the existing behaviour at the first linebreak in
the middle of sentence right now, which would become much easier
to see with this change.
The Subject: and --pretty=oneline output would become very long
and unsightly for non-conforming commits, but their messages are
already ugly anyway, and this change at least avoids the loss of
information.
The Subject: line from a multi-line paragraph is folded using
RFC2822 line folding rules at the places where line breaks were
in the original.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-12 07:10:55 +02:00
|
|
|
if (alloc_nr(alloc) < (nr)) \
|
|
|
|
alloc = (nr); \
|
|
|
|
else \
|
|
|
|
alloc = alloc_nr(alloc); \
|
2014-09-16 20:56:57 +02:00
|
|
|
REALLOC_ARRAY(x, alloc); \
|
2007-06-11 15:39:44 +02:00
|
|
|
} \
|
2010-08-13 00:11:15 +02:00
|
|
|
} while (0)
|
2007-06-11 15:39:44 +02:00
|
|
|
|
2005-04-09 18:48:20 +02:00
|
|
|
/* Initialize and use the cache information */
|
2014-06-13 14:19:23 +02:00
|
|
|
struct lock_file;
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int read_index(struct index_state *);
|
2013-07-14 10:35:49 +02:00
|
|
|
extern int read_index_preload(struct index_state *, const struct pathspec *pathspec);
|
2014-06-13 14:19:51 +02:00
|
|
|
extern int do_read_index(struct index_state *istate, const char *path,
|
|
|
|
int must_exist); /* for testing only! */
|
read-cache: fix reading the shared index for other repos
read_index_from() takes a path argument for the location of the index
file. For reading the shared index in split index mode however it just
ignores that path argument, and reads it from the gitdir of the current
repository.
This works as long as an index in the_repository is read. Once that
changes, such as when we read the index of a submodule, or of a
different working tree than the current one, the gitdir of
the_repository will no longer contain the appropriate shared index,
and git will fail to read it.
For example t3007-ls-files-recurse-submodules.sh was broken with
GIT_TEST_SPLIT_INDEX set in 188dce131f ("ls-files: use repository
object", 2017-06-22), and t7814-grep-recurse-submodules.sh was also
broken in a similar manner, probably by introducing struct repository
there, although I didn't track down the exact commit for that.
be489d02d2 ("revision.c: --indexed-objects add objects from all
worktrees", 2017-08-23) breaks with split index mode in a similar
manner, not erroring out when it can't read the index, but instead
carrying on with pruning, without taking the index of the worktree into
account.
Fix this by passing an additional gitdir parameter to read_index_from,
to indicate where it should look for and read the shared index from.
read_cache_from() defaults to using the gitdir of the_repository. As it
is mostly a convenience macro, having to pass get_git_dir() for every
call seems overkill, and if necessary users can have more control by
using read_index_from().
Helped-by: Brandon Williams <bmwill@google.com>
Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-01-07 23:30:13 +01:00
|
|
|
extern int read_index_from(struct index_state *, const char *path,
|
|
|
|
const char *gitdir);
|
checkout: Fix "initial checkout" detection
Earlier commit 5521883 (checkout: do not lose staged removal, 2008-09-07)
tightened the rule to prevent switching branches from losing local
changes, so that staged removal of paths can be protected, while
attempting to keep a loophole to still allow a special case of switching
out of an un-checked-out state.
However, the loophole was made a bit too tight, and did not allow
switching from one branch (in an un-checked-out state) to check out
another branch.
The change to builtin-checkout.c in this commit loosens it to allow this,
by not insisting the original commit and the new commit to be the same.
It also introduces a new function, is_index_unborn (and an associated
macro, is_cache_unborn), to check if the repository is truly in an
un-checked-out state more reliably, by making sure that $GIT_INDEX_FILE
did not exist when populating the in-core index structure. A few places
the earlier commit 5521883 added the check for the initial checkout
condition are updated to use this function.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-11-12 20:52:35 +01:00
|
|
|
extern int is_index_unborn(struct index_state *);
|
2008-06-27 18:21:58 +02:00
|
|
|
extern int read_index_unmerged(struct index_state *);
|
2017-10-05 22:32:11 +02:00
|
|
|
|
|
|
|
/* For use with `write_locked_index()`. */
|
2014-06-13 14:19:23 +02:00
|
|
|
#define COMMIT_LOCK (1 << 0)
|
2018-03-01 21:40:20 +01:00
|
|
|
#define SKIP_IF_UNCHANGED (1 << 1)
|
2017-10-05 22:32:11 +02:00
|
|
|
|
|
|
|
/*
|
read-cache: drop explicit `CLOSE_LOCK`-flag
`write_locked_index()` takes two flags: `COMMIT_LOCK` and `CLOSE_LOCK`.
At most one is allowed. But it is also possible to use no flag, i.e.,
`0`. But when `write_locked_index()` calls `do_write_index()`, the
temporary file, a.k.a. the lockfile, will be closed. So passing `0` is
effectively the same as `CLOSE_LOCK`, which seems like a bug.
We might feel tempted to restructure the code in order to close the file
later, or conditionally. It also feels a bit unfortunate that we simply
"happen" to close the lock by way of an implementation detail of
lockfiles. But note that we need to close the temporary file before
`stat`-ing it, at least on Windows. See 9f41c7a6b (read-cache: close
index.lock in do_write_index, 2017-04-26).
Drop `CLOSE_LOCK` and make it explicit that `write_locked_index()`
always closes the lock. Whether it is also committed is governed by the
remaining flag, `COMMIT_LOCK`.
This means we neither have nor suggest that we have a mode to write the
index and leave the file open. Whatever extra contents we might
eventually want to write, we should probably write it from within
`write_locked_index()` itself anyway.
Signed-off-by: Martin Ågren <martin.agren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-06 22:12:12 +02:00
|
|
|
* Write the index while holding an already-taken lock. Close the lock,
|
|
|
|
* and if `COMMIT_LOCK` is given, commit it.
|
2017-10-05 22:32:11 +02:00
|
|
|
*
|
|
|
|
* Unless a split index is in use, write the index into the lockfile.
|
|
|
|
*
|
|
|
|
* With a split index, write the shared index to a temporary file,
|
|
|
|
* adjust its permissions and rename it into place, then write the
|
|
|
|
* split index to the lockfile. If the temporary file for the shared
|
|
|
|
* index cannot be created, fall back to the behavior described in
|
|
|
|
* the previous paragraph.
|
read-cache: leave lock in right state in `write_locked_index()`
If the original version of `write_locked_index()` returned with an
error, it didn't roll back the lockfile unless the error occurred at the
very end, during closing/committing. See commit 03b866477 (read-cache:
new API write_locked_index instead of write_index/write_cache,
2014-06-13).
In commit 9f41c7a6b (read-cache: close index.lock in do_write_index,
2017-04-26), we learned to close the lock slightly earlier in the
callstack. That was mostly a side-effect of lockfiles being implemented
using temporary files, but didn't cause any real harm.
Recently, commit 076aa2cbd (tempfile: auto-allocate tempfiles on heap,
2017-09-05) introduced a subtle bug. If the temporary file is deleted
(i.e., the lockfile is rolled back), the tempfile-pointer in the `struct
lock_file` will be left dangling. Thus, an attempt to reuse the
lockfile, or even just to roll it back, will induce undefined behavior
-- most likely a crash.
Besides not crashing, we clearly want to make things consistent. The
guarantees which the lockfile-machinery itself provides are A) if we ask
to commit and it fails, roll back, and B) if we ask to close and it
fails, do _not_ roll back. Let's do the same for consistency.
Do not delete the temporary file in `do_write_index()`. One of its
callers, `write_locked_index()` will thereby avoid rolling back the
lock. The other caller, `write_shared_index()`, will delete its
temporary file anyway. Both of these callers will avoid undefined
behavior (crashing).
Teach `write_locked_index(..., COMMIT_LOCK)` to roll back the lock
before returning. If we have already succeeded and committed, it will be
a noop. Simplify the existing callers where we now have a superfluous
call to `rollback_lockfile()`. That should keep future readers from
wondering why the callers are inconsistent.
Signed-off-by: Martin Ågren <martin.agren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-06 22:12:13 +02:00
|
|
|
*
|
|
|
|
* With `COMMIT_LOCK`, the lock is always committed or rolled back.
|
|
|
|
* Without it, the lock is closed, but neither committed nor rolled
|
|
|
|
* back.
|
2018-03-01 21:40:20 +01:00
|
|
|
*
|
|
|
|
* If `SKIP_IF_UNCHANGED` is given and the index is unchanged, nothing
|
|
|
|
* is written (and the lock is rolled back if `COMMIT_LOCK` is given).
|
2017-10-05 22:32:11 +02:00
|
|
|
*/
|
2014-06-13 14:19:23 +02:00
|
|
|
extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags);
|
2017-10-05 22:32:11 +02:00
|
|
|
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int discard_index(struct index_state *);
|
2017-05-08 11:41:42 +02:00
|
|
|
extern void move_index_extensions(struct index_state *dst, struct index_state *src);
|
2008-03-06 21:46:09 +01:00
|
|
|
extern int unmerged_index(const struct index_state *);
|
2017-12-21 20:19:06 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns 1 if the index differs from HEAD, 0 otherwise. When on an unborn
|
|
|
|
* branch, returns 1 if there are entries in the index, 0 otherwise. If an
|
|
|
|
* strbuf is provided, the space-separated list of files that differ will be
|
|
|
|
* appended to it.
|
|
|
|
*/
|
|
|
|
extern int index_has_changes(struct strbuf *sb);
|
|
|
|
|
verify_path: disallow symlinks in .gitmodules
There are a few reasons it's not a good idea to make
.gitmodules a symlink, including:
1. It won't be portable to systems without symlinks.
2. It may behave inconsistently, since Git may look at
this file in the index or a tree without bothering to
resolve any symbolic links. We don't do this _yet_, but
the config infrastructure is there and it's planned for
the future.
With some clever code, we could make (2) work. And some
people may not care about (1) if they only work on one
platform. But there are a few security reasons to simply
disallow it:
a. A symlinked .gitmodules file may circumvent any fsck
checks of the content.
b. Git may read and write from the on-disk file without
sanity checking the symlink target. So for example, if
you link ".gitmodules" to "../oops" and run "git
submodule add", we'll write to the file "oops" outside
the repository.
Again, both of those are problems that _could_ be solved
with sufficient code, but given the complications in (1) and
(2), we're better off just outlawing it explicitly.
Note the slightly tricky call to verify_path() in
update-index's update_one(). There we may not have a mode if
we're not updating from the filesystem (e.g., we might just
be removing the file). Passing "0" as the mode there works
fine; since it's not a symlink, we'll just skip the extra
checks.
Signed-off-by: Jeff King <peff@peff.net>
2018-05-05 02:03:35 +02:00
|
|
|
extern int verify_path(const char *path, unsigned mode);
|
2017-04-14 21:12:28 +02:00
|
|
|
extern int strcmp_offset(const char *s1, const char *s2, size_t *first_change);
|
2015-10-21 19:54:11 +02:00
|
|
|
extern int index_dir_exists(struct index_state *istate, const char *name, int namelen);
|
|
|
|
extern void adjust_dirname_case(struct index_state *istate, char *name);
|
2013-09-17 09:06:14 +02:00
|
|
|
extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);
|
2017-01-19 04:18:51 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Searches for an entry defined by name and namelen in the given index.
|
|
|
|
* If the return value is positive (including 0) it is the position of an
|
|
|
|
* exact match. If the return value is negative, the negated value minus 1
|
|
|
|
* is the position where the entry would be inserted.
|
|
|
|
* Example: The current index consists of these files and their stages:
|
|
|
|
*
|
|
|
|
* b#0, d#0, f#1, f#3
|
|
|
|
*
|
|
|
|
* index_name_pos(&index, "a", 1) -> -1
|
|
|
|
* index_name_pos(&index, "b", 1) -> 0
|
|
|
|
* index_name_pos(&index, "c", 1) -> -2
|
|
|
|
* index_name_pos(&index, "d", 1) -> 1
|
|
|
|
* index_name_pos(&index, "e", 1) -> -3
|
|
|
|
* index_name_pos(&index, "f", 1) -> -3
|
|
|
|
* index_name_pos(&index, "g", 1) -> -5
|
|
|
|
*/
|
2008-03-06 21:46:09 +01:00
|
|
|
extern int index_name_pos(const struct index_state *, const char *name, int namelen);
|
2017-01-19 04:18:51 +01:00
|
|
|
|
2005-05-08 06:55:21 +02:00
|
|
|
#define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */
|
|
|
|
#define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */
|
2005-06-25 11:25:29 +02:00
|
|
|
#define ADD_CACHE_SKIP_DFCHECK 4 /* Ok to skip DF conflict checks */
|
2007-08-09 22:42:50 +02:00
|
|
|
#define ADD_CACHE_JUST_APPEND 8 /* Append only; tree.c::read_tree() */
|
2008-08-21 10:44:53 +02:00
|
|
|
#define ADD_CACHE_NEW_ONLY 16 /* Do not replace existing ones */
|
2014-06-13 14:19:42 +02:00
|
|
|
#define ADD_CACHE_KEEP_CACHE_TREE 32 /* Do not invalidate cache-tree */
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int add_index_entry(struct index_state *, struct cache_entry *ce, int option);
|
2008-07-21 02:25:56 +02:00
|
|
|
extern void rename_index_entry_at(struct index_state *, int pos, const char *new_name);
|
2017-01-19 04:18:52 +01:00
|
|
|
|
|
|
|
/* Remove entry, return true if there are more entries to go. */
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int remove_index_entry_at(struct index_state *, int pos);
|
2017-01-19 04:18:52 +01:00
|
|
|
|
check_updates(): effective removal of cache entries marked CE_REMOVE
Below is oprofile output from GIT command 'git checkout -q my-v2.6.25'
(move from tag v2.6.27 to tag v2.6.25 of the Linux kernel):
CPU: Core 2, speed 1999.95 MHz (estimated)
Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit
mask of 0x00 (Unhalted core cycles) count 20000
Counted INST_RETIRED_ANY_P events (number of instructions retired) with a
unit mask of 0x00 (No unit mask) count 20000
CPU_CLK_UNHALT...|INST_RETIRED:2...|
samples| %| samples| %|
------------------------------------
409247 100.000 342878 100.000 git
CPU_CLK_UNHALT...|INST_RETIRED:2...|
samples| %| samples| %|
------------------------------------
260476 63.6476 257843 75.1996 libz.so.1.2.3
100876 24.6492 64378 18.7758 kernel-2.6.28.4_2.vmlinux
30850 7.5382 7874 2.2964 libc-2.9.so
14775 3.6103 8390 2.4469 git
2020 0.4936 4325 1.2614 libcrypto.so.0.9.8
191 0.0467 32 0.0093 libpthread-2.9.so
58 0.0142 36 0.0105 ld-2.9.so
1 2.4e-04 0 0 libldap-2.3.so.0.2.31
Detail list of the top 20 function entries (libz counted in one blob):
CPU_CLK_UNHALTED INST_RETIRED_ANY_P
samples % samples % image name symbol name
260476 63.6862 257843 75.2725 libz.so.1.2.3 /lib/libz.so.1.2.3
16587 4.0555 3636 1.0615 libc-2.9.so memcpy
7710 1.8851 277 0.0809 libc-2.9.so memmove
3679 0.8995 1108 0.3235 kernel-2.6.28.4_2.vmlinux d_validate
3546 0.8670 2607 0.7611 kernel-2.6.28.4_2.vmlinux __getblk
3174 0.7760 1813 0.5293 libc-2.9.so _int_malloc
2396 0.5858 3681 1.0746 kernel-2.6.28.4_2.vmlinux copy_to_user
2270 0.5550 2528 0.7380 kernel-2.6.28.4_2.vmlinux __link_path_walk
2205 0.5391 1797 0.5246 kernel-2.6.28.4_2.vmlinux ext4_mark_iloc_dirty
2103 0.5142 1203 0.3512 kernel-2.6.28.4_2.vmlinux find_first_zero_bit
2077 0.5078 997 0.2911 kernel-2.6.28.4_2.vmlinux do_get_write_access
2070 0.5061 514 0.1501 git cache_name_compare
2043 0.4995 1501 0.4382 kernel-2.6.28.4_2.vmlinux rcu_irq_exit
2022 0.4944 1732 0.5056 kernel-2.6.28.4_2.vmlinux __ext4_get_inode_loc
2020 0.4939 4325 1.2626 libcrypto.so.0.9.8 /usr/lib/libcrypto.so.0.9.8
1965 0.4804 1384 0.4040 git patch_delta
1708 0.4176 984 0.2873 kernel-2.6.28.4_2.vmlinux rcu_sched_grace_period
1682 0.4112 727 0.2122 kernel-2.6.28.4_2.vmlinux sysfs_slab_alias
1659 0.4056 290 0.0847 git find_pack_entry_one
1480 0.3619 1307 0.3816 kernel-2.6.28.4_2.vmlinux ext4_writepage_trans_blocks
Notice the memmove line, where the CPU did 7710 / 277 = 27.8 cycles
per instruction, and compared to the total cycles spent inside the
source code of GIT for this command, all the memmove() calls
translate to (7710 * 100) / 14775 = 52.2% of this.
Retesting with a GIT program compiled for gcov usage, I found out that
the memmove() calls came from remove_index_entry_at() in read-cache.c,
where we have:
memmove(istate->cache + pos,
istate->cache + pos + 1,
(istate->cache_nr - pos) * sizeof(struct cache_entry *));
remove_index_entry_at() is called 4902 times from check_updates() in
unpack-trees.c, and each time called we move each cache_entry pointers
(from the removed one) one step to the left.
Since we have 28828 entries in the cache this time, and if we on
average move half of them each time, we in total move approximately
4902 * 0.5 * 28828 * 4 = 282 629 712 bytes, or twice this amount if
each pointer is 8 bytes (64 bit).
OK, it seems that the function check_updates() is called 28 times, so
the estimated guess above had been more correct if check_updates() had
been called only once, but the point is: we get lots of bytes moved.
To fix this, and use an O(N) algorithm instead, where N is the number
of cache_entries, we delete/remove all entries in one loop through all
entries.
From a retest, the new remove_marked_cache_entries() from the patch
below, ended up with the following output line from oprofile:
46 0.0105 15 0.0041 git remove_marked_cache_entries
If we can trust the numbers from oprofile in this case, we saved
approximately ((7710 - 46) * 20000) / (2 * 1000 * 1000 * 1000) = 0.077
seconds CPU time with this fix for this particular test. And notice
that now the CPU did only 46 / 15 = 3.1 cycles/instruction.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-02-18 23:18:03 +01:00
|
|
|
extern void remove_marked_cache_entries(struct index_state *istate);
|
2007-04-02 08:26:07 +02:00
|
|
|
extern int remove_file_from_index(struct index_state *, const char *path);
|
2008-05-21 21:04:34 +02:00
|
|
|
#define ADD_CACHE_VERBOSE 1
|
|
|
|
#define ADD_CACHE_PRETEND 2
|
2008-05-25 23:03:50 +02:00
|
|
|
#define ADD_CACHE_IGNORE_ERRORS 4
|
2008-07-21 10:24:17 +02:00
|
|
|
#define ADD_CACHE_IGNORE_REMOVAL 8
|
2008-08-21 10:44:53 +02:00
|
|
|
#define ADD_CACHE_INTENT 16
|
2017-01-19 04:18:53 +01:00
|
|
|
/*
|
|
|
|
* These two are used to add the contents of the file at path
|
|
|
|
* to the index, marking the working tree up-to-date by storing
|
|
|
|
* the cached stat info in the resulting cache entry. A caller
|
|
|
|
* that has already run lstat(2) on the path can call
|
|
|
|
* add_to_index(), and all others can call add_file_to_index();
|
|
|
|
* the latter will do necessary lstat(2) internally before
|
|
|
|
* calling the former.
|
|
|
|
*/
|
2016-09-14 23:07:47 +02:00
|
|
|
extern int add_to_index(struct index_state *, const char *path, struct stat *, int flags);
|
|
|
|
extern int add_file_to_index(struct index_state *, const char *path, int flags);
|
2017-01-19 04:18:53 +01:00
|
|
|
|
2014-01-27 15:45:08 +01:00
|
|
|
extern struct cache_entry *make_cache_entry(unsigned int mode, const unsigned char *sha1, const char *path, int stage, unsigned int refresh_options);
|
2016-09-14 23:07:46 +02:00
|
|
|
extern int chmod_index_entry(struct index_state *, struct cache_entry *ce, char flip);
|
Convert "struct cache_entry *" to "const ..." wherever possible
I attempted to make index_state->cache[] a "const struct cache_entry **"
to find out how existing entries in index are modified and where. The
question I have is what do we do if we really need to keep track of on-disk
changes in the index. The result is
- diff-lib.c: setting CE_UPTODATE
- name-hash.c: setting CE_HASHED
- preload-index.c, read-cache.c, unpack-trees.c and
builtin/update-index: obvious
- entry.c: write_entry() may refresh the checked out entry via
fill_stat_cache_info(). This causes "non-const struct cache_entry
*" in builtin/apply.c, builtin/checkout-index.c and
builtin/checkout.c
- builtin/ls-files.c: --with-tree changes stagemask and may set
CE_UPDATE
Of these, write_entry() and its call sites are probably most
interesting because it modifies on-disk info. But this is stat info
and can be retrieved via refresh, at least for porcelain
commands. Other just uses ce_flags for local purposes.
So, keeping track of "dirty" entries is just a matter of setting a
flag in index modification functions exposed by read-cache.c. Except
unpack-trees, the rest of the code base does not do anything funny
behind read-cache's back.
The actual patch is less valuable than the summary above. But if
anyone wants to re-identify the above sites, applying this patch, then
this:
diff --git a/cache.h b/cache.h
index 430d021..1692891 100644
--- a/cache.h
+++ b/cache.h
@@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode)
#define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
struct index_state {
- struct cache_entry **cache;
+ const struct cache_entry **cache;
unsigned int version;
unsigned int cache_nr, cache_alloc, cache_changed;
struct string_list *resolve_undo;
will help quickly identify them without bogus warnings.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 17:29:00 +02:00
|
|
|
extern int ce_same_name(const struct cache_entry *a, const struct cache_entry *b);
|
2014-02-04 03:20:09 +01:00
|
|
|
extern void set_object_name_for_intent_to_add_entry(struct cache_entry *ce);
|
2008-10-16 17:07:26 +02:00
|
|
|
extern int index_name_is_other(const struct index_state *, const char *, int);
|
2017-01-10 21:06:10 +01:00
|
|
|
extern void *read_blob_data_from_index(const struct index_state *, const char *, unsigned long *);
|
2007-11-10 09:15:03 +01:00
|
|
|
|
|
|
|
/* do stat comparison even if CE_VALID is true */
|
|
|
|
#define CE_MATCH_IGNORE_VALID 01
|
|
|
|
/* do not check the contents but report dirty on racily-clean entries */
|
2009-12-14 12:43:58 +01:00
|
|
|
#define CE_MATCH_RACY_IS_DIRTY 02
|
|
|
|
/* do stat comparison even if CE_SKIP_WORKTREE is true */
|
|
|
|
#define CE_MATCH_IGNORE_SKIP_WORKTREE 04
|
2014-01-27 15:45:07 +01:00
|
|
|
/* ignore non-existent files during stat update */
|
|
|
|
#define CE_MATCH_IGNORE_MISSING 0x08
|
2014-01-27 15:45:08 +01:00
|
|
|
/* enable stat refresh */
|
|
|
|
#define CE_MATCH_REFRESH 0x10
|
2017-09-22 18:35:40 +02:00
|
|
|
/* don't refresh_fsmonitor state or do stat comparison even if CE_FSMONITOR_VALID is true */
|
|
|
|
#define CE_MATCH_IGNORE_FSMONITOR 0x20
|
|
|
|
extern int ie_match_stat(struct index_state *, const struct cache_entry *, struct stat *, unsigned int);
|
|
|
|
extern int ie_modified(struct index_state *, const struct cache_entry *, struct stat *, unsigned int);
|
2007-11-10 09:15:03 +01:00
|
|
|
|
2011-05-08 10:47:33 +02:00
|
|
|
#define HASH_WRITE_OBJECT 1
|
|
|
|
#define HASH_FORMAT_CHECK 2
|
2017-11-16 17:38:28 +01:00
|
|
|
#define HASH_RENORMALIZE 4
|
2017-08-20 22:09:29 +02:00
|
|
|
extern int index_fd(struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
|
2017-08-20 22:09:28 +02:00
|
|
|
extern int index_path(struct object_id *oid, const char *path, struct stat *st, unsigned flags);
|
2013-06-20 10:37:50 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Record to sd the data from st that we use to check whether a file
|
|
|
|
* might have changed.
|
|
|
|
*/
|
|
|
|
extern void fill_stat_data(struct stat_data *sd, struct stat *st);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return 0 if st is consistent with a file not having been changed
|
|
|
|
* since sd was filled. If there are differences, return a
|
|
|
|
* combination of MTIME_CHANGED, CTIME_CHANGED, OWNER_CHANGED,
|
|
|
|
* INODE_CHANGED, and DATA_CHANGED.
|
|
|
|
*/
|
|
|
|
extern int match_stat_data(const struct stat_data *sd, struct stat *st);
|
2015-03-08 11:12:37 +01:00
|
|
|
extern int match_stat_data_racy(const struct index_state *istate,
|
|
|
|
const struct stat_data *sd, struct stat *st);
|
2013-06-20 10:37:50 +02:00
|
|
|
|
2005-05-15 23:23:12 +02:00
|
|
|
extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
|
|
|
|
|
2006-05-19 18:56:35 +02:00
|
|
|
#define REFRESH_REALLY 0x0001 /* ignore_valid */
|
|
|
|
#define REFRESH_UNMERGED 0x0002 /* allow unmerged */
|
|
|
|
#define REFRESH_QUIET 0x0004 /* be quiet about it */
|
|
|
|
#define REFRESH_IGNORE_MISSING 0x0008 /* ignore non-existent */
|
2008-07-20 08:25:00 +02:00
|
|
|
#define REFRESH_IGNORE_SUBMODULES 0x0010 /* ignore submodules */
|
2009-08-21 10:57:58 +02:00
|
|
|
#define REFRESH_IN_PORCELAIN 0x0020 /* user friendly output, not "needs update" */
|
2013-07-14 10:35:54 +02:00
|
|
|
extern int refresh_index(struct index_state *, unsigned int flags, const struct pathspec *pathspec, char *seen, const char *header_msg);
|
merge: avoid "safer crlf" during recording of merge results
When merge_recursive() decides what the correct blob object merge
result for a path should be, it uses update_file_flags() helper
function to write it out to a working tree file and then calls
add_cacheinfo(). The add_cacheinfo() function in turn calls
make_cache_entry() to create a new cache entry to replace the
higher-stage entries for the path that represents the conflict.
The make_cache_entry() function calls refresh_cache_entry() to fill
in the cached stat information. To mark a cache entry as
up-to-date, the data is re-read from the file in the working tree,
and goes through convert_to_git() conversion to be compared with the
blob object name the new cache entry records.
It is important to note that this happens while the higher-stage
entries, which are going to be replaced with the new entry, are
still in the index. Unfortunately, the convert_to_git() conversion
has a misguided "safer crlf" mechanism baked in, and looks at the
existing cache entry for the path to decide how to convert the
contents in the working tree file. If our side (i.e. stage#2)
records a text blob with CRLF in it, even when the system is
configured to record LF in blobs and convert them to CRLF upon
checkout (and back to LF upon checkin), the "safer crlf" mechanism
stops us doing so.
This especially poses a problem during a renormalizing merge, where
the merge result for the path is computed by first "normalizing" the
blobs involved in the merge by using convert_to_working_tree()
followed by convert_to_git() with "safer crlf" disabled. The merge
result that is computed correctly and fed to add_cacheinfo() via
update_file_flags() does _not_ match what refresh_cache_entry() sees
by converting the working tree file via convert_to_git().
We can work this around by not refreshing the new cache entry in
make_cache_entry() called by add_cacheinfo(). After add_cacheinfo()
adds the new entry, we can call refresh_cache_entry() on that,
knowing that addition of this new cache entry would have removed the
stale cache entries that had CRLF in stage #2 that were carried over
before the renormalizing merge started and will not interfere with
the correct recording of the result.
The test update was taken from a series by Torsten Bögershausen
that attempted to fix this with a different approach.
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Reviewed-by: Torsten Bögershausen <tboegi@web.de>
2016-07-08 19:59:15 +02:00
|
|
|
extern struct cache_entry *refresh_cache_entry(struct cache_entry *, unsigned int);
|
2006-05-19 18:56:35 +02:00
|
|
|
|
2017-10-06 22:12:14 +02:00
|
|
|
/*
|
|
|
|
* Opportunistically update the index but do not complain if we can't.
|
|
|
|
* The lockfile is always committed or rolled back.
|
|
|
|
*/
|
2011-03-21 18:16:10 +01:00
|
|
|
extern void update_index_if_able(struct index_state *, struct lock_file *);
|
_GIT_INDEX_OUTPUT: allow plumbing to output to an alternative index file.
When defined, this allows plumbing commands that update the
index (add, apply, checkout-index, merge-recursive, mv,
read-tree, rm, update-index, and write-tree) to write their
resulting index to an alternative index file while holding a
lock to the original index file. With this, git-commit that
jumps the index does not have to make an extra copy of the index
file, and more importantly, it can do the update while holding
the lock on the index.
However, I think the interface to let an environment variable
specify the output is a mistake, as shown in the documentation.
If a curious user has the environment variable set to something
other than the file GIT_INDEX_FILE points at, almost everything
will break. This should instead be a command line parameter to
tell these plumbing commands to write the result in the named
file, to prevent stupid mistakes.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-01 08:09:02 +02:00
|
|
|
|
|
|
|
extern int hold_locked_index(struct lock_file *, int);
|
2007-04-01 08:27:41 +02:00
|
|
|
extern void set_alternate_index_output(const char *);
|
2014-10-01 12:28:42 +02:00
|
|
|
|
2017-04-14 22:32:21 +02:00
|
|
|
extern int verify_index_checksum;
|
2017-10-18 16:27:25 +02:00
|
|
|
extern int verify_ce_order;
|
2017-04-14 22:32:21 +02:00
|
|
|
|
2006-02-27 23:47:45 +01:00
|
|
|
/* Environment bits from configuration mechanism */
|
2005-10-11 01:31:08 +02:00
|
|
|
extern int trust_executable_bit;
|
2008-07-28 08:31:28 +02:00
|
|
|
extern int trust_ctime;
|
2013-01-22 08:49:22 +01:00
|
|
|
extern int check_stat;
|
2007-06-25 00:11:24 +02:00
|
|
|
extern int quote_path_fully;
|
2007-03-02 22:11:30 +01:00
|
|
|
extern int has_symlinks;
|
2010-10-28 20:28:04 +02:00
|
|
|
extern int minimum_abbrev, default_abbrev;
|
2008-03-22 00:52:46 +01:00
|
|
|
extern int ignore_case;
|
2006-02-09 06:15:24 +01:00
|
|
|
extern int assume_unchanged;
|
2006-05-02 09:40:24 +02:00
|
|
|
extern int prefer_symlink_refs;
|
2006-03-21 03:45:47 +01:00
|
|
|
extern int warn_ambiguous_refs;
|
cat-file: disable object/refname ambiguity check for batch mode
A common use of "cat-file --batch-check" is to feed a list
of objects from "rev-list --objects" or a similar command.
In this instance, all of our input objects are 40-byte sha1
ids. However, cat-file has always allowed arbitrary revision
specifiers, and feeds the result to get_sha1().
Fortunately, get_sha1() recognizes a 40-byte sha1 before
doing any hard work trying to look up refs, meaning this
scenario should end up spending very little time converting
the input into an object sha1. However, since 798c35f
(get_sha1: warn about full or short object names that look
like refs, 2013-05-29), when we encounter this case, we
spend the extra effort to do a refname lookup anyway, just
to print a warning. This is further exacerbated by ca91993
(get_packed_ref_cache: reload packed-refs file when it
changes, 2013-06-20), which makes individual ref lookup more
expensive by requiring a stat() of the packed-refs file for
each missing ref.
With no patches, this is the time it takes to run:
$ git rev-list --objects --all >objects
$ time git cat-file --batch-check='%(objectname)' <objects
on the linux.git repository:
real 1m13.494s
user 0m25.924s
sys 0m47.532s
If we revert ca91993, the packed-refs up-to-date check, it
gets a little better:
real 0m54.697s
user 0m21.692s
sys 0m32.916s
but we are still spending quite a bit of time on ref lookup
(and we would not want to revert that patch, anyway, which
has correctness issues). If we revert 798c35f, disabling
the warning entirely, we get a much more reasonable time:
real 0m7.452s
user 0m6.836s
sys 0m0.608s
This patch does the moral equivalent of this final case (and
gets similar speedups). We introduce a global flag that
callers of get_sha1() can use to avoid paying the price for
the warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-12 08:20:05 +02:00
|
|
|
extern int warn_on_object_refname_ambiguity;
|
2006-02-27 23:47:45 +01:00
|
|
|
extern const char *apply_default_whitespace;
|
2009-08-04 13:16:49 +02:00
|
|
|
extern const char *apply_default_ignorewhitespace;
|
2011-10-06 20:22:24 +02:00
|
|
|
extern const char *git_attributes_file;
|
2016-05-05 00:58:12 +02:00
|
|
|
extern const char *git_hooks_path;
|
2006-07-03 22:11:47 +02:00
|
|
|
extern int zlib_compression_level;
|
Custom compression levels for objects and packs
Add config variables pack.compression and core.loosecompression ,
and switch --compression=level to pack-objects.
Loose objects will be compressed using core.loosecompression if set,
else core.compression if set, else Z_BEST_SPEED.
Packed objects will be compressed using --compression=level if seen,
else pack.compression if set, else core.compression if set,
else Z_DEFAULT_COMPRESSION. This is the "pack compression level".
Loose objects added to a pack undeltified will be recompressed
to the pack compression level if it is unequal to the current
loose compression level by the preceding rules, or if the loose
object was written while core.legacyheaders = true. Newly
deltified loose objects are always compressed to the current
pack compression level.
Previously packed objects added to a pack are recompressed
to the current pack compression level exactly when their
deltification status changes, since the previous pack data
cannot be reused.
In either case, the --no-reuse-object switch from the first
patch below will always force recompression to the current pack
compression level, instead of assuming the pack compression level
hasn't changed and pack data can be reused when possible.
This applies on top of the following patches from Nicolas Pitre:
[PATCH] allow for undeltified objects not to be reused
[PATCH] make "repack -f" imply "pack-objects --no-reuse-object"
Signed-off-by: Dana L. How <danahow@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-05-09 22:56:50 +02:00
|
|
|
extern int core_compression_level;
|
2016-11-16 02:42:40 +01:00
|
|
|
extern int pack_compression_level;
|
2006-12-23 08:34:28 +01:00
|
|
|
extern size_t packed_git_window_size;
|
2006-12-23 08:33:35 +01:00
|
|
|
extern size_t packed_git_limit;
|
2007-03-19 06:14:37 +01:00
|
|
|
extern size_t delta_base_cache_limit;
|
2011-04-05 19:44:11 +02:00
|
|
|
extern unsigned long big_file_threshold;
|
2011-10-28 23:48:40 +02:00
|
|
|
extern unsigned long pack_size_limit_cfg;
|
2014-02-18 12:24:55 +01:00
|
|
|
|
2016-09-13 05:24:23 +02:00
|
|
|
/*
|
|
|
|
* Accessors for the core.sharedrepository config which lazy-load the value
|
|
|
|
* from the config (if not already set). The "reset" function can be
|
|
|
|
* used to unset "set" or cached value, meaning that the value will be loaded
|
|
|
|
* fresh from the config file on the next call to get_shared_repository().
|
|
|
|
*/
|
2016-03-11 23:36:49 +01:00
|
|
|
void set_shared_repository(int value);
|
|
|
|
int get_shared_repository(void);
|
2016-09-13 05:24:23 +02:00
|
|
|
void reset_shared_repository(void);
|
2016-03-11 23:36:49 +01:00
|
|
|
|
2014-02-18 12:24:55 +01:00
|
|
|
/*
|
|
|
|
* Do replace refs need to be checked this run? This variable is
|
|
|
|
* initialized to true unless --no-replace-object is used or
|
|
|
|
* $GIT_NO_REPLACE_OBJECTS is set, but is set to false by some
|
|
|
|
* commands that do not want replace references to be active. As an
|
|
|
|
* optimization it is also set to false if replace references have
|
|
|
|
* been sought but there were none.
|
|
|
|
*/
|
|
|
|
extern int check_replace_refs;
|
2015-06-11 23:34:59 +02:00
|
|
|
extern char *git_replace_ref_base;
|
2014-02-18 12:24:55 +01:00
|
|
|
|
2008-06-19 00:18:44 +02:00
|
|
|
extern int fsync_object_files;
|
2008-11-14 01:36:30 +01:00
|
|
|
extern int core_preload_index;
|
2009-08-20 15:47:08 +02:00
|
|
|
extern int core_apply_sparse_checkout;
|
git on Mac OS and precomposed unicode
Mac OS X mangles file names containing unicode on file systems HFS+,
VFAT or SAMBA. When a file using unicode code points outside ASCII
is created on a HFS+ drive, the file name is converted into
decomposed unicode and written to disk. No conversion is done if
the file name is already decomposed unicode.
Calling open("\xc3\x84", ...) with a precomposed "Ä" yields the same
result as open("\x41\xcc\x88",...) with a decomposed "Ä".
As a consequence, readdir() returns the file names in decomposed
unicode, even if the user expects precomposed unicode. Unlike on
HFS+, Mac OS X stores files on a VFAT drive (e.g. a USB drive) in
precomposed unicode, but readdir() still returns file names in
decomposed unicode. When a git repository is stored on a network
share using SAMBA, file names are send over the wire and written to
disk on the remote system in precomposed unicode, but Mac OS X
readdir() returns decomposed unicode to be compatible with its
behaviour on HFS+ and VFAT.
The unicode decomposition causes many problems:
- The names "git add" and other commands get from the end user may
often be precomposed form (the decomposed form is not easily input
from the keyboard), but when the commands read from the filesystem
to see what it is going to update the index with already is on the
filesystem, readdir() will give decomposed form, which is different.
- Similarly "git log", "git mv" and all other commands that need to
compare pathnames found on the command line (often but not always
precomposed form; a command line input resulting from globbing may
be in decomposed) with pathnames found in the tree objects (should
be precomposed form to be compatible with other systems and for
consistency in general).
- The same for names stored in the index, which should be
precomposed, that may need to be compared with the names read from
readdir().
NFS mounted from Linux is fully transparent and does not suffer from
the above.
As Mac OS X treats precomposed and decomposed file names as equal,
we can
- wrap readdir() on Mac OS X to return the precomposed form, and
- normalize decomposed form given from the command line also to the
precomposed form,
to ensure that all pathnames used in Git are always in the
precomposed form. This behaviour can be requested by setting
"core.precomposedunicode" configuration variable to true.
The code in compat/precomposed_utf8.c implements basically 4 new
functions: precomposed_utf8_opendir(), precomposed_utf8_readdir(),
precomposed_utf8_closedir() and precompose_argv(). The first three
are to wrap opendir(3), readdir(3), and closedir(3) functions.
The argv[] conversion allows to use the TAB filename completion done
by the shell on command line. It tolerates other tools which use
readdir() to feed decomposed file names into git.
When creating a new git repository with "git init" or "git clone",
"core.precomposedunicode" will be set "false".
The user needs to activate this feature manually. She typically
sets core.precomposedunicode to "true" on HFS and VFAT, or file
systems mounted via SAMBA.
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-08 15:50:25 +02:00
|
|
|
extern int precomposed_unicode;
|
2014-12-16 00:15:20 +01:00
|
|
|
extern int protect_hfs;
|
2014-12-16 23:46:59 +01:00
|
|
|
extern int protect_ntfs;
|
2017-09-22 18:35:40 +02:00
|
|
|
extern const char *core_fsmonitor;
|
2005-10-11 01:31:08 +02:00
|
|
|
|
2015-03-20 19:43:06 +01:00
|
|
|
/*
|
|
|
|
* Include broken refs in all ref iterations, which will
|
|
|
|
* generally choke dangerous operations rather than letting
|
|
|
|
* them silently proceed without taking the broken ref into
|
|
|
|
* account.
|
|
|
|
*/
|
|
|
|
extern int ref_paranoia;
|
|
|
|
|
git: add --no-optional-locks option
Some tools like IDEs or fancy editors may periodically run
commands like "git status" in the background to keep track
of the state of the repository. Some of these commands may
refresh the index and write out the result in an
opportunistic way: if they can get the index lock, then they
update the on-disk index with any updates they find. And if
not, then their in-core refresh is lost and just has to be
recomputed by the next caller.
But taking the index lock may conflict with other operations
in the repository. Especially ones that the user is doing
themselves, which _aren't_ opportunistic. In other words,
"git status" knows how to back off when somebody else is
holding the lock, but other commands don't know that status
would be happy to drop the lock if somebody else wanted it.
There are a couple possible solutions:
1. Have some kind of "pseudo-lock" that allows other
commands to tell status that they want the lock.
This is likely to be complicated and error-prone to
implement (and maybe even impossible with just
dotlocks to work from, as it requires some
inter-process communication).
2. Avoid background runs of commands like "git status"
that want to do opportunistic updates, preferring
instead plumbing like diff-files, etc.
This is awkward for a couple of reasons. One is that
"status --porcelain" reports a lot more about the
repository state than is available from individual
plumbing commands. And two is that we actually _do_
want to see the refreshed index. We just don't want to
take a lock or write out the result. Whereas commands
like diff-files expect us to refresh the index
separately and write it to disk so that they can depend
on the result. But that write is exactly what we're
trying to avoid.
3. Ask "status" not to lock or write the index.
This is easy to implement. The big downside is that any
work done in refreshing the index for such a call is
lost when the process exits. So a background process
may end up re-hashing a changed file multiple times
until the user runs a command that does an index
refresh themselves.
This patch implements the option 3. The idea (and the test)
is largely stolen from a Git for Windows patch by Johannes
Schindelin, 67e5ce7f63 (status: offer *not* to lock the
index and update it, 2016-08-12). The twist here is that
instead of making this an option to "git status", it becomes
a "git" option and matching environment variable.
The reason there is two-fold:
1. An environment variable is carried through to
sub-processes. And whether an invocation is a
background process or not should apply to the whole
process tree. So you could do "git --no-optional-locks
foo", and if "foo" is a script or alias that calls
"status", you'll still get the effect.
2. There may be other programs that want the same
treatment.
I've punted here on finding more callers to convert,
since "status" is the obvious one to call as a repeated
background job. But "git diff"'s opportunistic refresh
of the index may be a good candidate.
The test is taken from 67e5ce7f63, and it's worth repeating
Johannes's explanation:
Note that the regression test added in this commit does
not *really* verify that no index.lock file was written;
that test is not possible in a portable way. Instead, we
verify that .git/index is rewritten *only* when `git
status` is run without `--no-optional-locks`.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-27 08:54:30 +02:00
|
|
|
/*
|
|
|
|
* Returns the boolean value of $GIT_OPTIONAL_LOCKS (or the default value).
|
|
|
|
*/
|
|
|
|
int use_optional_locks(void);
|
|
|
|
|
2013-01-16 20:18:48 +01:00
|
|
|
/*
|
|
|
|
* The character that begins a commented line in user-editable file
|
|
|
|
* that is subject to stripspace.
|
|
|
|
*/
|
|
|
|
extern char comment_line_char;
|
2014-05-17 03:52:23 +02:00
|
|
|
extern int auto_comment_line_char;
|
2013-01-16 20:18:48 +01:00
|
|
|
|
mingw: introduce the 'core.hideDotFiles' setting
On Unix (and Linux), files and directories whose names start with a dot
are usually not shown by default. This convention is used by Git: the
.git/ directory should be left alone by regular users, and only accessed
through Git itself.
On Windows, no such convention exists. Instead, there is an explicit flag
to mark files or directories as hidden.
In the early days, Git for Windows did not mark the .git/ directory (or
for that matter, any file or directory whose name starts with a dot)
hidden. This led to quite a bit of confusion, and even loss of data.
Consequently, Git for Windows introduced the core.hideDotFiles setting,
with three possible values: true, false, and dotGitOnly, defaulting to
marking only the .git/ directory as hidden.
The rationale: users do not need to access .git/ directly, and indeed (as
was demonstrated) should not really see that directory, either. However,
not all dot files should be hidden by default, as e.g. Eclipse does not
show them (and the user would therefore be unable to see, say, a
.gitattributes file).
In over five years since the last attempt to bring this patch into core
Git, a slightly buggy version of this patch has served Git for Windows'
users well: no single report indicated problems with the hidden .git/
directory, and the stream of problems caused by the previously non-hidden
.git/ directory simply stopped. The bugs have been fixed during the
process of getting this patch upstream.
Note that there is a funny quirk we have to pay attention to when
creating hidden files: we use Win32's _wopen() function which
transmogrifies its arguments and hands off to Win32's CreateFile()
function. That latter function errors out with ERROR_ACCESS_DENIED (the
equivalent of EACCES) when the equivalent of the O_CREAT flag was passed
and the file attributes (including the hidden flag) do not match an
existing file's. And _wopen() accepts no parameter that would be
transmogrified into said hidden flag. Therefore, we simply try again
without O_CREAT.
A slightly different method is required for our fopen()/freopen()
function as we cannot even *remove* the implicit O_CREAT flag.
Therefore, we briefly mark existing files as unhidden when opening them
via fopen()/freopen().
The ERROR_ACCESS_DENIED error can also be triggered by opening a file
that is marked as a system file (which is unlikely to be tracked in
Git), and by trying to create a file that has *just* been deleted and is
awaiting the last open handles to be released (which would be handled
better by the "Try again?" logic, a story for a different patch series,
though). In both cases, it does not matter much if we try again without
the O_CREAT flag, read: it does not hurt, either.
For details on how ERROR_ACCESS_DENIED can be triggered, see
https://msdn.microsoft.com/en-us/library/windows/desktop/aa363858
Original-patch-by: Erik Faye-Lund <kusmabite@gmail.com>
Initial-Test-By: Pat Thoyts <patthoyts@users.sourceforge.net>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-05-11 10:43:37 +02:00
|
|
|
/*
 * Windows only: selects which files get marked as hidden.  The
 * enumerators are numbered consecutively starting at 0.
 */
enum hide_dotfiles_type {
	HIDE_DOTFILES_FALSE = 0,
	HIDE_DOTFILES_TRUE,
	HIDE_DOTFILES_DOTGITONLY
};
/* The active setting; the variable itself is defined outside this header. */
extern enum hide_dotfiles_type hide_dotfiles;
|
|
|
|
|
2017-01-27 11:09:47 +01:00
|
|
|
/*
 * Possible values of log_all_ref_updates.  LOG_REFS_UNSET is the only
 * negative value; the remaining enumerators count up from 0.
 * NOTE(review): presumably mirrors the core.logAllRefUpdates setting --
 * confirm against the config parsing code.
 */
enum log_refs_config {
	LOG_REFS_UNSET = -1,
	LOG_REFS_NONE = 0,
	LOG_REFS_NORMAL,
	LOG_REFS_ALWAYS
};
/* The active setting; the variable itself is defined outside this header. */
extern enum log_refs_config log_all_ref_updates;
|
|
|
|
|
2008-02-19 17:24:37 +01:00
|
|
|
/*
 * Branch tracking modes.  BRANCH_TRACK_UNSPECIFIED is the only negative
 * value; the remaining enumerators count up from BRANCH_TRACK_NEVER = 0.
 */
enum branch_track {
	BRANCH_TRACK_UNSPECIFIED = -1,
	BRANCH_TRACK_NEVER = 0,
	BRANCH_TRACK_REMOTE,
	BRANCH_TRACK_ALWAYS,
	BRANCH_TRACK_EXPLICIT,
	BRANCH_TRACK_OVERRIDE
};
|
|
|
|
|
2008-05-11 00:36:29 +02:00
|
|
|
/*
 * Automatic-rebase setup modes, numbered consecutively from
 * AUTOREBASE_NEVER = 0.
 */
enum rebase_setup_type {
	AUTOREBASE_NEVER = 0,
	AUTOREBASE_LOCAL,
	AUTOREBASE_REMOTE,
	AUTOREBASE_ALWAYS
};
|
|
|
|
|
2009-03-16 16:42:51 +01:00
|
|
|
/*
 * Push-default modes, numbered consecutively from
 * PUSH_DEFAULT_NOTHING = 0.  PUSH_DEFAULT_UNSPECIFIED is the last
 * (largest) enumerator.
 */
enum push_default_type {
	PUSH_DEFAULT_NOTHING = 0,
	PUSH_DEFAULT_MATCHING,
	PUSH_DEFAULT_SIMPLE,
	PUSH_DEFAULT_UPSTREAM,
	PUSH_DEFAULT_CURRENT,
	PUSH_DEFAULT_UNSPECIFIED
};
|
|
|
|
|
2008-02-19 17:24:37 +01:00
|
|
|
extern enum branch_track git_branch_track;
|
2008-05-11 00:36:29 +02:00
|
|
|
extern enum rebase_setup_type autorebase;
|
2009-03-16 16:42:51 +01:00
|
|
|
extern enum push_default_type push_default;
|
2008-02-19 17:24:37 +01:00
|
|
|
|
2009-04-28 00:32:25 +02:00
|
|
|
/* The two object creation strategies, with explicitly fixed values. */
enum object_creation_mode {
	OBJECT_CREATION_USES_HARDLINKS = 0,
	OBJECT_CREATION_USES_RENAMES = 1
};

/* The active strategy; the variable itself is defined outside this header. */
extern enum object_creation_mode object_creation_mode;
|
2009-04-25 11:57:14 +02:00
|
|
|
|
2009-10-09 12:21:57 +02:00
|
|
|
extern char *notes_ref_name;
|
|
|
|
|
2009-07-23 17:33:49 +02:00
|
|
|
extern int grafts_replace_parents;
|
|
|
|
|
introduce "extensions" form of core.repositoryformatversion
Normally we try to avoid bumps of the whole-repository
core.repositoryformatversion field. However, it is
unavoidable if we want to safely change certain aspects of
git in a backwards-incompatible way (e.g., modifying the set
of ref tips that we must traverse to generate a list of
unreachable, safe-to-prune objects).
If we were to bump the repository version for every such
change, then any implementation understanding version `X`
would also have to understand `X-1`, `X-2`, and so forth,
even though the incompatibilities may be in orthogonal parts
of the system, and there is otherwise no reason we cannot
implement one without the other (or more importantly, that
the user cannot choose to use one feature without the other,
weighing the tradeoff in compatibility only for that
particular feature).
This patch documents the existing repositoryformatversion
strategy and introduces a new format, "1", which lets a
repository specify that it must run with an arbitrary set of
extensions. This can be used, for example:
- to inform git that the objects should not be pruned based
only on the reachability of the ref tips (e.g., because it
has "clone --shared" children)
- that the refs are stored in a format besides the usual
"refs" and "packed-refs" directories
Because we bump to format "1", and because format "1"
requires that a running git knows about any extensions
mentioned, we know that older versions of the code will not
do something dangerous when confronted with these new
formats.
For example, if the user chooses to use database storage for
refs, they may set the "extensions.refbackend" config to
"db". Older versions of git will not understand format "1"
and bail. Versions of git which understand "1" but do not
know about "refbackend", or which know about "refbackend"
but not about the "db" backend, will refuse to run. This is
annoying, of course, but much better than the alternative of
claiming that there are no refs in the repository, or
writing to a location that other implementations will not
read.
Note that we are only defining the rules for format 1 here.
We do not ever write format 1 ourselves; it is a tool that
is meant to be used by users and future extensions to
provide safety with older implementations.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-23 12:53:58 +02:00
|
|
|
/*
|
|
|
|
* GIT_REPO_VERSION is the version we write by default. The
|
|
|
|
* _READ variant is the highest number we know how to
|
|
|
|
* handle.
|
|
|
|
*/
|
2005-11-26 00:59:09 +01:00
|
|
|
#define GIT_REPO_VERSION 0
|
introduce "extensions" form of core.repositoryformatversion
Normally we try to avoid bumps of the whole-repository
core.repositoryformatversion field. However, it is
unavoidable if we want to safely change certain aspects of
git in a backwards-incompatible way (e.g., modifying the set
of ref tips that we must traverse to generate a list of
unreachable, safe-to-prune objects).
If we were to bump the repository version for every such
change, then any implementation understanding version `X`
would also have to understand `X-1`, `X-2`, and so forth,
even though the incompatibilities may be in orthogonal parts
of the system, and there is otherwise no reason we cannot
implement one without the other (or more importantly, that
the user cannot choose to use one feature without the other,
weighing the tradeoff in compatibility only for that
particular feature).
This patch documents the existing repositoryformatversion
strategy and introduces a new format, "1", which lets a
repository specify that it must run with an arbitrary set of
extensions. This can be used, for example:
- to inform git that the objects should not be pruned based
only on the reachability of the ref tips (e.g., because it
has "clone --shared" children)
- that the refs are stored in a format besides the usual
"refs" and "packed-refs" directories
Because we bump to format "1", and because format "1"
requires that a running git knows about any extensions
mentioned, we know that older versions of the code will not
do something dangerous when confronted with these new
formats.
For example, if the user chooses to use database storage for
refs, they may set the "extensions.refbackend" config to
"db". Older versions of git will not understand format "1"
and bail. Versions of git which understand "1" but do not
know about "refbackend", or which know about "refbackend"
but not about the "db" backend, will refuse to run. This is
annoying, of course, but much better than the alternative of
claiming that there are no refs in the repository, or
writing to a location that other implementations will not
read.
Note that we are only defining the rules for format 1 here.
We do not ever write format 1 ourselves; it is a tool that
is meant to be used by users and future extensions to
provide safety with older implementations.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-23 12:53:58 +02:00
|
|
|
#define GIT_REPO_VERSION_READ 1
|
2015-06-23 12:54:11 +02:00
|
|
|
extern int repository_format_precious_objects;
|
2017-12-05 17:58:43 +01:00
|
|
|
extern char *repository_format_partial_clone;
|
2017-12-08 16:58:45 +01:00
|
|
|
extern const char *core_partial_clone_filter_default;
|
2016-03-11 23:36:45 +01:00
|
|
|
|
2016-03-11 23:37:07 +01:00
|
|
|
struct repository_format {
|
|
|
|
int version;
|
|
|
|
int precious_objects;
|
2017-12-05 17:58:43 +01:00
|
|
|
char *partial_clone; /* value of extensions.partialclone */
|
2016-03-11 23:37:07 +01:00
|
|
|
int is_bare;
|
2017-11-12 22:28:53 +01:00
|
|
|
int hash_algo;
|
2016-03-11 23:37:07 +01:00
|
|
|
char *work_tree;
|
|
|
|
struct string_list unknown_extensions;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Read the repository format characteristics from the config file "path" into
|
|
|
|
* "format" struct. Returns the numeric version. On error, -1 is returned,
|
|
|
|
* format->version is set to -1, and all other fields in the struct are
|
|
|
|
* undefined.
|
|
|
|
*/
|
|
|
|
int read_repository_format(struct repository_format *format, const char *path);
|
|
|
|
|
|
|
|
/*
 * Verify that the repository described by repository_format is something we
 * can read. If it is, return 0. Otherwise, return -1, and "err" will describe
 * any errors encountered.
 */
int verify_repository_format(const struct repository_format *format,
			     struct strbuf *err);
|
|
|
|
|
2016-03-11 23:36:45 +01:00
|
|
|
/*
|
|
|
|
* Check the repository format version in the path found in get_git_dir(),
|
|
|
|
* and die if it is a version we don't understand. Generally one would
|
|
|
|
* set_git_dir() before calling this, and use it only for "are we in a valid
|
|
|
|
* repo?".
|
|
|
|
*/
|
|
|
|
extern void check_repository_format(void);
|
2005-11-26 00:59:09 +01:00
|
|
|
|
2005-04-09 18:48:20 +02:00
|
|
|
#define MTIME_CHANGED 0x0001
|
|
|
|
#define CTIME_CHANGED 0x0002
|
|
|
|
#define OWNER_CHANGED 0x0004
|
|
|
|
#define MODE_CHANGED 0x0008
|
|
|
|
#define INODE_CHANGED 0x0010
|
|
|
|
#define DATA_CHANGED 0x0020
|
2005-05-05 14:38:25 +02:00
|
|
|
#define TYPE_CHANGED 0x0040
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2014-02-21 17:32:06 +01:00
|
|
|
/*
|
2018-01-17 18:54:54 +01:00
|
|
|
* Put in `buf` the name of the file in the local object database that
|
|
|
|
* would be used to store a loose object with the specified sha1.
|
2014-02-21 17:32:06 +01:00
|
|
|
*/
|
2018-01-17 18:54:54 +01:00
|
|
|
extern void sha1_file_name(struct strbuf *buf, const unsigned char *sha1);
|
2014-02-21 17:32:06 +01:00
|
|
|
|
2015-09-24 23:05:45 +02:00
|
|
|
/*
|
|
|
|
* Return an abbreviated sha1 unique within this repository's object database.
|
|
|
|
* The result will be at least `len` characters long, and will be NUL
|
|
|
|
* terminated.
|
|
|
|
*
|
2016-10-20 08:19:19 +02:00
|
|
|
* The non-`_r` version returns a static buffer which remains valid until 4
|
|
|
|
* more calls to find_unique_abbrev are made.
|
2015-09-24 23:05:45 +02:00
|
|
|
*
|
|
|
|
* The `_r` variant writes to a buffer supplied by the caller, which must be at
|
|
|
|
* least `GIT_SHA1_HEXSZ + 1` bytes. The return value is the number of bytes
|
|
|
|
* written (excluding the NUL terminator).
|
|
|
|
*
|
|
|
|
* Note that while this version avoids the static buffer, it is not fully
|
|
|
|
* reentrant, as it calls into other non-reentrant git code.
|
|
|
|
*/
|
|
|
|
extern const char *find_unique_abbrev(const unsigned char *sha1, int len);
|
|
|
|
extern int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len);
|
|
|
|
|
2017-03-26 18:01:25 +02:00
|
|
|
extern const unsigned char null_sha1[GIT_MAX_RAWSZ];
|
2015-12-06 23:16:35 +01:00
|
|
|
extern const struct object_id null_oid;
|
2011-04-28 12:19:02 +02:00
|
|
|
|
|
|
|
static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
|
2006-08-15 22:37:19 +02:00
|
|
|
{
|
2017-08-09 12:16:45 +02:00
|
|
|
return memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
|
2006-08-15 22:37:19 +02:00
|
|
|
}
|
2011-04-28 12:19:02 +02:00
|
|
|
|
2015-03-14 00:39:28 +01:00
|
|
|
static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
|
|
|
|
{
|
|
|
|
return hashcmp(oid1->hash, oid2->hash);
|
|
|
|
}
|
|
|
|
|
2011-04-28 12:19:02 +02:00
|
|
|
static inline int is_null_sha1(const unsigned char *sha1)
|
2006-08-17 20:54:57 +02:00
|
|
|
{
|
2011-04-28 12:19:02 +02:00
|
|
|
return !hashcmp(sha1, null_sha1);
|
2006-08-17 20:54:57 +02:00
|
|
|
}
|
2011-04-28 12:19:02 +02:00
|
|
|
|
2015-03-14 00:39:28 +01:00
|
|
|
static inline int is_null_oid(const struct object_id *oid)
|
|
|
|
{
|
|
|
|
return !hashcmp(oid->hash, null_sha1);
|
|
|
|
}
|
|
|
|
|
2006-08-23 08:49:00 +02:00
|
|
|
static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src)
|
|
|
|
{
|
2015-03-14 00:39:28 +01:00
|
|
|
memcpy(sha_dst, sha_src, GIT_SHA1_RAWSZ);
|
2006-08-23 08:49:00 +02:00
|
|
|
}
|
2015-03-14 00:39:28 +01:00
|
|
|
|
|
|
|
static inline void oidcpy(struct object_id *dst, const struct object_id *src)
|
|
|
|
{
|
|
|
|
hashcpy(dst->hash, src->hash);
|
2006-08-23 08:49:00 +02:00
|
|
|
}
|
2015-03-14 00:39:28 +01:00
|
|
|
|
2017-05-30 19:30:44 +02:00
|
|
|
static inline struct object_id *oiddup(const struct object_id *src)
|
|
|
|
{
|
|
|
|
struct object_id *dst = xmalloc(sizeof(struct object_id));
|
|
|
|
oidcpy(dst, src);
|
|
|
|
return dst;
|
|
|
|
}
|
|
|
|
|
2006-08-23 22:57:23 +02:00
|
|
|
static inline void hashclr(unsigned char *hash)
|
|
|
|
{
|
2015-03-14 00:39:28 +01:00
|
|
|
memset(hash, 0, GIT_SHA1_RAWSZ);
|
2006-08-23 22:57:23 +02:00
|
|
|
}
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2015-03-14 00:39:28 +01:00
|
|
|
static inline void oidclr(struct object_id *oid)
|
|
|
|
{
|
2018-01-28 01:13:14 +01:00
|
|
|
memset(oid->hash, 0, GIT_MAX_RAWSZ);
|
2015-03-14 00:39:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-11-12 09:17:52 +01:00
|
|
|
/*
 * The empty tree's object name: as 40 hex digits, as the same 20 bytes
 * in a string literal, and (EMPTY_TREE_SHA1_BIN) as the hash stored in
 * empty_tree_oid.
 */
#define EMPTY_TREE_SHA1_HEX \
	"4b825dc642cb6eb9a060e54bf8d69288fbee4904"
#define EMPTY_TREE_SHA1_BIN_LITERAL \
	 "\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60" \
	 "\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04"
extern const struct object_id empty_tree_oid;
#define EMPTY_TREE_SHA1_BIN (empty_tree_oid.hash)
|
2008-11-12 09:17:52 +01:00
|
|
|
|
2012-03-22 19:53:39 +01:00
|
|
|
/*
 * The empty blob's object name: as 40 hex digits and as the same 20
 * bytes in a string literal.  empty_blob_oid is defined outside this
 * header.
 */
#define EMPTY_BLOB_SHA1_HEX \
	"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"
#define EMPTY_BLOB_SHA1_BIN_LITERAL \
	"\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b" \
	"\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91"
extern const struct object_id empty_blob_oid;
|
2012-03-22 19:53:39 +01:00
|
|
|
|
|
|
|
static inline int is_empty_blob_sha1(const unsigned char *sha1)
|
|
|
|
{
|
2017-11-12 22:28:54 +01:00
|
|
|
return !hashcmp(sha1, the_hash_algo->empty_blob->hash);
|
2012-03-22 19:53:39 +01:00
|
|
|
}
|
|
|
|
|
2016-09-01 01:27:18 +02:00
|
|
|
static inline int is_empty_blob_oid(const struct object_id *oid)
|
|
|
|
{
|
2017-11-12 22:28:54 +01:00
|
|
|
return !oidcmp(oid, the_hash_algo->empty_blob);
|
2016-09-01 01:27:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline int is_empty_tree_sha1(const unsigned char *sha1)
|
|
|
|
{
|
2017-11-12 22:28:54 +01:00
|
|
|
return !hashcmp(sha1, the_hash_algo->empty_tree->hash);
|
2016-09-01 01:27:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline int is_empty_tree_oid(const struct object_id *oid)
|
|
|
|
{
|
2017-11-12 22:28:54 +01:00
|
|
|
return !oidcmp(oid, the_hash_algo->empty_tree);
|
2016-09-01 01:27:18 +02:00
|
|
|
}
|
|
|
|
|
2010-02-22 23:32:13 +01:00
|
|
|
/* set default permissions by passing mode arguments to open(2) */
|
|
|
|
int git_mkstemps_mode(char *pattern, int suffix_len, int mode);
|
|
|
|
int git_mkstemp_mode(char *pattern, int mode);
|
|
|
|
|
2008-04-16 10:34:24 +02:00
|
|
|
/*
|
|
|
|
* NOTE NOTE NOTE!!
|
|
|
|
*
|
|
|
|
* PERM_UMASK, OLD_PERM_GROUP and OLD_PERM_EVERYBODY enumerations must
|
|
|
|
* not be changed. Old repositories have core.sharedrepository written in
|
|
|
|
* numeric format, and therefore these values are preserved for compatibility
|
|
|
|
* reasons.
|
|
|
|
*/
|
2006-06-10 08:09:49 +02:00
|
|
|
/*
 * Shared-repository permission settings.  The first three values are
 * small integers; PERM_GROUP and PERM_EVERYBODY are octal literals.
 */
enum sharedrepo {
	PERM_UMASK          = 0,
	OLD_PERM_GROUP      = 1,
	OLD_PERM_EVERYBODY  = 2,
	PERM_GROUP          = 0660,
	PERM_EVERYBODY      = 0664
};
|
|
|
|
int git_config_perm(const char *var, const char *value);
|
2013-03-30 10:53:32 +01:00
|
|
|
int adjust_shared_perm(const char *path);
|
2014-01-06 14:45:25 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Create the directory containing the named path, using care to be
|
2017-01-06 17:22:25 +01:00
|
|
|
* somewhat safe against races. Return one of the scld_error values to
|
|
|
|
* indicate success/failure. On error, set errno to describe the
|
|
|
|
* problem.
|
2014-01-06 14:45:27 +01:00
|
|
|
*
|
|
|
|
* SCLD_VANISHED indicates that one of the ancestor directories of the
|
|
|
|
* path existed at one point during the function call and then
|
|
|
|
* suddenly vanished, probably because another process pruned the
|
|
|
|
* directory while we were working. To be robust against this kind of
|
|
|
|
* race, callers might want to try invoking the function again when it
|
|
|
|
* returns SCLD_VANISHED.
|
2016-04-24 04:34:12 +02:00
|
|
|
*
|
|
|
|
* safe_create_leading_directories() temporarily changes path while it
|
|
|
|
* is working but restores it before returning.
|
|
|
|
* safe_create_leading_directories_const() doesn't modify path, even
|
|
|
|
* temporarily.
|
2014-01-06 14:45:25 +01:00
|
|
|
*/
|
|
|
|
/*
 * Result codes of safe_create_leading_directories() and its _const
 * variant: SCLD_OK is 0 and every failure code is negative.
 */
enum scld_error {
	SCLD_OK = 0,
	SCLD_FAILED = -1,
	SCLD_PERMS = -2,
	SCLD_EXISTS = -3,
	SCLD_VANISHED = -4
};
|
|
|
|
enum scld_error safe_create_leading_directories(char *path);
|
|
|
|
enum scld_error safe_create_leading_directories_const(const char *path);
|
|
|
|
|
2017-01-06 17:22:26 +01:00
|
|
|
/*
|
|
|
|
* Callback function for raceproof_create_file(). This function is
|
|
|
|
* expected to do something that makes dirname(path) permanent despite
|
|
|
|
* the fact that other processes might be cleaning up empty
|
|
|
|
* directories at the same time. Usually it will create a file named
|
|
|
|
* path, but alternatively it could create another file in that
|
|
|
|
* directory, or even chdir() into that directory. The function should
|
|
|
|
* return 0 if the action was completed successfully. On error, it
|
|
|
|
* should return a nonzero result and set errno.
|
|
|
|
* raceproof_create_file() treats two errno values specially:
|
|
|
|
*
|
|
|
|
* - ENOENT -- dirname(path) does not exist. In this case,
|
|
|
|
* raceproof_create_file() tries creating dirname(path)
|
|
|
|
* (and any parent directories, if necessary) and calls
|
|
|
|
* the function again.
|
|
|
|
*
|
|
|
|
* - EISDIR -- the file already exists and is a directory. In this
|
|
|
|
* case, raceproof_create_file() removes the directory if
|
|
|
|
* it is empty (and recursively any empty directories that
|
|
|
|
* it contains) and calls the function again.
|
|
|
|
*
|
|
|
|
* Any other errno causes raceproof_create_file() to fail with the
|
|
|
|
* callback's return value and errno.
|
|
|
|
*
|
|
|
|
* Obviously, this function should be OK with being called again if it
|
|
|
|
* fails with ENOENT or EISDIR. In other scenarios it will not be
|
|
|
|
* called again.
|
|
|
|
*/
|
|
|
|
typedef int create_file_fn(const char *path, void *cb);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a file in dirname(path) by calling fn, creating leading
|
|
|
|
* directories if necessary. Retry a few times in case we are racing
|
|
|
|
* with another process that is trying to clean up the directory that
|
|
|
|
* contains path. See the documentation for create_file_fn for more
|
|
|
|
* details.
|
|
|
|
*
|
|
|
|
* Return the value and set the errno that resulted from the most
|
|
|
|
* recent call of fn. fn is always called at least once, and will be
|
|
|
|
* called more than once if it fails with errno set to ENOENT or EISDIR.
|
|
|
|
*/
|
|
|
|
int raceproof_create_file(const char *path, create_file_fn fn, void *cb);
|
|
|
|
|
2011-03-11 01:02:50 +01:00
|
|
|
int mkdir_in_gitdir(const char *path);
|
2017-04-05 12:24:38 +02:00
|
|
|
extern char *expand_user_path(const char *path, int real_home);
|
2011-10-04 22:02:00 +02:00
|
|
|
const char *enter_repo(const char *path, int strict);
|
2007-08-01 02:28:59 +02:00
|
|
|
/*
 * Return nonzero if path begins with a directory separator (per
 * is_dir_sep()) or carries a DOS drive prefix (per
 * has_dos_drive_prefix()); zero otherwise.  path[0] is read
 * unconditionally, so path must not be NULL.
 */
static inline int is_absolute_path(const char *path)
{
	return is_dir_sep(path[0]) || has_dos_drive_prefix(path);
}
|
2008-09-09 10:27:07 +02:00
|
|
|
int is_directory(const char *);
|
2016-12-12 19:16:53 +01:00
|
|
|
/*
 * NOTE(review): presumably resolves "path" into the "resolved" strbuf,
 * with die_on_error choosing between dying and returning an error
 * indication -- confirm against the implementation.
 */
char *strbuf_realpath(struct strbuf *resolved, const char *path,
		      int die_on_error);
|
2011-03-17 12:26:46 +01:00
|
|
|
const char *real_path(const char *path);
|
2012-10-28 17:16:22 +01:00
|
|
|
const char *real_path_if_valid(const char *path);
|
2017-03-08 16:43:40 +01:00
|
|
|
char *real_pathdup(const char *path, int die_on_error);
|
2011-03-17 12:26:46 +01:00
|
|
|
const char *absolute_path(const char *path);
|
2017-01-26 18:47:45 +01:00
|
|
|
char *absolute_pathdup(const char *path);
|
2013-10-14 04:29:40 +02:00
|
|
|
const char *remove_leading_path(const char *in, const char *prefix);
|
2013-06-25 17:53:43 +02:00
|
|
|
const char *relative_path(const char *in, const char *prefix, struct strbuf *sb);
|
2013-07-14 10:36:03 +02:00
|
|
|
int normalize_path_copy_len(char *dst, const char *src, int *prefix_len);
|
2009-02-07 16:08:28 +01:00
|
|
|
int normalize_path_copy(char *dst, const char *src);
|
2012-10-28 17:16:24 +01:00
|
|
|
int longest_ancestor_length(const char *path, struct string_list *prefixes);
|
2009-02-19 20:10:49 +01:00
|
|
|
char *strip_path_suffix(const char *path, const char *suffix);
|
2009-11-09 20:26:43 +01:00
|
|
|
int daemon_avoid_alias(const char *path);
|
is_ntfs_dotgit: match other .git files
When we started to catch NTFS short names that clash with .git, we only
looked for GIT~1. This is sufficient because we only ever clone into an
empty directory, so .git is guaranteed to be the first subdirectory or
file in that directory.
However, even with a fresh clone, .gitmodules is *not* necessarily the
first file to be written that would want the NTFS short name GITMOD~1: a
malicious repository can add .gitmodul0000 and friends, which sorts
before `.gitmodules` and is therefore checked out *first*. For that
reason, we have to test not only for ~1 short names, but for others,
too.
It's hard to just adapt the existing checks in is_ntfs_dotgit(): since
Windows 2000 (i.e., in all Windows versions still supported by Git),
NTFS short names are only generated in the <prefix>~<number> form up to
number 4. After that, a *different* prefix is used, calculated from the
long file name using an undocumented, but stable algorithm.
For example, the short name of .gitmodules would be GITMOD~1, but if it
is taken, and all of ~2, ~3 and ~4 are taken, too, the short name
GI7EBA~1 will be used. From there, collisions are handled by
incrementing the number, shortening the prefix as needed (until ~9999999
is reached, in which case NTFS will not allow the file to be created).
We'd also want to handle .gitignore and .gitattributes, which suffer
from a similar problem, using the fall-back short names GI250A~1 and
GI7D29~1, respectively.
To accommodate that, we could reimplement the hashing algorithm, but
it is just safer and simpler to provide the known prefixes. This
algorithm has been reverse-engineered and described at
https://usn.pw/blog/gen/2015/06/09/filenames/, which is defunct but
still available via https://web.archive.org/.
These can be recomputed by running the following Perl script:
-- snip --
use warnings;
use strict;
sub compute_short_name_hash ($) {
my $checksum = 0;
foreach (split('', $_[0])) {
$checksum = ($checksum * 0x25 + ord($_)) & 0xffff;
}
$checksum = ($checksum * 314159269) & 0xffffffff;
$checksum = 1 + (~$checksum & 0x7fffffff) if ($checksum & 0x80000000);
$checksum -= (($checksum * 1152921497) >> 60) * 1000000007;
return scalar reverse sprintf("%x", $checksum & 0xffff);
}
print compute_short_name_hash($ARGV[0]);
-- snap --
E.g., running that with the argument ".gitignore" will
result in "250a" (which then becomes "gi250a" in the code).
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff King <peff@peff.net>
2018-05-11 16:03:54 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* These functions match their is_hfs_dotgit() counterparts; see utf8.h for
|
|
|
|
* details.
|
|
|
|
*/
|
|
|
|
int is_ntfs_dotgit(const char *name);
|
|
|
|
int is_ntfs_dotgitmodules(const char *name);
|
|
|
|
int is_ntfs_dotgitignore(const char *name);
|
|
|
|
int is_ntfs_dotgitattributes(const char *name);
|
2005-07-06 10:11:52 +02:00
|
|
|
|
2017-07-28 21:25:45 +02:00
|
|
|
/*
|
|
|
|
* Returns true iff "str" could be confused as a command-line option when
|
|
|
|
* passed to a sub-program like "ssh". Note that this has nothing to do with
|
|
|
|
* shell-quoting, which should be handled separately; we're assuming here that
|
|
|
|
* the string makes it verbatim to the sub-program.
|
|
|
|
*/
|
|
|
|
int looks_like_command_line_option(const char *str);
|
|
|
|
|
2015-04-21 06:06:27 +02:00
|
|
|
/**
|
|
|
|
* Return a newly allocated string with the evaluation of
|
|
|
|
* "$XDG_CONFIG_HOME/git/$filename" if $XDG_CONFIG_HOME is non-empty, otherwise
|
|
|
|
* "$HOME/.config/git/$filename". Return NULL upon error.
|
|
|
|
*/
|
|
|
|
extern char *xdg_config_home(const char *filename);
|
|
|
|
|
2017-03-13 21:43:54 +01:00
|
|
|
/**
|
|
|
|
* Return a newly allocated string with the evaluation of
|
|
|
|
* "$XDG_CACHE_HOME/git/$filename" if $XDG_CACHE_HOME is non-empty, otherwise
|
|
|
|
* "$HOME/.cache/git/$filename". Return NULL upon error.
|
|
|
|
*/
|
|
|
|
extern char *xdg_cache_home(const char *filename);
|
|
|
|
|
2017-06-22 02:40:19 +02:00
|
|
|
/*
 * Extended form of read_sha1_file(), which calls this with
 * lookup_replace set to 1.
 * NOTE(review): presumably lookup_replace controls whether replace refs
 * are honored -- confirm against the implementation.
 */
extern void *read_sha1_file_extended(const unsigned char *sha1,
				     enum object_type *type,
				     unsigned long *size, int lookup_replace);
|
2009-01-23 10:07:01 +01:00
|
|
|
/*
 * Shorthand for read_sha1_file_extended(sha1, type, size, 1); the
 * result of that call is returned unchanged.
 */
static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
{
	return read_sha1_file_extended(sha1, type, size, 1);
}
|
2014-02-28 17:29:16 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This internal function is only declared here for the benefit of
|
|
|
|
* lookup_replace_object(). Please do not call it directly.
|
|
|
|
*/
|
2011-05-15 21:54:53 +02:00
|
|
|
extern const unsigned char *do_lookup_replace_object(const unsigned char *sha1);
|
2014-02-28 17:29:16 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If object sha1 should be replaced, return the replacement object's
|
|
|
|
* name (replaced recursively, if necessary). The return value is
|
|
|
|
* either sha1 or a pointer to a permanently-allocated value. When
|
|
|
|
* object replacement is suppressed, always return sha1.
|
|
|
|
*/
|
2011-05-15 21:54:53 +02:00
|
|
|
static inline const unsigned char *lookup_replace_object(const unsigned char *sha1)
|
|
|
|
{
|
2014-02-18 12:24:55 +01:00
|
|
|
if (!check_replace_refs)
|
2011-05-15 21:54:53 +02:00
|
|
|
return sha1;
|
|
|
|
return do_lookup_replace_object(sha1);
|
2009-01-23 10:07:01 +01:00
|
|
|
}
|
2014-02-28 17:29:16 +01:00
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/* Read and unpack a sha1 file into memory, write memory to a sha1 file */
|
2007-02-26 20:55:59 +01:00
|
|
|
extern int sha1_object_info(const unsigned char *, unsigned long *);
|
2018-01-28 01:13:13 +01:00
|
|
|
|
|
|
|
/*
 * Object hashing/writing entry points.  Each takes a buffer plus its
 * length and an object type, and an object_id pointer "oid".
 * NOTE(review): "oid" is presumably an output parameter receiving the
 * computed object name -- confirm against the implementations.
 */
extern int hash_object_file(const void *buf, unsigned long len,
			    const char *type, struct object_id *oid);

extern int write_object_file(const void *buf, unsigned long len,
			     const char *type, struct object_id *oid);

extern int hash_object_file_literally(const void *buf, unsigned long len,
				      const char *type, struct object_id *oid,
				      unsigned flags);

extern int pretend_object_file(void *, unsigned long, enum object_type,
			       struct object_id *oid);
|
|
|
|
|
2018-01-28 01:13:20 +01:00
|
|
|
extern int force_object_loose(const struct object_id *oid, time_t mtime);
|
|
|
|
|
2016-10-28 15:23:07 +02:00
|
|
|
extern int git_open_cloexec(const char *name, int flags);
|
sha1_file: stop opening files with O_NOATIME
When we open object files, we try to do so with O_NOATIME.
This dates back to 144bde78e9 (Use O_NOATIME when opening
the sha1 files., 2005-04-23), which is an optimization to
avoid creating a bunch of dirty inodes when we're accessing
many objects. But a few things have changed since then:
1. In June 2005, git learned about packfiles, which means
we would do a lot fewer atime updates (rather than one
per object access, we'd generally get one per packfile).
2. In late 2006, Linux learned about "relatime", which is
generally the default on modern installs. So
performance around atime updates is a non-issue there
these days.
All the world isn't Linux, but as it turns out, Linux
is the only platform to implement O_NOATIME in the
first place.
So it's very unlikely that this code is helping anybody
these days.
Helped-by: Jeff King <peff@peff.net>
[jc: took idea and log message from peff]
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-28 15:29:27 +02:00
|
|
|
#define git_open(name) git_open_cloexec(name, O_RDONLY)
|
2011-05-15 04:42:10 +02:00
|
|
|
extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size);
|
2011-07-19 18:33:03 +02:00
|
|
|
extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz);
|
2011-05-15 04:42:10 +02:00
|
|
|
extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
|
2005-04-24 03:47:23 +02:00
|
|
|
|
2005-06-03 17:05:39 +02:00
|
|
|
extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type);
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2015-08-07 23:40:24 +02:00
|
|
|
extern int finalize_object_file(const char *tmpfile, const char *filename);
|
2005-04-24 03:47:23 +02:00
|
|
|
|
2017-01-13 18:58:16 +01:00
|
|
|
/*
|
|
|
|
* Open the loose object at path, check its sha1, and return the contents,
|
|
|
|
* type, and size. If the object is a blob, then "contents" may return NULL,
|
|
|
|
* to allow streaming of large blobs.
|
|
|
|
*
|
|
|
|
* Returns 0 on success, negative on error (details may be written to stderr).
|
|
|
|
*/
|
|
|
|
int read_loose_object(const char *path,
|
|
|
|
const unsigned char *expected_sha1,
|
|
|
|
enum object_type *type,
|
|
|
|
unsigned long *size,
|
|
|
|
void **contents);
|
|
|
|
|
2014-02-21 17:32:06 +01:00
|
|
|
/*
|
2017-06-22 02:40:24 +02:00
|
|
|
* Convenience for sha1_object_info_extended() with a NULL struct
|
|
|
|
* object_info. OBJECT_INFO_SKIP_CACHED is automatically set; pass
|
|
|
|
* nonzero flags to also set other flags.
|
2014-02-21 17:32:06 +01:00
|
|
|
*/
|
2015-06-09 19:24:37 +02:00
|
|
|
extern int has_sha1_file_with_flags(const unsigned char *sha1, int flags);
|
|
|
|
/* Shorthand for has_sha1_file_with_flags() with no flags set. */
static inline int has_sha1_file(const unsigned char *sha1)
{
	const int no_flags = 0;

	return has_sha1_file_with_flags(sha1, no_flags);
}
|
2014-02-21 17:32:06 +01:00
|
|
|
|
2015-11-10 03:22:19 +01:00
|
|
|
/* Same as the above, except for struct object_id. */
|
|
|
|
extern int has_object_file(const struct object_id *oid);
|
fetch: use "quick" has_sha1_file for tag following
When we auto-follow tags in a fetch, we look at all of the
tags advertised by the remote and fetch ones where we don't
already have the tag, but we do have the object it peels to.
This involves a lot of calls to has_sha1_file(), some of
which we can reasonably expect to fail. Since 45e8a74
(has_sha1_file: re-check pack directory before giving up,
2013-08-30), this may cause many calls to
reprepare_packed_git(), which is potentially expensive.
This has gone unnoticed for several years because it
requires a fairly unique setup to matter:
1. You need to have a lot of packs on the client side to
make reprepare_packed_git() expensive (the most
expensive part is finding duplicates in an unsorted
list, which is currently quadratic).
2. You need a large number of tag refs on the server side
that are candidates for auto-following (i.e., that the
client doesn't have). Each one triggers a re-read of
the pack directory.
3. Under normal circumstances, the client would
auto-follow those tags and after one large fetch, (2)
would no longer be true. But if those tags point to
history which is disconnected from what the client
otherwise fetches, then it will never auto-follow, and
those candidates will impact it on every fetch.
So when all three are true, each fetch pays an extra
O(nr_tags * nr_packs^2) cost, mostly in string comparisons
on the pack names. This was exacerbated by 47bf4b0
(prepare_packed_git_one: refactor duplicate-pack check,
2014-06-30) which uses a slightly more expensive string
check, under the assumption that the duplicate check doesn't
happen very often (and it shouldn't; the real problem here
is how often we are calling reprepare_packed_git()).
This patch teaches fetch to use HAS_SHA1_QUICK to sacrifice
accuracy for speed, in cases where we might be racy with a
simultaneous repack. This is similar to the fix in 0eeb077
(index-pack: avoid excessive re-reading of pack directory,
2015-06-09). As with that case, it's OK for has_sha1_file() to
occasionally say "no I don't have it" when we do, because
the worst case is not a corruption, but simply that we may
fail to auto-follow a tag that points to it.
Here are results from the included perf script, which sets
up a situation similar to the one described above:
Test HEAD^ HEAD
----------------------------------------------------------
5550.4: fetch 11.21(10.42+0.78) 0.08(0.04+0.02) -99.3%
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-13 18:53:44 +02:00
|
|
|
extern int has_object_file_with_flags(const struct object_id *oid, int flags);
|
2015-11-10 03:22:19 +01:00
|
|
|
|
2014-02-21 17:32:06 +01:00
|
|
|
/*
|
|
|
|
* Return true iff an alternate object database has a loose object
|
|
|
|
* with the specified name. This function does not respect replace
|
|
|
|
* references.
|
|
|
|
*/
|
2008-11-10 06:59:57 +01:00
|
|
|
extern int has_loose_object_nonlocal(const unsigned char *sha1);
|
2005-04-24 03:47:23 +02:00
|
|
|
|
make commit_tree a library function
Until now, this has been part of the commit-tree builtin.
However, it is already used by other builtins (like commit,
merge, and notes), and it would be useful to access it from
library code.
The check_valid helper has to come along, too, but is given
a more library-ish name of "assert_sha1_type".
Otherwise, the code is unchanged. There are still a few
rough edges for a library function, like printing the utf8
warning to stderr, but we can address those if and when they
come up as inappropriate.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-02 02:05:23 +02:00
|
|
|
extern void assert_sha1_type(const unsigned char *sha1, enum object_type expect);
|
|
|
|
|
2017-02-27 19:00:11 +01:00
|
|
|
/* Helper to check and "touch" a file */
|
|
|
|
extern int check_and_freshen_file(const char *fn, int freshen);
|
|
|
|
|
2007-05-30 19:32:19 +02:00
|
|
|
extern const signed char hexval_table[256];

/*
 * Look up `c` in hexval_table. The table stores signed chars, so any
 * negative entry is converted to unsigned int on return; hex2chr()
 * recognises such results by testing for bits outside the low nibble.
 */
static inline unsigned int hexval(unsigned char c)
{
	const signed char entry = hexval_table[c];

	return entry;
}
|
|
|
|
|
2016-09-03 17:59:20 +02:00
|
|
|
/*
 * Decode the two consecutive hexadecimal digits at s[0] and s[1] into
 * a single value. Returns a negative value on error.
 *
 * s[1] is read only when s[0] decoded to a value in the range 0..15,
 * so a short string is not run over.
 */
static inline int hex2chr(const char *s)
{
	unsigned int hi = hexval(s[0]);

	/* Anything outside the low nibble means s[0] was not a hex digit. */
	if (hi & ~0xf)
		return hi;

	return (hi << 4) | hexval(s[1]);
}
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
/* Convert to/from hex/sha1 representation */
|
2010-10-28 20:28:04 +02:00
|
|
|
#define MINIMUM_ABBREV minimum_abbrev
|
|
|
|
#define DEFAULT_ABBREV default_abbrev
|
2006-01-25 10:03:18 +01:00
|
|
|
|
2016-10-01 02:19:35 +02:00
|
|
|
/* used when the code does not know or care what the default abbrev is */
|
|
|
|
#define FALLBACK_DEFAULT_ABBREV 7
|
|
|
|
|
2010-06-09 19:02:06 +02:00
|
|
|
struct object_context {
|
|
|
|
unsigned char tree[20];
|
|
|
|
unsigned mode;
|
2015-05-20 19:03:39 +02:00
|
|
|
/*
|
|
|
|
* symlink_path is only used by get_tree_entry_follow_symlinks,
|
|
|
|
* and only for symlinks that point outside the repository.
|
|
|
|
*/
|
|
|
|
struct strbuf symlink_path;
|
2017-05-19 14:54:43 +02:00
|
|
|
/*
|
2017-07-14 01:49:29 +02:00
|
|
|
* If GET_OID_RECORD_PATH is set, this will record path (if any)
|
2017-05-19 14:54:43 +02:00
|
|
|
* found when resolving the name. The caller is responsible for
|
|
|
|
* releasing the memory.
|
|
|
|
*/
|
|
|
|
char *path;
|
2010-06-09 19:02:06 +02:00
|
|
|
};
|
|
|
|
|
2017-07-14 01:49:29 +02:00
|
|
|
#define GET_OID_QUIETLY 01
|
|
|
|
#define GET_OID_COMMIT 02
|
|
|
|
#define GET_OID_COMMITTISH 04
|
|
|
|
#define GET_OID_TREE 010
|
|
|
|
#define GET_OID_TREEISH 020
|
|
|
|
#define GET_OID_BLOB 040
|
|
|
|
#define GET_OID_FOLLOW_SYMLINKS 0100
|
|
|
|
#define GET_OID_RECORD_PATH 0200
|
|
|
|
#define GET_OID_ONLY_TO_DIE 04000
|
|
|
|
|
|
|
|
#define GET_OID_DISAMBIGUATORS \
|
|
|
|
(GET_OID_COMMIT | GET_OID_COMMITTISH | \
|
|
|
|
GET_OID_TREE | GET_OID_TREEISH | \
|
|
|
|
GET_OID_BLOB)
|
2011-09-23 15:38:36 +02:00
|
|
|
|
2016-04-18 01:10:36 +02:00
|
|
|
extern int get_oid(const char *str, struct object_id *oid);
|
sha1_name: convert get_sha1* to get_oid*
Now that all the callers of get_sha1 directly or indirectly use struct
object_id, rename the functions starting with get_sha1 to start with
get_oid. Convert the internals in sha1_name.c to use struct object_id
as well, and eliminate explicit length checks where possible. Convert a
use of 40 in get_oid_basic to GIT_SHA1_HEXSZ.
Outside of sha1_name.c and cache.h, this transition was made with the
following semantic patch:
@@
expression E1, E2;
@@
- get_sha1(E1, E2.hash)
+ get_oid(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1(E1, E2->hash)
+ get_oid(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_committish(E1, E2.hash)
+ get_oid_committish(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_committish(E1, E2->hash)
+ get_oid_committish(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_treeish(E1, E2.hash)
+ get_oid_treeish(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_treeish(E1, E2->hash)
+ get_oid_treeish(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_commit(E1, E2.hash)
+ get_oid_commit(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_commit(E1, E2->hash)
+ get_oid_commit(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_tree(E1, E2.hash)
+ get_oid_tree(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_tree(E1, E2->hash)
+ get_oid_tree(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_blob(E1, E2.hash)
+ get_oid_blob(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_blob(E1, E2->hash)
+ get_oid_blob(E1, E2)
@@
expression E1, E2, E3, E4;
@@
- get_sha1_with_context(E1, E2, E3.hash, E4)
+ get_oid_with_context(E1, E2, &E3, E4)
@@
expression E1, E2, E3, E4;
@@
- get_sha1_with_context(E1, E2, E3->hash, E4)
+ get_oid_with_context(E1, E2, E3, E4)
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 01:49:28 +02:00
|
|
|
extern int get_oid_commit(const char *str, struct object_id *oid);
|
|
|
|
extern int get_oid_committish(const char *str, struct object_id *oid);
|
|
|
|
extern int get_oid_tree(const char *str, struct object_id *oid);
|
|
|
|
extern int get_oid_treeish(const char *str, struct object_id *oid);
|
|
|
|
extern int get_oid_blob(const char *str, struct object_id *oid);
|
2012-07-02 20:01:25 +02:00
|
|
|
extern void maybe_die_on_misspelt_object_name(const char *name, const char *prefix);
|
sha1_name: convert get_sha1* to get_oid*
Now that all the callers of get_sha1 directly or indirectly use struct
object_id, rename the functions starting with get_sha1 to start with
get_oid. Convert the internals in sha1_name.c to use struct object_id
as well, and eliminate explicit length checks where possible. Convert a
use of 40 in get_oid_basic to GIT_SHA1_HEXSZ.
Outside of sha1_name.c and cache.h, this transition was made with the
following semantic patch:
@@
expression E1, E2;
@@
- get_sha1(E1, E2.hash)
+ get_oid(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1(E1, E2->hash)
+ get_oid(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_committish(E1, E2.hash)
+ get_oid_committish(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_committish(E1, E2->hash)
+ get_oid_committish(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_treeish(E1, E2.hash)
+ get_oid_treeish(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_treeish(E1, E2->hash)
+ get_oid_treeish(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_commit(E1, E2.hash)
+ get_oid_commit(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_commit(E1, E2->hash)
+ get_oid_commit(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_tree(E1, E2.hash)
+ get_oid_tree(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_tree(E1, E2->hash)
+ get_oid_tree(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_blob(E1, E2.hash)
+ get_oid_blob(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_blob(E1, E2->hash)
+ get_oid_blob(E1, E2)
@@
expression E1, E2, E3, E4;
@@
- get_sha1_with_context(E1, E2, E3.hash, E4)
+ get_oid_with_context(E1, E2, &E3, E4)
@@
expression E1, E2, E3, E4;
@@
- get_sha1_with_context(E1, E2, E3->hash, E4)
+ get_oid_with_context(E1, E2, E3, E4)
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 01:49:28 +02:00
|
|
|
extern int get_oid_with_context(const char *str, unsigned flags, struct object_id *oid, struct object_context *oc);
|
2011-09-23 15:38:36 +02:00
|
|
|
|
2016-04-18 01:10:36 +02:00
|
|
|
|
2017-03-31 03:39:59 +02:00
|
|
|
typedef int each_abbrev_fn(const struct object_id *oid, void *);
|
2012-07-03 23:21:59 +02:00
|
|
|
extern int for_each_abbrev(const char *prefix, each_abbrev_fn, void *);
|
2011-09-23 15:38:36 +02:00
|
|
|
|
2016-09-27 14:38:01 +02:00
|
|
|
extern int set_disambiguate_hint_config(const char *var, const char *value);
|
|
|
|
|
2011-09-23 15:38:36 +02:00
|
|
|
/*
|
|
|
|
* Try to read a SHA1 in hexadecimal format from the 40 characters
|
|
|
|
* starting at hex. Write the 20-byte result to sha1 in binary form.
|
|
|
|
* Return 0 on success. Reading stops if a NUL is encountered in the
|
|
|
|
* input, so it is safe to pass this function an arbitrary
|
|
|
|
* null-terminated string.
|
|
|
|
*/
|
2005-04-09 21:09:27 +02:00
|
|
|
extern int get_sha1_hex(const char *hex, unsigned char *sha1);
|
2015-03-14 00:39:28 +01:00
|
|
|
extern int get_oid_hex(const char *hex, struct object_id *sha1);
|
2011-09-23 15:38:36 +02:00
|
|
|
|
2017-10-31 14:46:49 +01:00
|
|
|
/*
|
|
|
|
* Read `len` pairs of hexadecimal digits from `hex` and write the
|
|
|
|
* values to `binary` as `len` bytes. Return 0 on success, or -1 if
|
|
|
|
* the input does not consist of hex digits.
|
|
|
|
*/
|
|
|
|
extern int hex_to_bytes(unsigned char *binary, const char *hex, size_t len);
|
|
|
|
|
2015-09-24 23:05:45 +02:00
|
|
|
/*
|
|
|
|
* Convert a binary sha1 to its hex equivalent. The `_r` variant is reentrant,
|
|
|
|
* and writes the NUL-terminated output to the buffer `out`, which must be at
|
|
|
|
* least `GIT_SHA1_HEXSZ + 1` bytes, and returns a pointer to out for
|
|
|
|
* convenience.
|
|
|
|
*
|
|
|
|
* The non-`_r` variant returns a static buffer, but uses a ring of 4
|
|
|
|
* buffers, making it safe to make multiple calls for a single statement, like:
|
|
|
|
*
|
|
|
|
* printf("%s -> %s", sha1_to_hex(one), sha1_to_hex(two));
|
|
|
|
*/
|
|
|
|
extern char *sha1_to_hex_r(char *out, const unsigned char *sha1);
|
2016-06-25 01:09:19 +02:00
|
|
|
extern char *oid_to_hex_r(char *out, const struct object_id *oid);
|
2005-04-09 21:09:27 +02:00
|
|
|
extern char *sha1_to_hex(const unsigned char *sha1); /* static buffer result! */
|
2015-03-14 00:39:28 +01:00
|
|
|
extern char *oid_to_hex(const struct object_id *oid); /* same static buffer as sha1_to_hex */
|
2011-09-15 23:10:42 +02:00
|
|
|
|
2017-02-20 01:10:13 +01:00
|
|
|
/*
|
|
|
|
* Parse a 40-character hexadecimal object ID starting from hex, updating the
|
|
|
|
* pointer specified by end when parsing stops. The resulting object ID is
|
|
|
|
* stored in oid. Returns 0 on success. Parsing will stop on the first NUL or
|
|
|
|
* other invalid character. end is only updated on success; otherwise, it is
|
|
|
|
* unmodified.
|
|
|
|
*/
|
|
|
|
extern int parse_oid_hex(const char *hex, struct object_id *oid, const char **end);
|
|
|
|
|
2017-03-02 09:21:23 +01:00
|
|
|
/*
|
|
|
|
* This reads short-hand syntax that not only evaluates to a commit
|
|
|
|
* object name, but also can act as if the end user spelled the name
|
|
|
|
* of the branch from the command line.
|
|
|
|
*
|
|
|
|
* - "@{-N}" finds the name of the Nth previous branch we were on, and
|
|
|
|
* places the name of the branch in the given buf and returns the
|
|
|
|
* number of characters parsed if successful.
|
|
|
|
*
|
|
|
|
* - "<branch>@{upstream}" finds the name of the other ref that
|
|
|
|
* <branch> is configured to merge with (missing <branch> defaults
|
|
|
|
* to the current branch), and places the name of the branch in the
|
|
|
|
* given buf and returns the number of characters parsed if
|
|
|
|
* successful.
|
|
|
|
*
|
|
|
|
* If the input is not of the accepted format, it returns a negative
|
|
|
|
* number to signal an error.
|
|
|
|
*
|
|
|
|
* If the input was ok but there are not N branch switches in the
|
|
|
|
* reflog, it returns 0.
|
interpret_branch_name: allow callers to restrict expansions
The interpret_branch_name() function converts names like
@{-1} and @{upstream} into branch names. The expanded ref
names are not fully qualified, and may be outside of the
refs/heads/ namespace (e.g., "@" expands to "HEAD", and
"@{upstream}" is likely to be in "refs/remotes/").
This is OK for callers like dwim_ref() which are primarily
interested in resolving the resulting name, no matter where
it is. But callers like "git branch" treat the result as a
branch name in refs/heads/. When we expand to a ref outside
that namespace, the results are very confusing (e.g., "git
branch @" tries to create refs/heads/HEAD, which is
nonsense).
Callers can't know from the returned string how the
expansion happened (e.g., did the user really ask for a
branch named "HEAD", or did we do a bogus expansion?). One
fix would be to return some out-parameters describing the
types of expansion that occurred. This has the benefit that
the caller can generate precise error messages ("I
understood @{upstream} to mean origin/master, but that is a
remote tracking branch, so you cannot create it as a local
name").
However, out-parameters make the function interface somewhat
cumbersome. Instead, let's do the opposite: let the caller
tell us which elements to expand. That's easier to pass in,
and none of the callers give more precise error messages
than "@{upstream} isn't a valid branch name" anyway (which
should be sufficient).
The strbuf_branchname() function needs a similar parameter,
as most of the callers access interpret_branch_name()
through it.
We can break the callers down into two groups:
1. Callers that are happy with any kind of ref in the
result. We pass "0" here, so they continue to work
without restrictions. This includes merge_name(),
the reflog handling in add_pending_object_with_path(),
and substitute_branch_name(). This last is what powers
dwim_ref().
2. Callers that have funny corner cases (mostly in
git-branch and git-checkout). These need to make use of
the new parameter, but I've left them as "0" in this
patch, and will address them individually in follow-on
patches.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 09:23:01 +01:00
|
|
|
*
|
|
|
|
* If "allowed" is non-zero, it is treated as a bitfield of allowable
|
|
|
|
* expansions: local branches ("refs/heads/"), remote branches
|
|
|
|
* ("refs/remotes/"), or "HEAD". If no "allowed" bits are set, any expansion is
|
|
|
|
* allowed, even ones to refs outside of those namespaces.
|
2017-03-02 09:21:23 +01:00
|
|
|
*/
|
interpret_branch_name: allow callers to restrict expansions
The interpret_branch_name() function converts names like
@{-1} and @{upstream} into branch names. The expanded ref
names are not fully qualified, and may be outside of the
refs/heads/ namespace (e.g., "@" expands to "HEAD", and
"@{upstream}" is likely to be in "refs/remotes/").
This is OK for callers like dwim_ref() which are primarily
interested in resolving the resulting name, no matter where
it is. But callers like "git branch" treat the result as a
branch name in refs/heads/. When we expand to a ref outside
that namespace, the results are very confusing (e.g., "git
branch @" tries to create refs/heads/HEAD, which is
nonsense).
Callers can't know from the returned string how the
expansion happened (e.g., did the user really ask for a
branch named "HEAD", or did we do a bogus expansion?). One
fix would be to return some out-parameters describing the
types of expansion that occurred. This has the benefit that
the caller can generate precise error messages ("I
understood @{upstream} to mean origin/master, but that is a
remote tracking branch, so you cannot create it as a local
name").
However, out-parameters make the function interface somewhat
cumbersome. Instead, let's do the opposite: let the caller
tell us which elements to expand. That's easier to pass in,
and none of the callers give more precise error messages
than "@{upstream} isn't a valid branch name" anyway (which
should be sufficient).
The strbuf_branchname() function needs a similar parameter,
as most of the callers access interpret_branch_name()
through it.
We can break the callers down into two groups:
1. Callers that are happy with any kind of ref in the
result. We pass "0" here, so they continue to work
without restrictions. This includes merge_name(),
the reflog handling in add_pending_object_with_path(),
and substitute_branch_name(). This last is what powers
dwim_ref().
2. Callers that have funny corner cases (mostly in
git-branch and git-checkout). These need to make use of
the new parameter, but I've left them as "0" in this
patch, and will address them individually in follow-on
patches.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-02 09:23:01 +01:00
|
|
|
#define INTERPRET_BRANCH_LOCAL (1<<0)
|
|
|
|
#define INTERPRET_BRANCH_REMOTE (1<<1)
|
|
|
|
#define INTERPRET_BRANCH_HEAD (1<<2)
|
|
|
|
extern int interpret_branch_name(const char *str, int len, struct strbuf *,
|
|
|
|
unsigned allowed);
|
2016-09-05 22:08:07 +02:00
|
|
|
extern int get_oid_mb(const char *str, struct object_id *oid);
|
2007-01-19 10:15:15 +01:00
|
|
|
|
2007-01-02 08:31:08 +01:00
|
|
|
extern int validate_headref(const char *ref);
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2005-05-20 18:09:18 +02:00
|
|
|
extern int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
|
2008-03-06 03:25:10 +01:00
|
|
|
extern int df_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
|
2014-06-20 04:06:44 +02:00
|
|
|
extern int name_compare(const char *name1, size_t len1, const char *name2, size_t len2);
|
2012-07-11 11:22:37 +02:00
|
|
|
extern int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2);
|
2005-04-08 00:13:13 +02:00
|
|
|
|
2005-04-29 01:42:27 +02:00
|
|
|
extern void *read_object_with_reference(const unsigned char *sha1,
|
2005-05-18 14:14:09 +02:00
|
|
|
const char *required_type,
|
2005-04-29 01:42:27 +02:00
|
|
|
unsigned long *size,
|
|
|
|
unsigned char *sha1_ret);
|
2005-04-21 03:06:49 +02:00
|
|
|
|
2007-12-24 09:51:01 +01:00
|
|
|
extern struct object *peel_to_type(const char *name, int namelen,
|
|
|
|
struct object *o, enum object_type);
|
|
|
|
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-25 18:55:02 +02:00
|
|
|
/* Selects one of the date formats; stored in struct date_mode.type. */
enum date_mode_type {
	DATE_NORMAL = 0,
	DATE_RELATIVE,
	DATE_SHORT,
	DATE_ISO8601,
	DATE_ISO8601_STRICT,
	DATE_RFC2822,
	DATE_STRFTIME,
	DATE_RAW,
	DATE_UNIX
};

/*
 * Date mode as passed to show_date() and filled in by
 * parse_date_format().
 */
struct date_mode {
	enum date_mode_type type;
	/* presumably the format string used with DATE_STRFTIME -- TODO confirm */
	const char *strftime_fmt;
	/* NOTE(review): exact semantics not visible in this header -- confirm */
	int local;
};
|
|
|
|
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-25 18:55:02 +02:00
|
|
|
/*
|
|
|
|
* Convenience helper for passing a constant type, like:
|
|
|
|
*
|
|
|
|
* show_date(t, tz, DATE_MODE(NORMAL));
|
|
|
|
*/
|
|
|
|
#define DATE_MODE(t) date_mode_from_type(DATE_##t)
|
|
|
|
struct date_mode *date_mode_from_type(enum date_mode_type type);
|
|
|
|
|
2017-04-26 21:29:31 +02:00
|
|
|
const char *show_date(timestamp_t time, int timezone, const struct date_mode *mode);
|
|
|
|
void show_date_relative(timestamp_t time, int tz, const struct timeval *now,
|
2012-04-23 14:30:23 +02:00
|
|
|
struct strbuf *timebuf);
|
2014-08-27 09:57:08 +02:00
|
|
|
int parse_date(const char *date, struct strbuf *out);
|
2017-04-26 21:29:31 +02:00
|
|
|
int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset);
|
|
|
|
int parse_expiry_date(const char *date, timestamp_t *timestamp);
|
2014-08-27 09:57:08 +02:00
|
|
|
void datestamp(struct strbuf *out);
|
2010-01-26 20:58:00 +01:00
|
|
|
#define approxidate(s) approxidate_careful((s), NULL)
|
2017-04-26 21:29:31 +02:00
|
|
|
timestamp_t approxidate_careful(const char *, int *);
|
|
|
|
timestamp_t approxidate_relative(const char *date, const struct timeval *now);
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-25 18:55:02 +02:00
|
|
|
void parse_date_format(const char *format, struct date_mode *mode);
|
2017-04-26 21:29:31 +02:00
|
|
|
int date_overflows(timestamp_t date);
|
2005-04-30 18:46:49 +02:00
|
|
|
|
2012-05-25 01:28:40 +02:00
|
|
|
#define IDENT_STRICT 1
|
2012-05-22 01:10:11 +02:00
|
|
|
#define IDENT_NO_DATE 2
|
ident: let callers omit name with fmt_indent
Most callers want to see all of "$name <$email> $date", but
a few want only limited parts, omitting the date, or even
the name. We already have IDENT_NO_DATE to handle the date
part, but there's not a good option for getting just the
email. Callers have to do one of:
1. Call ident_default_email; this does not respect
environment variables, nor does it promise to trim
whitespace or other crud from the result.
2. Call git_{committer,author}_info; this returns the name
and email, leaving the caller to parse out the wanted
bits.
This patch adds IDENT_NO_NAME; it stops short of adding
IDENT_NO_EMAIL, as no callers want it (nor are likely to),
and it complicates the error handling of the function.
When no name is requested, the angle brackets (<>) around
the email address are also omitted.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-05-25 01:27:24 +02:00
|
|
|
#define IDENT_NO_NAME 4
|
2006-02-19 05:31:05 +01:00
|
|
|
extern const char *git_author_info(int);
|
|
|
|
extern const char *git_committer_info(int);
|
2007-02-05 02:50:14 +01:00
|
|
|
extern const char *fmt_ident(const char *name, const char *email, const char *date_str, int);
|
2007-12-02 22:43:34 +01:00
|
|
|
extern const char *fmt_name(const char *name, const char *email);
|
2014-07-25 21:11:34 +02:00
|
|
|
extern const char *ident_default_name(void);
|
2012-05-22 01:09:43 +02:00
|
|
|
extern const char *ident_default_email(void);
|
2009-11-12 01:01:27 +01:00
|
|
|
extern const char *git_editor(void);
|
2010-02-14 12:59:59 +01:00
|
|
|
extern const char *git_pager(int stdout_is_tty);
|
2017-11-29 15:37:51 +01:00
|
|
|
extern int is_terminal_dumb(void);
|
2012-05-22 01:09:54 +02:00
|
|
|
extern int git_ident_config(const char *, const char *, void *);
|
am: reset cached ident date for each patch
When we compute the date to go in author/committer lines of
commits, or tagger lines of tags, we get the current date
once and then cache it for the rest of the program. This is
a good thing in some cases, like "git commit", because it
means we do not racily assign different times to the
author/committer fields of a single commit object.
But as more programs start to make many commits in a single
process (e.g., the recently builtin "git am"), it means that
you'll get long strings of commits with identical committer
timestamps (whereas before, we invoked "git commit" many
times and got true timestamps).
This patch addresses it by letting callers reset the cached
time, which means they'll get a fresh time on their next
call to git_committer_info() or git_author_info(). The first
caller to do so is "git am", which resets the time for each
patch it applies.
It would be nice if we could just do this automatically
before filling in the ident fields of commit and tag
objects. Unfortunately, it's hard to know where a particular
logical operation begins and ends.
For instance, if commit_tree_extended() were to call
reset_ident_date() before getting the committer/author
ident, that doesn't quite work; sometimes the author info is
passed in to us as a parameter, and it may or may not have
come from a previous call to ident_default_date(). So in
those cases, we lose the property that the committer and the
author timestamp always match.
You could similarly put a date-reset at the end of
commit_tree_extended(). That actually works in the current
code base, but it's fragile. It makes the assumption that
after commit_tree_extended() finishes, the caller has no
other operations that would logically want to fall into the
same timestamp.
So instead we provide the tool to easily do the reset, and
let the high-level callers use it to annotate their own
logical operations.
There's no automated test, because it would be inherently
racy (it depends on whether the program takes multiple
seconds to run). But you can see the effect with something
like:
# make a fake 100-patch series
top=$(git rev-parse HEAD)
bottom=$(git rev-list --first-parent -100 HEAD | tail -n 1)
git log --format=email --reverse --first-parent \
--binary -m -p $bottom..$top >patch
# now apply it; this presumably takes multiple seconds
git checkout --detach $bottom
git am <patch
# now count the number of distinct committer times;
# prior to this patch, there would only be one, but
# now we'd typically see several.
git log --format=%ct $bottom.. | sort -u
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Helped-by: Paul Tan <pyokagan@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-01 21:37:00 +02:00
|
|
|
extern void reset_ident_date(void);
|
2005-07-12 20:49:27 +02:00
|
|
|
|
2012-03-11 10:25:43 +01:00
|
|
|
/*
 * Begin/end pointer pairs for the name, mail, date and tz pieces of an
 * ident line. split_ident_line() takes a pointer to this struct; its
 * own comment notes that the time part of the result may be NULL when
 * the input lacks a timestamp and zone (presumably the date_* and tz_*
 * members -- confirm against the implementation).
 */
struct ident_split {
|
|
|
|
const char *name_begin;
|
|
|
|
const char *name_end;
|
|
|
|
const char *mail_begin;
|
|
|
|
const char *mail_end;
|
|
|
|
const char *date_begin;
|
|
|
|
const char *date_end;
|
|
|
|
const char *tz_begin;
|
|
|
|
const char *tz_end;
|
|
|
|
};
|
|
|
|
/*
|
|
|
|
* Signals success with 0, but the time part of the result may be NULL
|
|
|
|
* if the input lacks timestamp and zone
|
|
|
|
*/
|
|
|
|
extern int split_ident_line(struct ident_split *, const char *, int);
|
|
|
|
|
2014-05-02 03:07:22 +02:00
|
|
|
/*
|
|
|
|
* Like show_date, but pull the timestamp and tz parameters from
|
|
|
|
* the ident_split. It will also sanity-check the values and produce
|
|
|
|
* a well-known sentinel date if they appear bogus.
|
|
|
|
*/
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-25 18:55:02 +02:00
|
|
|
const char *show_ident_date(const struct ident_split *id,
|
|
|
|
const struct date_mode *mode);
|
2014-05-02 03:07:22 +02:00
|
|
|
|
2013-09-20 12:16:28 +02:00
|
|
|
/*
|
|
|
|
* Compare split idents for equality or strict ordering. Note that we
|
|
|
|
* compare only the ident part of the line, ignoring any timestamp.
|
|
|
|
*
|
|
|
|
* Because there are two fields, we must choose one as the primary key; we
|
|
|
|
* currently arbitrarily pick the email.
|
|
|
|
*/
|
|
|
|
extern int ident_cmp(const struct ident_split *, const struct ident_split *);
|
|
|
|
|
2005-06-06 06:59:54 +02:00
|
|
|
/*
 * State handed to checkout_entry(), enable_delayed_checkout() and
 * finish_delayed_checkout(). Use CHECKOUT_INIT for static
 * initialization.
 *
 * NOTE(review): base_dir_len presumably caches strlen(base_dir) --
 * confirm against the callers. The four trailing members are
 * single-bit flags.
 */
struct checkout {
|
2014-06-13 14:19:34 +02:00
|
|
|
struct index_state *istate;
|
2005-06-06 06:59:54 +02:00
|
|
|
const char *base_dir;
|
|
|
|
int base_dir_len;
|
2017-06-30 22:41:28 +02:00
|
|
|
struct delayed_checkout *delayed_checkout;
|
2005-06-06 06:59:54 +02:00
|
|
|
unsigned force:1,
|
|
|
|
quiet:1,
|
|
|
|
not_new:1,
|
|
|
|
refresh_cache:1;
|
|
|
|
};
|
2016-09-22 18:11:33 +02:00
|
|
|
/*
 * Positional initializer for struct checkout: istate = NULL and
 * base_dir = ""; the members not listed are implicitly
 * zero-initialized. Depends on the member order of struct checkout.
 */
#define CHECKOUT_INIT { NULL, "" }
|
2005-06-06 06:59:54 +02:00
|
|
|
|
2013-10-23 19:52:42 +02:00
|
|
|
#define TEMPORARY_FILENAME_LENGTH 25
|
2007-04-25 16:18:08 +02:00
|
|
|
extern int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath);
|
2017-06-30 22:41:28 +02:00
|
|
|
extern void enable_delayed_checkout(struct checkout *state);
|
|
|
|
extern int finish_delayed_checkout(struct checkout *state);
|
2009-07-09 22:35:31 +02:00
|
|
|
|
|
|
|
/*
 * Cache state passed by pointer to threaded_has_symlink_leading_path().
 * Initialize with CACHE_DEF_INIT and dispose of the embedded strbuf
 * with cache_def_clear().
 */
struct cache_def {
|
2014-07-05 00:41:46 +02:00
|
|
|
struct strbuf path;
|
2009-07-09 22:35:31 +02:00
|
|
|
int flags;
|
|
|
|
int track_flags;
|
|
|
|
int prefix_len_stat_func;
|
|
|
|
};
|
2014-07-05 00:41:46 +02:00
|
|
|
/*
 * Positional initializer for struct cache_def: path = STRBUF_INIT and
 * the three int members (flags, track_flags, prefix_len_stat_func) = 0.
 */
#define CACHE_DEF_INIT { STRBUF_INIT, 0, 0, 0 }
|
2014-07-12 01:02:34 +02:00
|
|
|
/*
 * Hand the strbuf embedded in "cache" to strbuf_release(). The int
 * members of the struct are left untouched.
 */
static inline void cache_def_clear(struct cache_def *cache)
|
2014-07-05 00:41:46 +02:00
|
|
|
{
|
|
|
|
strbuf_release(&cache->path);
|
|
|
|
}
|
2009-07-09 22:35:31 +02:00
|
|
|
|
2009-02-09 21:54:06 +01:00
|
|
|
extern int has_symlink_leading_path(const char *name, int len);
|
2009-07-09 22:35:31 +02:00
|
|
|
extern int threaded_has_symlink_leading_path(struct cache_def *, const char *, int);
|
2010-10-09 15:53:00 +02:00
|
|
|
extern int check_leading_path(const char *name, int len);
|
2009-02-09 21:54:06 +01:00
|
|
|
extern int has_dirs_only_path(const char *name, int len, int prefix_len);
|
checkout: fix bug that makes checkout follow symlinks in leading path
Before checking out a file, we have to confirm that all of its leading
components are real existing directories. And to reduce the number of
lstat() calls in this process, we cache the last leading path known to
contain only directories. However, when a path collision occurs (e.g.
when checking out case-sensitive files in case-insensitive file
systems), a cached path might have its file type changed on disk,
leaving the cache on an invalid state. Normally, this doesn't bring
any bad consequences as we usually check out files in index order, and
therefore, by the time the cached path becomes outdated, we no longer
need it anyway (because all files in that directory would have already
been written).
But, there are some users of the checkout machinery that do not always
follow the index order. In particular: checkout-index writes the paths
in the same order that they appear on the CLI (or stdin); and the
delayed checkout feature -- used when a long-running filter process
replies with "status=delayed" -- postpones the checkout of some entries,
thus modifying the checkout order.
When we have to check out an out-of-order entry and the lstat() cache is
invalid (due to a previous path collision), checkout_entry() may end up
using the invalid data and trusting that the leading components are
real directories when, in reality, they are not. In the best case
scenario, where the directory was replaced by a regular file, the user
will get an error: "fatal: unable to create file 'foo/bar': Not a
directory". But if the directory was replaced by a symlink, checkout
could actually end up following the symlink and writing the file at a
wrong place, even outside the repository. Since delayed checkout is
affected by this bug, it could be used by an attacker to write
arbitrary files during the clone of a maliciously crafted repository.
Some candidate solutions considered were to disable the lstat() cache
during unordered checkouts or sort the entries before passing them to
the checkout machinery. But both ideas include some performance penalty
and they don't future-proof the code against new unordered use cases.
Instead, we now manually reset the lstat cache whenever we successfully
remove a directory. Note: We are not even checking whether the directory
was the same as the lstat cache points to because we might face a
scenario where the paths refer to the same location but differ due to
case folding, precomposed UTF-8 issues, or the presence of `..`
components in the path. Two regression tests, with case-collisions and
utf8-collisions, are also added for both checkout-index and delayed
checkout.
Note: to make the previously mentioned clone attack unfeasible, it would
be sufficient to reset the lstat cache only after the remove_subtree()
call inside checkout_entry(). This is the place where we would remove a
directory whose path collides with the path of another entry that we are
currently trying to check out (possibly a symlink). However, in the
interest of a thorough fix that does not leave Git open to
similar-but-not-identical attack vectors, we decided to intercept
all `rmdir()` calls in one fell swoop.
This addresses CVE-2021-21300.
Co-authored-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
2020-12-10 14:27:55 +01:00
|
|
|
extern void invalidate_lstat_cache(void);
|
2009-02-09 21:54:07 +01:00
|
|
|
extern void schedule_dir_for_removal(const char *name, int len);
|
|
|
|
extern void remove_scheduled_dirs(void);
|
2005-06-06 06:59:54 +02:00
|
|
|
|
2005-06-28 23:56:57 +02:00
|
|
|
/*
 * One alternate object database entry. Entries are chained through
 * "next"; this declaration also declares the extern pointer
 * alt_odb_list (presumably the head of that chain -- confirm).
 * "path" is a flexible array member, so instances are variable-sized;
 * see alloc_alt_odb() for allocation.
 */
extern struct alternate_object_database {
|
2005-08-15 02:25:57 +02:00
|
|
|
struct alternate_object_database *next;
|
2016-10-03 22:35:51 +02:00
|
|
|
|
2016-10-03 22:36:04 +02:00
|
|
|
/* see alt_scratch_buf() */
|
|
|
|
struct strbuf scratch;
|
|
|
|
size_t base_len;
|
2016-10-03 22:35:51 +02:00
|
|
|
|
2017-06-22 20:19:48 +02:00
|
|
|
/*
|
|
|
|
* Used to store the results of readdir(3) calls when searching
|
|
|
|
* for unique abbreviated hashes. This cache is never
|
|
|
|
* invalidated, thus it's racy and not necessarily accurate.
|
|
|
|
* That's fine for its purpose; don't use it for tasks requiring
|
|
|
|
* greater accuracy!
|
|
|
|
*/
|
|
|
|
char loose_objects_subdir_seen[256];
|
|
|
|
struct oid_array loose_objects_cache;
|
|
|
|
|
2016-10-03 22:35:51 +02:00
|
|
|
char path[FLEX_ARRAY];
|
2005-08-15 02:25:57 +02:00
|
|
|
} *alt_odb_list;
|
2005-06-28 23:56:57 +02:00
|
|
|
extern void prepare_alt_odb(void);
|
2016-08-15 23:53:24 +02:00
|
|
|
extern char *compute_alternate_path(const char *path, struct strbuf *err);
|
push: receiver end advertises refs from alternate repositories
Earlier, when pushing into a repository that borrows from alternate object
stores, we followed the longstanding design decision not to trust refs in
the alternate repository that houses the object store we are borrowing
from. If your public repository is borrowing from Linus's public
repository, you pushed into it a long time ago, and now when you try to push
your updated history that is in sync with more recent history from Linus,
you will end up sending not just your own development, but also the
changes you acquired through Linus's tree, even though the objects needed
for the latter already exist at the receiving end. This is because the
receiving end does not advertise that the objects only reachable from the
borrowed repository (i.e. Linus's) are already available there.
This solves the issue by making the receiving end advertise refs from
borrowed repositories. They are not sent with their true names but with a
phoney name ".have" to make sure that the old senders will safely ignore
them (otherwise, the old senders will misbehave, trying to push matching
refs, and mirror push that deletes refs that only exist at the receiving
end).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-09 10:27:10 +02:00
|
|
|
typedef int alt_odb_fn(struct alternate_object_database *, void *);
|
2014-10-16 00:33:13 +02:00
|
|
|
extern int foreach_alt_odb(alt_odb_fn, void*);
|
2005-06-28 23:56:57 +02:00
|
|
|
|
2016-10-03 22:35:31 +02:00
|
|
|
/*
|
|
|
|
* Allocate a "struct alternate_object_database" but do _not_ actually
|
|
|
|
* add it to the list of alternates.
|
|
|
|
*/
|
|
|
|
struct alternate_object_database *alloc_alt_odb(const char *dir);
|
|
|
|
|
2016-10-03 22:35:03 +02:00
|
|
|
/*
|
|
|
|
* Add the directory to the on-disk alternates file; the new entry will also
|
|
|
|
* take effect in the current process.
|
|
|
|
*/
|
|
|
|
extern void add_to_alternates_file(const char *dir);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add the directory to the in-memory list of alternates (along with any
|
|
|
|
* recursive alternates it points to), but do not modify the on-disk alternates
|
|
|
|
* file.
|
|
|
|
*/
|
|
|
|
extern void add_to_alternates_memory(const char *dir);
|
|
|
|
|
2016-10-03 22:36:04 +02:00
|
|
|
/*
|
|
|
|
* Returns a scratch strbuf pre-filled with the alternate object directory,
|
|
|
|
* including a trailing slash, which can be used to access paths in the
|
|
|
|
* alternate. Always use this over direct access to alt->scratch, as it
|
|
|
|
* cleans up any previous use of the scratch buffer.
|
|
|
|
*/
|
|
|
|
extern struct strbuf *alt_scratch_buf(struct alternate_object_database *alt);
|
|
|
|
|
2006-12-23 08:33:44 +01:00
|
|
|
/*
 * Node in a singly linked list of windows, chained through "next";
 * struct packed_git refers to such a list via its "windows" member.
 *
 * NOTE(review): base/offset/len look like the address, pack-file
 * offset and length of one mapped region, and last_used/inuse_cnt
 * like bookkeeping for reuse -- confirm against the users.
 */
struct pack_window {
|
|
|
|
struct pack_window *next;
|
|
|
|
unsigned char *base;
|
|
|
|
off_t offset;
|
|
|
|
size_t len;
|
|
|
|
unsigned int last_used;
|
|
|
|
unsigned int inuse_cnt;
|
|
|
|
};
|
|
|
|
|
2005-06-28 23:56:57 +02:00
|
|
|
/*
 * Per-pack record. This declaration defines struct packed_git and also
 * declares the extern pointer "packed_git". Records are chained through
 * "next" and additionally carry a list_head ("mru") for the
 * most-recently-used ordering exposed as packed_git_mru.
 *
 * The pack_local ... pack_promisor members are single-bit flags.
 * pack_name is a flexible array member, so instances are
 * variable-sized.
 */
extern struct packed_git {
|
|
|
|
struct packed_git *next;
|
2018-01-24 00:46:51 +01:00
|
|
|
struct list_head mru;
|
2006-12-23 08:33:44 +01:00
|
|
|
struct pack_window *windows;
|
2006-12-23 08:33:47 +01:00
|
|
|
off_t pack_size;
|
2007-04-09 07:06:28 +02:00
|
|
|
const void *index_data;
|
|
|
|
size_t index_size;
|
|
|
|
uint32_t num_objects;
|
2008-06-24 03:23:39 +02:00
|
|
|
uint32_t num_bad_objects;
|
|
|
|
unsigned char *bad_object_sha1;
|
2007-03-16 21:42:50 +01:00
|
|
|
int index_version;
|
2007-04-09 07:06:28 +02:00
|
|
|
time_t mtime;
|
2006-12-23 08:34:01 +01:00
|
|
|
int pack_fd;
|
2008-11-12 18:59:03 +01:00
|
|
|
unsigned pack_local:1,
|
2011-03-02 19:01:54 +01:00
|
|
|
pack_keep:1,
|
2015-04-20 21:55:00 +02:00
|
|
|
freshened:1,
|
2017-12-05 17:58:44 +01:00
|
|
|
do_not_close:1,
|
|
|
|
pack_promisor:1;
|
2005-08-01 02:53:44 +02:00
|
|
|
unsigned char sha1[20];
|
2015-12-21 07:20:33 +01:00
|
|
|
struct revindex_entry *revindex;
|
2006-01-07 10:33:54 +01:00
|
|
|
/* something like ".git/objects/pack/xxxxx.pack" */
|
|
|
|
char pack_name[FLEX_ARRAY]; /* more */
|
2005-06-28 23:56:57 +02:00
|
|
|
} *packed_git;
|
2005-07-01 02:15:39 +02:00
|
|
|
|
find_pack_entry: replace last_found_pack with MRU cache
Each pack has an index for looking up entries in O(log n)
time, but if we have multiple packs, we have to scan through
them linearly. This can produce a measurable overhead for
some operations.
We dealt with this long ago in f7c22cc (always start looking
up objects in the last used pack first, 2007-05-30), which
keeps what is essentially a 1-element most-recently-used
cache. In theory, we should be able to do better by keeping
a similar but longer cache, that is the same length as the
pack-list itself.
Since we now have a convenient generic MRU structure, we can
plug it in and measure. Here are the numbers for running
p5303 against linux.git:
Test HEAD^ HEAD
------------------------------------------------------------------------
5303.3: rev-list (1) 31.56(31.28+0.27) 31.30(31.08+0.20) -0.8%
5303.4: repack (1) 40.62(39.35+2.36) 40.60(39.27+2.44) -0.0%
5303.6: rev-list (50) 31.31(31.06+0.23) 31.23(31.00+0.22) -0.3%
5303.7: repack (50) 58.65(69.12+1.94) 58.27(68.64+2.05) -0.6%
5303.9: rev-list (1000) 38.74(38.40+0.33) 31.87(31.62+0.24) -17.7%
5303.10: repack (1000) 367.20(441.80+4.62) 342.00(414.04+3.72) -6.9%
The main numbers of interest here are the rev-list ones
(since that is exercising the normal object lookup code
path). The single-pack case shouldn't improve at all; the
260ms speedup there is just part of the run-to-run noise
(but it's important to note that we didn't make anything
worse with the overhead of maintaining our cache). In the
50-pack case, we see similar results. There may be a slight
improvement, but it's mostly within the noise.
The 1000-pack case does show a big improvement, though. That
carries over to the repack case, as well. Even though we
haven't touched its pack-search loop yet, it does still do a
lot of normal object lookups (e.g., for the internal
revision walk), and so improves.
As a point of reference, I also ran the 1000-pack test
against a version of HEAD^ with the last_found_pack
optimization disabled. It takes ~60s, so that gives an
indication of how much even the single-element cache is
helping.
For comparison, here's a smaller repository, git.git:
Test HEAD^ HEAD
---------------------------------------------------------------------
5303.3: rev-list (1) 1.56(1.54+0.01) 1.54(1.51+0.02) -1.3%
5303.4: repack (1) 1.84(1.80+0.10) 1.82(1.80+0.09) -1.1%
5303.6: rev-list (50) 1.58(1.55+0.02) 1.59(1.57+0.01) +0.6%
5303.7: repack (50) 2.50(3.18+0.04) 2.50(3.14+0.04) +0.0%
5303.9: rev-list (1000) 2.76(2.71+0.04) 2.24(2.21+0.02) -18.8%
5303.10: repack (1000) 13.21(19.56+0.25) 11.66(18.01+0.21) -11.7%
You can see that the percentage improvement is similar.
That's because the lookup we are optimizing is roughly
O(nr_objects * nr_packs). Since the number of packs is
constant in both tests, we'd expect the improvement to be
linear in the number of objects. But the whole process is
also linear in the number of objects, so the improvement
is a constant factor.
The exact improvement does also depend on the contents of
the packs. In p5303, the extra packs all have 5 first-parent
commits in them, which is a reasonable simulation of a
pushed-to repository. But it also means that only 250
first-parent commits are in those packs (compared to almost
50,000 total in linux.git), and the rest are in the huge
"base" pack. So once we start looking at history in that big
pack, that's where we'll find most everything, and even the
1-element cache gets close to 100% cache hits. You could
almost certainly show better numbers with a more
pathological case (e.g., distributing the objects more
evenly across the packs). But that's simply not that
realistic a scenario, so it makes more sense to focus on
these numbers.
The implementation itself is a straightforward application
of the MRU code. We provide an MRU-ordered list of packs
that shadows the packed_git list. This is easy to do because
we only create and revise the pack list in one place. The
"reprepare" code path actually drops the whole MRU and
replaces it for simplicity. It would be more efficient to
just add new entries, but there's not much point in
optimizing here; repreparing happens rarely, and only after
doing a lot of other expensive work. The key things to keep
optimized are traversal (which is just a normal linked list,
albeit with one extra level of indirection over the regular
packed_git list), and marking (which is a constant number of
pointer assignments, though slightly more than the old
last_found_pack was; it doesn't seem to create a measurable
slowdown, though).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-29 06:09:46 +02:00
|
|
|
/*
|
2018-01-24 00:46:51 +01:00
|
|
|
* A most-recently-used ordered version of the packed_git list.
|
find_pack_entry: replace last_found_pack with MRU cache
Each pack has an index for looking up entries in O(log n)
time, but if we have multiple packs, we have to scan through
them linearly. This can produce a measurable overhead for
some operations.
We dealt with this long ago in f7c22cc (always start looking
up objects in the last used pack first, 2007-05-30), which
keeps what is essentially a 1-element most-recently-used
cache. In theory, we should be able to do better by keeping
a similar but longer cache, that is the same length as the
pack-list itself.
Since we now have a convenient generic MRU structure, we can
plug it in and measure. Here are the numbers for running
p5303 against linux.git:
Test HEAD^ HEAD
------------------------------------------------------------------------
5303.3: rev-list (1) 31.56(31.28+0.27) 31.30(31.08+0.20) -0.8%
5303.4: repack (1) 40.62(39.35+2.36) 40.60(39.27+2.44) -0.0%
5303.6: rev-list (50) 31.31(31.06+0.23) 31.23(31.00+0.22) -0.3%
5303.7: repack (50) 58.65(69.12+1.94) 58.27(68.64+2.05) -0.6%
5303.9: rev-list (1000) 38.74(38.40+0.33) 31.87(31.62+0.24) -17.7%
5303.10: repack (1000) 367.20(441.80+4.62) 342.00(414.04+3.72) -6.9%
The main numbers of interest here are the rev-list ones
(since that is exercising the normal object lookup code
path). The single-pack case shouldn't improve at all; the
260ms speedup there is just part of the run-to-run noise
(but it's important to note that we didn't make anything
worse with the overhead of maintaining our cache). In the
50-pack case, we see similar results. There may be a slight
improvement, but it's mostly within the noise.
The 1000-pack case does show a big improvement, though. That
carries over to the repack case, as well. Even though we
haven't touched its pack-search loop yet, it does still do a
lot of normal object lookups (e.g., for the internal
revision walk), and so improves.
As a point of reference, I also ran the 1000-pack test
against a version of HEAD^ with the last_found_pack
optimization disabled. It takes ~60s, so that gives an
indication of how much even the single-element cache is
helping.
For comparison, here's a smaller repository, git.git:
Test HEAD^ HEAD
---------------------------------------------------------------------
5303.3: rev-list (1) 1.56(1.54+0.01) 1.54(1.51+0.02) -1.3%
5303.4: repack (1) 1.84(1.80+0.10) 1.82(1.80+0.09) -1.1%
5303.6: rev-list (50) 1.58(1.55+0.02) 1.59(1.57+0.01) +0.6%
5303.7: repack (50) 2.50(3.18+0.04) 2.50(3.14+0.04) +0.0%
5303.9: rev-list (1000) 2.76(2.71+0.04) 2.24(2.21+0.02) -18.8%
5303.10: repack (1000) 13.21(19.56+0.25) 11.66(18.01+0.21) -11.7%
You can see that the percentage improvement is similar.
That's because the lookup we are optimizing is roughly
O(nr_objects * nr_packs). Since the number of packs is
constant in both tests, we'd expect the improvement to be
linear in the number of objects. But the whole process is
also linear in the number of objects, so the improvement
is a constant factor.
The exact improvement does also depend on the contents of
the packs. In p5303, the extra packs all have 5 first-parent
commits in them, which is a reasonable simulation of a
pushed-to repository. But it also means that only 250
first-parent commits are in those packs (compared to almost
50,000 total in linux.git), and the rest are in the huge
"base" pack. So once we start looking at history in that big
pack, that's where we'll find most everything, and even the
1-element cache gets close to 100% cache hits. You could
almost certainly show better numbers with a more
pathological case (e.g., distributing the objects more
evenly across the packs). But that's simply not that
realistic a scenario, so it makes more sense to focus on
these numbers.
The implementation itself is a straightforward application
of the MRU code. We provide an MRU-ordered list of packs
that shadows the packed_git list. This is easy to do because
we only create and revise the pack list in one place. The
"reprepare" code path actually drops the whole MRU and
replaces it for simplicity. It would be more efficient to
just add new entries, but there's not much point in
optimizing here; repreparing happens rarely, and only after
doing a lot of other expensive work. The key things to keep
optimized are traversal (which is just a normal linked list,
albeit with one extra level of indirection over the regular
packed_git list), and marking (which is a constant number of
pointer assignments, though slightly more than the old
last_found_pack was; it doesn't seem to create a measurable
slowdown, though).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-29 06:09:46 +02:00
|
|
|
*/
|
2018-01-24 00:46:51 +01:00
|
|
|
extern struct list_head packed_git_mru;
|
find_pack_entry: replace last_found_pack with MRU cache
Each pack has an index for looking up entries in O(log n)
time, but if we have multiple packs, we have to scan through
them linearly. This can produce a measurable overhead for
some operations.
We dealt with this long ago in f7c22cc (always start looking
up objects in the last used pack first, 2007-05-30), which
keeps what is essentially a 1-element most-recently-used
cache. In theory, we should be able to do better by keeping
a similar but longer cache, that is the same length as the
pack-list itself.
Since we now have a convenient generic MRU structure, we can
plug it in and measure. Here are the numbers for running
p5303 against linux.git:
Test HEAD^ HEAD
------------------------------------------------------------------------
5303.3: rev-list (1) 31.56(31.28+0.27) 31.30(31.08+0.20) -0.8%
5303.4: repack (1) 40.62(39.35+2.36) 40.60(39.27+2.44) -0.0%
5303.6: rev-list (50) 31.31(31.06+0.23) 31.23(31.00+0.22) -0.3%
5303.7: repack (50) 58.65(69.12+1.94) 58.27(68.64+2.05) -0.6%
5303.9: rev-list (1000) 38.74(38.40+0.33) 31.87(31.62+0.24) -17.7%
5303.10: repack (1000) 367.20(441.80+4.62) 342.00(414.04+3.72) -6.9%
The main numbers of interest here are the rev-list ones
(since that is exercising the normal object lookup code
path). The single-pack case shouldn't improve at all; the
260ms speedup there is just part of the run-to-run noise
(but it's important to note that we didn't make anything
worse with the overhead of maintaining our cache). In the
50-pack case, we see similar results. There may be a slight
improvement, but it's mostly within the noise.
The 1000-pack case does show a big improvement, though. That
carries over to the repack case, as well. Even though we
haven't touched its pack-search loop yet, it does still do a
lot of normal object lookups (e.g., for the internal
revision walk), and so improves.
As a point of reference, I also ran the 1000-pack test
against a version of HEAD^ with the last_found_pack
optimization disabled. It takes ~60s, so that gives an
indication of how much even the single-element cache is
helping.
For comparison, here's a smaller repository, git.git:
Test HEAD^ HEAD
---------------------------------------------------------------------
5303.3: rev-list (1) 1.56(1.54+0.01) 1.54(1.51+0.02) -1.3%
5303.4: repack (1) 1.84(1.80+0.10) 1.82(1.80+0.09) -1.1%
5303.6: rev-list (50) 1.58(1.55+0.02) 1.59(1.57+0.01) +0.6%
5303.7: repack (50) 2.50(3.18+0.04) 2.50(3.14+0.04) +0.0%
5303.9: rev-list (1000) 2.76(2.71+0.04) 2.24(2.21+0.02) -18.8%
5303.10: repack (1000) 13.21(19.56+0.25) 11.66(18.01+0.21) -11.7%
You can see that the percentage improvement is similar.
That's because the lookup we are optimizing is roughly
O(nr_objects * nr_packs). Since the number of packs is
constant in both tests, we'd expect the improvement to be
linear in the number of objects. But the whole process is
also linear in the number of objects, so the improvement
is a constant factor.
The exact improvement does also depend on the contents of
the packs. In p5303, the extra packs all have 5 first-parent
commits in them, which is a reasonable simulation of a
pushed-to repository. But it also means that only 250
first-parent commits are in those packs (compared to almost
50,000 total in linux.git), and the rest are in the huge
"base" pack. So once we start looking at history in that big
pack, that's where we'll find most everything, and even the
1-element cache gets close to 100% cache hits. You could
almost certainly show better numbers with a more
pathological case (e.g., distributing the objects more
evenly across the packs). But that's simply not that
realistic a scenario, so it makes more sense to focus on
these numbers.
The implementation itself is a straightforward application
of the MRU code. We provide an MRU-ordered list of packs
that shadows the packed_git list. This is easy to do because
we only create and revise the pack list in one place. The
"reprepare" code path actually drops the whole MRU and
replaces it for simplicity. It would be more efficient to
just add new entries, but there's not much point in
optimizing here; repreparing happens rarely, and only after
doing a lot of other expensive work. The key things to keep
optimized are traversal (which is just a normal linked list,
albeit with one extra level of indirection over the regular
packed_git list), and marking (which is a constant number of
pointer assignments, though slightly more than the old
last_found_pack was; it doesn't seem to create a measurable
slowdown, though).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-29 06:09:46 +02:00
|
|
|
|
2005-07-01 02:15:39 +02:00
|
|
|
/*
 * Ties a 20-byte sha1 to an offset and a struct packed_git.
 * NOTE(review): presumably the position of that object inside pack
 * "p" -- confirm against the lookup code.
 */
struct pack_entry {
|
2007-03-07 02:44:30 +01:00
|
|
|
off_t offset;
|
2005-07-01 02:15:39 +02:00
|
|
|
unsigned char sha1[20];
|
|
|
|
struct packed_git *p;
|
|
|
|
};
|
|
|
|
|
2017-03-16 15:27:00 +01:00
|
|
|
/*
|
2017-03-28 21:45:25 +02:00
|
|
|
* Create a temporary file rooted in the object database directory, or
|
|
|
|
* die on failure. The filename is taken from "pattern", which should have the
|
|
|
|
* usual "XXXXXX" trailer, and the resulting filename is written into the
|
|
|
|
* "temp_filename" buffer. Returns the open descriptor.
|
2017-03-16 15:27:00 +01:00
|
|
|
*/
|
2018-02-14 19:59:53 +01:00
|
|
|
extern int odb_mkstemp(struct strbuf *temp_filename, const char *pattern);
|
2017-03-16 15:27:00 +01:00
|
|
|
|
|
|
|
/*
|
2017-03-16 15:27:12 +01:00
|
|
|
* Create a pack .keep file named "name" (which should generally be the output
|
|
|
|
* of odb_pack_name). Returns a file descriptor opened for writing, or -1 on
|
|
|
|
* error.
|
2017-03-16 15:27:00 +01:00
|
|
|
*/
|
2017-03-16 15:27:12 +01:00
|
|
|
extern int odb_pack_keep(const char *name);
|
2017-03-16 15:27:00 +01:00
|
|
|
|
2014-10-16 00:38:55 +02:00
|
|
|
/*
|
|
|
|
* Iterate over the files in the loose-object parts of the object
|
|
|
|
* directory "path", triggering the following callbacks:
|
|
|
|
*
|
|
|
|
* - loose_object is called for each loose object we find.
|
|
|
|
*
|
|
|
|
* - loose_cruft is called for any files that do not appear to be
|
|
|
|
* loose objects. Note that we only look in the loose object
|
|
|
|
* directories "objects/[0-9a-f]{2}/", so we will not report
|
|
|
|
* "objects/foobar" as cruft.
|
|
|
|
*
|
|
|
|
* - loose_subdir is called for each top-level hashed subdirectory
|
|
|
|
* of the object directory (e.g., "$OBJDIR/f0"). It is called
|
|
|
|
* after the objects in the directory are processed.
|
|
|
|
*
|
|
|
|
* Any callback that is NULL will be ignored. Callbacks returning non-zero
|
|
|
|
* will end the iteration.
|
2015-02-09 02:13:22 +01:00
|
|
|
*
|
|
|
|
* In the "buf" variant, "path" is a strbuf which will also be used as a
|
|
|
|
* scratch buffer, but restored to its original contents before
|
|
|
|
* the function returns.
|
2014-10-16 00:38:55 +02:00
|
|
|
*/
|
2017-02-22 00:47:35 +01:00
|
|
|
typedef int each_loose_object_fn(const struct object_id *oid,
|
2014-10-16 00:38:55 +02:00
|
|
|
const char *path,
|
|
|
|
void *data);
|
|
|
|
typedef int each_loose_cruft_fn(const char *basename,
|
|
|
|
const char *path,
|
|
|
|
void *data);
|
2017-06-24 16:09:39 +02:00
|
|
|
typedef int each_loose_subdir_fn(unsigned int nr,
|
2014-10-16 00:38:55 +02:00
|
|
|
const char *path,
|
|
|
|
void *data);
|
2017-06-24 16:09:39 +02:00
|
|
|
int for_each_file_in_obj_subdir(unsigned int subdir_nr,
|
2017-06-22 20:19:48 +02:00
|
|
|
struct strbuf *path,
|
|
|
|
each_loose_object_fn obj_cb,
|
|
|
|
each_loose_cruft_fn cruft_cb,
|
|
|
|
each_loose_subdir_fn subdir_cb,
|
|
|
|
void *data);
|
2014-10-16 00:38:55 +02:00
|
|
|
int for_each_loose_file_in_objdir(const char *path,
|
|
|
|
each_loose_object_fn obj_cb,
|
|
|
|
each_loose_cruft_fn cruft_cb,
|
|
|
|
each_loose_subdir_fn subdir_cb,
|
|
|
|
void *data);
|
2015-02-09 02:13:22 +01:00
|
|
|
int for_each_loose_file_in_objdir_buf(struct strbuf *path,
|
|
|
|
each_loose_object_fn obj_cb,
|
|
|
|
each_loose_cruft_fn cruft_cb,
|
|
|
|
each_loose_subdir_fn subdir_cb,
|
|
|
|
void *data);
|
2014-10-16 00:38:55 +02:00
|
|
|
|
2014-10-16 00:41:21 +02:00
|
|
|
/*
|
2017-08-19 00:20:38 +02:00
|
|
|
* Iterate over loose objects in both the local
|
reachable: only mark local objects as recent
When pruning and repacking a repository that has an
alternate object store configured, we may traverse a large
number of objects in the alternate. This serves no purpose,
and may be expensive to do. A longer explanation is below.
Commits d3038d2 and abcb865 taught prune and pack-objects
(respectively) to treat "recent" objects as tips for
reachability, so that we keep whole chunks of history. They
built on the object traversal in 660c889 (sha1_file: add
for_each iterators for loose and packed objects,
2014-10-15), which covers both local and alternate objects.
In both cases, covering alternate objects is unnecessary, as
both commands can only drop objects from the local
repository. In the case of prune, we traverse only the local
object directory. And in the case of repacking, while we may
or may not include local objects in our pack, we will never
reach into the alternate with "repack -d". The "-l" option
is only a question of whether we are migrating objects from
the alternate into our repository, or leaving them
untouched.
It is possible that we may drop an object that is depended
upon by another object in the alternate. For example,
imagine two repositories, A and B, with A pointing to B as
an alternate. Now imagine a commit that is in B which
references a tree that is only in A. Traversing from recent
objects in B might prevent A from dropping that tree. But
this case isn't worth covering. Repo B should take
responsibility for its own objects. It would never have had
the commit in the first place if it did not also have the
tree, and assuming it is using the same "keep recent chunks
of history" scheme, then it would itself keep the tree, as
well.
So checking the alternate objects is not worth doing, and
comes with a significant performance impact. In both cases,
we skip any recent objects that have already been marked
SEEN (i.e., that we know are already reachable for prune, or
included in the pack for a repack). So there is a slight
waste of time in opening the alternate packs at all, only to
notice that we have already considered each object. But much
worse, the alternate repository may have a large number of
objects that are not reachable from the local repository at
all, and we end up adding them to the traversal.
We can fix this by considering only local unseen objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-27 12:32:41 +01:00
|
|
|
* repository and any alternate repositories (unless the
|
|
|
|
* LOCAL_ONLY flag is set).
|
2014-10-16 00:41:21 +02:00
|
|
|
*/
|
reachable: only mark local objects as recent
When pruning and repacking a repository that has an
alternate object store configured, we may traverse a large
number of objects in the alternate. This serves no purpose,
and may be expensive to do. A longer explanation is below.
Commits d3038d2 and abcb865 taught prune and pack-objects
(respectively) to treat "recent" objects as tips for
reachability, so that we keep whole chunks of history. They
built on the object traversal in 660c889 (sha1_file: add
for_each iterators for loose and packed objects,
2014-10-15), which covers both local and alternate objects.
In both cases, covering alternate objects is unnecessary, as
both commands can only drop objects from the local
repository. In the case of prune, we traverse only the local
object directory. And in the case of repacking, while we may
or may not include local objects in our pack, we will never
reach into the alternate with "repack -d". The "-l" option
is only a question of whether we are migrating objects from
the alternate into our repository, or leaving them
untouched.
It is possible that we may drop an object that is depended
upon by another object in the alternate. For example,
imagine two repositories, A and B, with A pointing to B as
an alternate. Now imagine a commit that is in B which
references a tree that is only in A. Traversing from recent
objects in B might prevent A from dropping that tree. But
this case isn't worth covering. Repo B should take
responsibility for its own objects. It would never have had
the commit in the first place if it did not also have the
tree, and assuming it is using the same "keep recent chunks
of history" scheme, then it would itself keep the tree, as
well.
So checking the alternate objects is not worth doing, and
comes with a significant performance impact. In both cases,
we skip any recent objects that have already been marked
SEEN (i.e., that we know are already reachable for prune, or
included in the pack for a repack). So there is a slight
waste of time in opening the alternate packs at all, only to
notice that we have already considered each object. But much
worse, the alternate repository may have a large number of
objects that are not reachable from the local repository at
all, and we end up adding them to the traversal.
We can fix this by considering only local unseen objects.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-27 12:32:41 +01:00
|
|
|
#define FOR_EACH_OBJECT_LOCAL_ONLY 0x1
|
|
|
|
extern int for_each_loose_object(each_loose_object_fn, void *, unsigned flags);
|
2014-10-16 00:41:21 +02:00
|
|
|
|
2011-05-13 00:51:38 +02:00
|
|
|
/*
 * Record handed to sha1_object_info_extended() (declared further down in
 * this header). The members under "Request" are all pointers; the members
 * under "Response" are the enum "whence" and the union "u".
 * NOTE(review): presumably a NULL request pointer means "this item is not
 * wanted" (OBJECT_INFO_INIT is {NULL}) -- confirm against the implementation.
 */
struct object_info {
|
|
|
|
/* Request */
|
2013-07-12 08:34:57 +02:00
|
|
|
enum object_type *typep;
|
2011-05-13 00:51:38 +02:00
|
|
|
unsigned long *sizep;
|
2016-07-13 17:43:59 +02:00
|
|
|
off_t *disk_sizep;
|
2013-12-21 15:24:20 +01:00
|
|
|
unsigned char *delta_base_sha1;
|
2018-02-14 19:59:23 +01:00
|
|
|
struct strbuf *type_name;
|
2017-06-22 02:40:21 +02:00
|
|
|
void **contentp;
|
2011-05-13 00:51:38 +02:00
|
|
|
|
|
|
|
|
|
|
/* Response */
|
|
|
|
enum {
|
|
|
|
OI_CACHED,
|
|
|
|
OI_LOOSE,
|
2011-05-13 22:20:43 +02:00
|
|
|
OI_PACKED,
|
|
|
|
OI_DBCACHED
|
2011-05-13 00:51:38 +02:00
|
|
|
} whence;
|
|
|
|
union {
|
|
|
|
/*
|
|
|
|
* struct {
|
|
|
|
* ... Nothing to expose in this case
|
|
|
|
* } cached;
|
|
|
|
* struct {
|
|
|
|
* ... Nothing to expose in this case
|
|
|
|
* } loose;
|
|
|
|
*/
|
|
|
|
/*
 * "packed" is the only member actually declared in this union.
 * NOTE(review): presumably only meaningful when whence is
 * OI_PACKED -- confirm against the code that fills it in.
 */
struct {
|
|
|
|
struct packed_git *pack;
|
|
|
|
off_t offset;
|
|
|
|
unsigned int is_delta;
|
|
|
|
} packed;
|
|
|
|
} u;
|
|
|
|
};
|
provide an initializer for "struct object_info"
An all-zero initializer is fine for this struct, but because
the first element is a pointer, call sites need to know to
use "NULL" instead of "0". Otherwise some static checkers
like "sparse" will complain; see d099b71 (Fix some sparse
warnings, 2013-07-18) for example. So let's provide an
initializer to make this easier to get right.
But let's also comment that memset() to zero is explicitly
OK[1]. One of the callers embeds object_info in another
struct which is initialized via memset (expand_data in
builtin/cat-file.c). Since our subset of C doesn't allow
assignment from a compound literal, handling this in any
other way is awkward, so we'd like to keep the ability to
initialize by memset(). By documenting this property, it
should make anybody who wants to change the initializer
think twice before doing so.
There's one other caller of interest. In parse_sha1_header(),
we did not initialize the struct fully in the first place.
This turned out not to be a bug because the sub-function it
calls does not look at any other fields except the ones we
did initialize. But that assumption might not hold in the
future, so it's a dangerous construct. This patch switches
it to initializing the whole struct, which protects us
against unexpected reads of the other fields.
[1] Obviously using memset() to initialize a pointer
violates the C standard, but we long ago decided that it
was an acceptable tradeoff in the real world.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-11 11:24:35 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Initializer for a "struct object_info" that wants no items. You may
|
|
|
|
* also memset() the memory to all-zeroes.
|
|
|
|
*/
|
|
|
|
#define OBJECT_INFO_INIT {NULL}
|
|
|
|
|
2017-06-22 02:40:19 +02:00
|
|
|
/* Invoke lookup_replace_object() on the given hash */
|
|
|
|
#define OBJECT_INFO_LOOKUP_REPLACE 1
|
2017-06-22 02:40:18 +02:00
|
|
|
/* Allow reading from a loose object file of unknown/bogus type */
|
|
|
|
#define OBJECT_INFO_ALLOW_UNKNOWN_TYPE 2
|
2017-06-22 02:40:22 +02:00
|
|
|
/* Do not check cached storage */
|
|
|
|
#define OBJECT_INFO_SKIP_CACHED 4
|
|
|
|
/* Do not retry packed storage after checking packed and loose storage */
|
|
|
|
#define OBJECT_INFO_QUICK 8
|
2013-12-11 08:46:07 +01:00
|
|
|
extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags);
|
2005-06-28 23:56:57 +02:00
|
|
|
|
2017-12-08 16:27:14 +01:00
|
|
|
/*
|
|
|
|
* Set this to 0 to prevent sha1_object_info_extended() from fetching missing
|
|
|
|
* blobs. This makes a difference only if extensions.partialClone is set.
|
|
|
|
*
|
|
|
|
* Its default value is 1.
|
|
|
|
*/
|
|
|
|
extern int fetch_if_missing;
|
|
|
|
|
[PATCH] Add update-server-info.
The git-update-server-info command prepares informational files
to help clients discover the contents of a repository, and pull
from it via dumb transport protocols. Currently, the
following files are produced.
- The $repo/info/refs file lists the name of heads and tags
available in the $repo/refs/ directory, along with their
SHA1. This can be used by git-ls-remote command running on
the client side.
- The $repo/info/rev-cache file describes the commit ancestry
reachable from references in the $repo/refs/ directory. This
file is in an append-only binary format to make the server
side friendly to rsync mirroring scheme, and can be read by
git-show-rev-cache command.
- The $repo/objects/info/pack file lists the name of the packs
available, the interdependencies among them, and the head
commits and tags contained in them. Along with the other two
files, this is designed to help clients to make smart pull
decisions.
The git-receive-pack command is changed to invoke it at the end,
so just after a push to a public repository finishes via "git
push", the server info is automatically updated.
In addition, building of the rev-cache file can be done by a
standalone git-build-rev-cache command separately.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-07-24 02:54:41 +02:00
|
|
|
/* Dumb servers support */
|
|
|
|
extern int update_server_info(int);
|
|
|
|
|
2010-11-02 20:59:07 +01:00
|
|
|
extern const char *get_log_output_encoding(void);
|
|
|
|
extern const char *get_commit_output_encoding(void);
|
|
|
|
|
2016-01-27 07:58:06 +01:00
|
|
|
/*
|
|
|
|
* This is a hack for test programs like test-dump-untracked-cache to
|
|
|
|
* ensure that they do not modify the untracked cache when reading it.
|
|
|
|
* Do not use it otherwise!
|
|
|
|
*/
|
|
|
|
extern int ignore_untracked_cache_config;
|
2014-07-28 12:10:38 +02:00
|
|
|
|
ident: keep separate "explicit" flags for author and committer
We keep track of whether the user ident was given to us
explicitly, or if we guessed at it from system parameters
like username and hostname. However, we kept only a single
variable. This covers the common cases (because the author
and committer will usually come from the same explicit
source), but can miss two cases:
1. GIT_COMMITTER_* is set explicitly, but we fallback for
GIT_AUTHOR. We claim the ident is explicit, even though
the author is not.
2. GIT_AUTHOR_* is set and we ask for author ident, but
not committer ident. We will claim the ident is
implicit, even though it is explicit.
This patch uses two variables instead of one, updates both
when we set the "fallback" values, and updates them
individually when we read from the environment.
Rather than keep user_ident_sufficiently_given as a
compatibility wrapper, we update the only two callers to
check the committer_ident, which matches their intent and
what was happening already.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-11-15 01:34:13 +01:00
|
|
|
extern int committer_ident_sufficiently_given(void);
|
|
|
|
extern int author_ident_sufficiently_given(void);
|
2005-10-12 03:47:34 +02:00
|
|
|
|
2007-03-12 20:33:18 +01:00
|
|
|
extern const char *git_commit_encoding;
|
2007-03-07 02:44:17 +01:00
|
|
|
extern const char *git_log_output_encoding;
|
2009-02-08 15:34:27 +01:00
|
|
|
extern const char *git_mailmap_file;
|
2012-12-12 12:04:04 +01:00
|
|
|
extern const char *git_mailmap_blob;
|
2005-11-28 01:09:40 +01:00
|
|
|
|
2007-06-29 19:40:46 +02:00
|
|
|
/* IO helper functions */
|
|
|
|
extern void maybe_flush_or_die(FILE *, const char *);
|
2014-09-10 12:03:52 +02:00
|
|
|
__attribute__((format (printf, 2, 3)))
|
|
|
|
extern void fprintf_or_die(FILE *, const char *fmt, ...);
|
2015-05-19 19:55:16 +02:00
|
|
|
|
|
|
|
#define COPY_READ_ERROR (-2)
|
|
|
|
#define COPY_WRITE_ERROR (-3)
|
2005-10-22 10:28:13 +02:00
|
|
|
extern int copy_fd(int ifd, int ofd);
|
2008-02-25 20:24:48 +01:00
|
|
|
extern int copy_file(const char *dst, const char *src, int mode);
|
2009-09-12 11:03:48 +02:00
|
|
|
extern int copy_file_with_time(const char *dst, const char *src, int mode);
|
2015-05-19 19:55:16 +02:00
|
|
|
|
2006-08-21 20:43:43 +02:00
|
|
|
extern void write_or_die(int fd, const void *buf, size_t count);
|
2008-05-30 17:42:16 +02:00
|
|
|
extern void fsync_or_die(int fd, const char *);
|
2005-12-15 07:17:38 +01:00
|
|
|
|
use write_str_in_full helper to avoid literal string lengths
In 2d14d65 (Use a clearer style to issue commands to remote helpers,
2009-09-03) I happened to notice two changes like this:
- write_in_full(helper->in, "list\n", 5);
+
+ strbuf_addstr(&buf, "list\n");
+ write_in_full(helper->in, buf.buf, buf.len);
+ strbuf_reset(&buf);
IMHO, it would be better to define a new function,
static inline ssize_t write_str_in_full(int fd, const char *str)
{
return write_in_full(fd, str, strlen(str));
}
and then use it like this:
- strbuf_addstr(&buf, "list\n");
- write_in_full(helper->in, buf.buf, buf.len);
- strbuf_reset(&buf);
+ write_str_in_full(helper->in, "list\n");
Thus not requiring the added allocation, and still avoiding
the maintenance risk of literal string lengths.
These days, compilers are good enough that strlen("literal")
imposes no run-time cost.
Transformed via this:
perl -pi -e \
's/write_in_full\((.*?), (".*?"), \d+\)/write_str_in_full($1, $2)/'\
$(git grep -l 'write_in_full.*"')
Signed-off-by: Jim Meyering <meyering@redhat.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-12 10:54:32 +02:00
|
|
|
extern ssize_t read_in_full(int fd, void *buf, size_t count);
|
|
|
|
extern ssize_t write_in_full(int fd, const void *buf, size_t count);
|
2014-04-10 20:31:21 +02:00
|
|
|
extern ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset);
|
|
|
|
|
use write_str_in_full helper to avoid literal string lengths
In 2d14d65 (Use a clearer style to issue commands to remote helpers,
2009-09-03) I happened to notice two changes like this:
- write_in_full(helper->in, "list\n", 5);
+
+ strbuf_addstr(&buf, "list\n");
+ write_in_full(helper->in, buf.buf, buf.len);
+ strbuf_reset(&buf);
IMHO, it would be better to define a new function,
static inline ssize_t write_str_in_full(int fd, const char *str)
{
return write_in_full(fd, str, strlen(str));
}
and then use it like this:
- strbuf_addstr(&buf, "list\n");
- write_in_full(helper->in, buf.buf, buf.len);
- strbuf_reset(&buf);
+ write_str_in_full(helper->in, "list\n");
Thus not requiring the added allocation, and still avoiding
the maintenance risk of literal string lengths.
These days, compilers are good enough that strlen("literal")
imposes no run-time cost.
Transformed via this:
perl -pi -e \
's/write_in_full\((.*?), (".*?"), \d+\)/write_str_in_full($1, $2)/'\
$(git grep -l 'write_in_full.*"')
Signed-off-by: Jim Meyering <meyering@redhat.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-12 10:54:32 +02:00
|
|
|
/*
 * Write the NUL-terminated string "str" to descriptor "fd" by passing it
 * to write_in_full() with strlen(str) as the byte count, so the trailing
 * NUL itself is not part of the write. Returns write_in_full()'s result
 * unchanged.
 */
static inline ssize_t write_str_in_full(int fd, const char *str)
|
|
|
|
{
|
|
|
|
return write_in_full(fd, str, strlen(str));
|
|
|
|
}
|
2015-08-24 22:03:07 +02:00
|
|
|
|
write_file: add pointer+len variant
There are many callsites which could use write_file, but for
which it is a little awkward because they have a strbuf or
other pointer/len combo. Specifically:
1. write_file() takes a format string, so we have to use
"%s" or "%.*s", which are ugly.
2. Using any form of "%s" does not handle embedded NULs in
the output. That probably doesn't matter for our
call-sites, but it's nicer not to have to worry.
3. It's less efficient; we format into another strbuf
just to do the write. That's probably not measurably
slow for our uses, but it's simply inelegant.
We can fix this by providing a helper to write out the
formatted buffer, and just calling it from write_file().
Note that we don't do the usual "complete with a newline"
that write_file does. If the caller has their own buffer,
there's a reasonable chance they're doing something more
complicated than a single line, and they can call
strbuf_complete_line() themselves.
We could go even further and add strbuf_write_file(), but it
doesn't save much:
- write_file_buf(path, sb.buf, sb.len);
+ strbuf_write_file(&sb, path);
It would also be somewhat asymmetric with strbuf_read_file,
which actually returns errors rather than dying (and the
error handling is most of the benefit of write_file() in the
first place).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-08 11:12:22 +02:00
|
|
|
/**
|
|
|
|
* Open (and truncate) the file at path, write the contents of buf to it,
|
|
|
|
* and close it. Dies if any errors are encountered.
|
|
|
|
*/
|
|
|
|
extern void write_file_buf(const char *path, const char *buf, size_t len);
|
|
|
|
|
2016-07-08 11:12:42 +02:00
|
|
|
/**
|
|
|
|
* Like write_file_buf(), but format the contents into a buffer first.
|
|
|
|
* Additionally, write_file() will append a newline if one is not already
|
|
|
|
* present, making it convenient to write text files:
|
|
|
|
*
|
|
|
|
* write_file(path, "counter: %d", ctr);
|
|
|
|
*/
|
|
|
|
__attribute__((format (printf, 2, 3)))
|
2016-07-08 11:09:34 +02:00
|
|
|
extern void write_file(const char *path, const char *fmt, ...);
|
use write_str_in_full helper to avoid literal string lengths
In 2d14d65 (Use a clearer style to issue commands to remote helpers,
2009-09-03) I happened to notice two changes like this:
- write_in_full(helper->in, "list\n", 5);
+
+ strbuf_addstr(&buf, "list\n");
+ write_in_full(helper->in, buf.buf, buf.len);
+ strbuf_reset(&buf);
IMHO, it would be better to define a new function,
static inline ssize_t write_str_in_full(int fd, const char *str)
{
return write_in_full(fd, str, strlen(str));
}
and then use it like this:
- strbuf_addstr(&buf, "list\n");
- write_in_full(helper->in, buf.buf, buf.len);
- strbuf_reset(&buf);
+ write_str_in_full(helper->in, "list\n");
Thus not requiring the added allocation, and still avoiding
the maintenance risk of literal string lengths.
These days, compilers are good enough that strlen("literal")
imposes no run-time cost.
Transformed via this:
perl -pi -e \
's/write_in_full\((.*?), (".*?"), \d+\)/write_str_in_full($1, $2)/'\
$(git grep -l 'write_in_full.*"')
Signed-off-by: Jim Meyering <meyering@redhat.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-12 10:54:32 +02:00
|
|
|
|
2006-02-28 20:26:21 +01:00
|
|
|
/* pager.c */
|
|
|
|
extern void setup_pager(void);
|
2007-12-11 07:27:33 +01:00
|
|
|
extern int pager_in_use(void);
|
2006-07-30 00:27:43 +02:00
|
|
|
extern int pager_use_color;
|
2012-02-12 15:12:32 +01:00
|
|
|
extern int term_columns(void);
|
decimal_width: avoid integer overflow
The decimal_width function originally appeared in blame.c as
"lineno_width", and was designed for calculating the
print-width of small-ish integer values (line numbers in
text files). In ec7ff5b, it was made into a reusable
function, and in dc801e7, we started using it to align
diffstats.
Binary files in a diffstat show byte counts rather than line
numbers, meaning they can be quite large (e.g., consider
adding or removing a 2GB file). decimal_width is not up to
the challenge for two reasons:
1. It takes the value as an "int", whereas large files may
easily surpass this. The value may be truncated, in
which case we will produce an incorrect value.
2. It counts "up" by repeatedly multiplying another
integer by 10 until it surpasses the value. This can
cause an infinite loop when the value is close to the
largest representable integer.
For example, consider using a 32-bit signed integer,
and a value of 2,140,000,000 (just shy of 2^31-1).
We will count up and eventually see that 1,000,000,000
is smaller than our value. The next step would be to
multiply by 10 and see that 10,000,000,000 is too
large, ending the loop. But we can't represent that
value, and we have signed overflow.
This is technically undefined behavior, but a common
behavior is to lose the high bits, in which case our
iterator will certainly be less than the number. So
we'll keep multiplying, overflow again, and so on.
This patch changes the argument to a uintmax_t (the same
type we use to store the diffstat information for binary
filese), and counts "down" by repeatedly dividing our value
by 10.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-02-05 09:14:19 +01:00
|
|
|
extern int decimal_width(uintmax_t);
|
2012-10-26 17:53:52 +02:00
|
|
|
extern int check_pager_config(const char *cmd);
|
2016-02-16 23:34:44 +01:00
|
|
|
extern void prepare_pager_args(struct child_process *, const char *pager);
|
2006-02-28 20:26:21 +01:00
|
|
|
|
2008-02-16 06:01:41 +01:00
|
|
|
extern const char *editor_program;
|
2010-08-30 15:38:38 +02:00
|
|
|
extern const char *askpass_program;
|
2008-02-16 06:01:59 +01:00
|
|
|
extern const char *excludes_file;
|
2007-07-20 14:06:09 +02:00
|
|
|
|
binary patch.
This adds "binary patch" to the diff output and teaches apply
what to do with them.
On the diff generation side, traditionally, we said "Binary
files differ\n" without giving anything other than the preimage
and postimage object name on the index line. This was good
enough for applying a patch generated from your own repository
(very useful while rebasing), because the postimage would be
available in such a case. However, this was not useful when the
recipient of such a patch via e-mail were to apply it, even if
the preimage was available.
This patch allows the diff to generate "binary" patch when
operating under --full-index option. The binary patch follows
the usual extended git diff headers, and looks like this:
"GIT binary patch\n"
<length byte><data>"\n"
...
"\n"
Each line is prefixed with a "length-byte", whose value is upper
or lowercase alphabet that encodes number of bytes that the data
on the line decodes to (1..52 -- 'A' means 1, 'B' means 2, ...,
'Z' means 26, 'a' means 27, ...). <data> is 1 or more groups of
5-byte sequence, each of which encodes up to 4 bytes in base85
encoding. Because 52 / 4 * 5 = 65 and we have the length byte,
an output line is capped to 66 characters. The payload is the
same diff-delta as we use in the packfiles.
On the consumption side, git-apply now can decode and apply the
binary patch when --allow-binary-replacement is given, the diff
was generated with --full-index, and the receiving repository
has the preimage blob, which is the same condition as it always
required when accepting a "Binary files differ\n" patch.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-05 01:51:44 +02:00
|
|
|
/* base85 */
|
2007-04-10 00:56:33 +02:00
|
|
|
int decode_85(char *dst, const char *line, int linelen);
|
|
|
|
void encode_85(char *buf, const unsigned char *data, int bytes);
|
binary patch.
This adds "binary patch" to the diff output and teaches apply
what to do with them.
On the diff generation side, traditionally, we said "Binary
files differ\n" without giving anything other than the preimage
and postimage object name on the index line. This was good
enough for applying a patch generated from your own repository
(very useful while rebasing), because the postimage would be
available in such a case. However, this was not useful when the
recipient of such a patch via e-mail were to apply it, even if
the preimage was available.
This patch allows the diff to generate "binary" patch when
operating under --full-index option. The binary patch follows
the usual extended git diff headers, and looks like this:
"GIT binary patch\n"
<length byte><data>"\n"
...
"\n"
Each line is prefixed with a "length-byte", whose value is upper
or lowercase alphabet that encodes number of bytes that the data
on the line decodes to (1..52 -- 'A' means 1, 'B' means 2, ...,
'Z' means 26, 'a' means 27, ...). <data> is 1 or more groups of
5-byte sequence, each of which encodes up to 4 bytes in base85
encoding. Because 52 / 4 * 5 = 65 and we have the length byte,
an output line is capped to 66 characters. The payload is the
same diff-delta as we use in the packfiles.
On the consumption side, git-apply now can decode and apply the
binary patch when --allow-binary-replacement is given, the diff
was generated with --full-index, and the receiving repository
has the preimage blob, which is the same condition as it always
required when accepting a "Binary files differ\n" patch.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-05 01:51:44 +02:00
|
|
|
|
Add specialized object allocator
This creates a simple specialized object allocator for basic
objects.
This avoids wasting space with malloc overhead (metadata and
extra alignment), since the specialized allocator knows the
alignment, and that objects, once allocated, are never freed.
It also allows us to track some basic statistics about object
allocations. For example, for the mozilla import, it shows
object usage as follows:
blobs: 627629 (14710 kB)
trees: 1119035 (34969 kB)
commits: 196423 (8440 kB)
tags: 1336 (46 kB)
and the simpler allocator shaves about 2.5% off the memory
footprint of a "git-rev-list --all --objects", and is a bit
faster too.
[ Side note: this concludes the series of "save memory in object storage".
The thing is, there simply isn't much more to be saved on the objects.
Doing "git-rev-list --all --objects" on the mozilla archive has a final
total RSS of 131498 pages for me: that's about 513MB. Of that, the
object overhead is now just 56MB, the rest is going somewhere else (put
another way: the fact that this patch shaves off 2.5% of the total
memory overhead, considering that objects are now not much more than 10%
of the total shows how big the wasted space really was: this makes
object allocations much more memory- and time-efficient).
I haven't looked at where the rest is, but I suspect the bulk of it is
just the pack-file loading. It may be that we should pack the tree
objects separately from the blob objects: for git-rev-list --objects, we
don't actually ever need to even look at the blobs, but since trees and
blobs are interspersed in the pack-file, we end up not being dense in
the tree accesses, so we end up looking at more pages than we strictly
need to.
So with a 535MB pack-file, it's entirely possible - even likely - that
most of the remaining RSS is just the mmap of the pack-file itself. We
don't need to map in _all_ of it, but we do end up mapping a fair
amount. ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-19 19:44:15 +02:00
|
|
|
/* alloc.c */
|
2007-04-17 07:11:43 +02:00
|
|
|
extern void *alloc_blob_node(void);
|
|
|
|
extern void *alloc_tree_node(void);
|
|
|
|
extern void *alloc_commit_node(void);
|
|
|
|
extern void *alloc_tag_node(void);
|
|
|
|
extern void *alloc_object_node(void);
|
Add specialized object allocator
This creates a simple specialized object allocator for basic
objects.
This avoids wasting space with malloc overhead (metadata and
extra alignment), since the specialized allocator knows the
alignment, and that objects, once allocated, are never freed.
It also allows us to track some basic statistics about object
allocations. For example, for the mozilla import, it shows
object usage as follows:
blobs: 627629 (14710 kB)
trees: 1119035 (34969 kB)
commits: 196423 (8440 kB)
tags: 1336 (46 kB)
and the simpler allocator shaves about 2.5% off the memory
footprint of a "git-rev-list --all --objects", and is a bit
faster too.
[ Side note: this concludes the series of "save memory in object storage".
The thing is, there simply isn't much more to be saved on the objects.
Doing "git-rev-list --all --objects" on the mozilla archive has a final
total RSS of 131498 pages for me: that's about 513MB. Of that, the
object overhead is now just 56MB, the rest is going somewhere else (put
another way: the fact that this patch shaves off 2.5% of the total
memory overhead, considering that objects are now not much more than 10%
of the total shows how big the wasted space really was: this makes
object allocations much more memory- and time-efficient).
I haven't looked at where the rest is, but I suspect the bulk of it is
just the pack-file loading. It may be that we should pack the tree
objects separately from the blob objects: for git-rev-list --objects, we
don't actually ever need to even look at the blobs, but since trees and
blobs are interspersed in the pack-file, we end up not being dense in
the tree accesses, so we end up looking at more pages than we strictly
need to.
So with a 535MB pack-file, it's entirely possible - even likely - that
most of the remaining RSS is just the mmap of the pack-file itself. We
don't need to map in _all_ of it, but we do end up mapping a fair
amount. ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-19 19:44:15 +02:00
|
|
|
extern void alloc_report(void);
|
2014-07-13 08:42:08 +02:00
|
|
|
extern unsigned int alloc_commit_index(void);
|
Add specialized object allocator
This creates a simple specialized object allocator for basic
objects.
This avoids wasting space with malloc overhead (metadata and
extra alignment), since the specialized allocator knows the
alignment, and that objects, once allocated, are never freed.
It also allows us to track some basic statistics about object
allocations. For example, for the mozilla import, it shows
object usage as follows:
blobs: 627629 (14710 kB)
trees: 1119035 (34969 kB)
commits: 196423 (8440 kB)
tags: 1336 (46 kB)
and the simpler allocator shaves about 2.5% off the memory
footprint of a "git-rev-list --all --objects", and is a bit
faster too.
[ Side note: this concludes the series of "save memory in object storage".
The thing is, there simply isn't much more to be saved on the objects.
Doing "git-rev-list --all --objects" on the mozilla archive has a final
total RSS of 131498 pages for me: that's about 513MB. Of that, the
object overhead is now just 56MB, the rest is going somewhere else (put
another way: the fact that this patch shaves off 2.5% of the total
memory overhead, considering that objects are now not much more than 10%
of the total shows how big the wasted space really was: this makes
object allocations much more memory- and time-efficient).
I haven't looked at where the rest is, but I suspect the bulk of it is
just the pack-file loading. It may be that we should pack the tree
objects separately from the blob objects: for git-rev-list --objects, we
don't actually ever need to even look at the blobs, but since trees and
blobs are interspersed in the pack-file, we end up not being dense in
the tree accesses, so we end up looking at more pages than we strictly
need to.
So with a 535MB pack-file, it's entirely possible - even likely - that
most of the remaining RSS is just the mmap of the pack-file itself. We
don't need to map in _all_ of it, but we do end up mapping a fair
amount. ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-19 19:44:15 +02:00
|
|
|
|
2014-06-11 09:56:49 +02:00
|
|
|
/* pkt-line.c */
|
2011-02-24 15:30:19 +01:00
|
|
|
void packet_trace_identity(const char *prog);
|
2006-09-02 18:23:48 +02:00
|
|
|
|
2007-11-18 10:12:04 +01:00
|
|
|
/* add */
|
2008-05-12 19:58:10 +02:00
|
|
|
/*
|
|
|
|
* return 0 on success, or 1 if addition of a file failed and
|
|
|
|
* ADD_FILES_IGNORE_ERRORS was specified in flags
|
|
|
|
*/
|
2016-09-14 23:07:47 +02:00
|
|
|
int add_files_to_cache(const char *prefix, const struct pathspec *pathspec, int flags);
|
2007-11-18 10:12:04 +01:00
|
|
|
|
2007-08-31 22:13:42 +02:00
|
|
|
/* diff.c */
|
|
|
|
extern int diff_auto_refresh_index;
|
|
|
|
|
2007-02-16 01:32:45 +01:00
|
|
|
/* match-trees.c */
|
2016-04-18 01:10:38 +02:00
|
|
|
void shift_tree(const struct object_id *, const struct object_id *, struct object_id *, int);
|
|
|
|
void shift_tree_by(const struct object_id *, const struct object_id *, struct object_id *, const char *);
|
2007-02-16 01:32:45 +01:00
|
|
|
|
2007-11-02 08:24:27 +01:00
|
|
|
/*
|
|
|
|
* whitespace rules.
|
|
|
|
* used by both diff and apply
|
2010-11-30 09:29:11 +01:00
|
|
|
* last two octal digits are the tab width
|
2007-11-02 08:24:27 +01:00
|
|
|
*/
|
2010-11-30 09:29:11 +01:00
|
|
|
#define WS_BLANK_AT_EOL 0100
|
|
|
|
#define WS_SPACE_BEFORE_TAB 0200
|
|
|
|
#define WS_INDENT_WITH_NON_TAB 0400
|
|
|
|
#define WS_CR_AT_EOL 01000
|
|
|
|
#define WS_BLANK_AT_EOF 02000
|
|
|
|
#define WS_TAB_IN_INDENT 04000
|
2009-09-06 07:21:17 +02:00
|
|
|
#define WS_TRAILING_SPACE (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF)
|
2010-11-30 09:29:11 +01:00
|
|
|
#define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB|8)
|
|
|
|
#define WS_TAB_WIDTH_MASK 077
|
2017-06-30 02:06:53 +02:00
|
|
|
/* All WS_* -- when extended, adapt diff.c emit_symbol */
|
|
|
|
#define WS_RULE_MASK 07777
|
2007-12-06 09:14:14 +01:00
|
|
|
extern unsigned whitespace_rule_cfg;
|
|
|
|
unsigned whitespace_rule(const char *);
|
|
|
|
unsigned parse_whitespace_rule(const char *);
|
2008-06-27 00:35:21 +02:00
|
|
|
unsigned ws_check(const char *line, int len, unsigned ws_rule);
|
|
|
|
void ws_check_emit(const char *line, int len, unsigned ws_rule,
		   FILE *stream, const char *set,
		   const char *reset, const char *ws);
|
2007-12-13 14:32:29 +01:00
|
|
|
char *whitespace_error_string(unsigned ws);
|
2010-04-03 01:37:23 +02:00
|
|
|
void ws_fix_copy(struct strbuf *, const char *, int, unsigned, int *);
|
2008-06-27 00:36:59 +02:00
|
|
|
int ws_blank_line(const char *line, int len, unsigned ws_rule);
|
2010-11-30 09:29:11 +01:00
|
|
|
/*
 * Extract the tab width from a whitespace rule value: it is kept in
 * the low bits selected by WS_TAB_WIDTH_MASK (077, i.e. two octal digits).
 */
#define ws_tab_width(rule) ((rule) & WS_TAB_WIDTH_MASK)
|
2007-11-02 08:24:27 +01:00
|
|
|
|
2007-11-18 10:13:32 +01:00
|
|
|
/* ls-files */
|
2017-06-13 00:13:58 +02:00
|
|
|
void overlay_tree_on_index(struct index_state *istate,
|
|
|
|
const char *tree_name, const char *prefix);
|
2007-11-18 10:13:32 +01:00
|
|
|
|
2008-02-24 23:17:14 +01:00
|
|
|
char *alias_lookup(const char *alias);
|
2008-06-27 18:21:54 +02:00
|
|
|
int split_cmdline(char *cmdline, const char ***argv);
|
2010-08-07 07:13:39 +02:00
|
|
|
/* Takes a negative value returned by split_cmdline */
|
|
|
|
const char *split_cmdline_strerror(int cmdline_errno);
|
2008-02-24 23:17:14 +01:00
|
|
|
|
setup: make startup_info available everywhere
Commit a60645f (setup: remember whether repository was
found, 2010-08-05) introduced the startup_info structure,
which records some parts of the setup_git_directory()
process (notably, whether we actually found a repository or
not).
One of the uses of this data is for functions to behave
appropriately based on whether we are in a repo. But the
startup_info struct is just a pointer to storage provided by
the main program, and the only program that sets it up is
the git.c wrapper. Thus builtins have access to
startup_info, but externally linked programs do not.
Worse, library code which is accessible from both has to be
careful about accessing startup_info. This can be used to
trigger a die("BUG") via get_sha1():
$ git fast-import <<-\EOF
tag foo
from HEAD:./whatever
EOF
fatal: BUG: startup_info struct is not initialized.
Obviously that's fairly nonsensical input to feed to
fast-import, but we should never hit a die("BUG"). And there
may be other ways to trigger it if other non-builtins
resolve sha1s.
So let's point the storage for startup_info to a static
variable in setup.c, making it available to all users of the
library code. We _could_ turn startup_info into a regular
extern struct, but doing so would mean tweaking all of the
existing use sites. So let's leave the pointer indirection
in place. We can, however, drop any checks for NULL, as
they will always be false (and likewise, we can drop the
test covering this case, which was a rather artificial
situation using one of the test-* programs).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-03-05 23:10:27 +01:00
|
|
|
/* setup.c */
|
2010-08-06 04:40:35 +02:00
|
|
|
struct startup_info {
|
2010-08-06 04:46:33 +02:00
|
|
|
int have_repository;
|
2010-12-02 00:33:22 +01:00
|
|
|
const char *prefix;
|
2010-08-06 04:40:35 +02:00
|
|
|
};
|
|
|
|
extern struct startup_info *startup_info;
|
|
|
|
|
2012-10-26 17:53:49 +02:00
|
|
|
/* merge.c */
|
|
|
|
struct commit_list;
|
|
|
|
int try_merge_command(const char *strategy, size_t xopts_nr,
|
|
|
|
const char **xopts, struct commit_list *common,
|
|
|
|
const char *head_arg, struct commit_list *remotes);
|
2017-05-07 00:10:33 +02:00
|
|
|
int checkout_fast_forward(const struct object_id *from,
|
|
|
|
const struct object_id *to,
|
2012-10-26 17:53:49 +02:00
|
|
|
int overwrite_ignore);
|
|
|
|
|
2010-03-06 21:34:41 +01:00
|
|
|
|
2012-03-30 09:52:18 +02:00
|
|
|
int sane_execvp(const char *file, char *const argv[]);
|
|
|
|
|
2013-06-20 10:37:51 +02:00
|
|
|
/*
|
|
|
|
* A struct to encapsulate the concept of whether a file has changed
|
|
|
|
* since we last checked it. This uses criteria similar to those used
|
|
|
|
* for the index.
|
|
|
|
*/
|
|
|
|
struct stat_validity {
|
|
|
|
struct stat_data *sd;
|
|
|
|
};
|
|
|
|
|
|
|
|
void stat_validity_clear(struct stat_validity *sv);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns 1 if the path is a regular file (or a symlink to a regular
|
|
|
|
* file) and matches the saved stat_validity, 0 otherwise. A missing
|
|
|
|
* or inaccessible file is considered a match if the struct was just
|
|
|
|
* initialized, or if the previous update found an inaccessible file.
|
|
|
|
*/
|
|
|
|
int stat_validity_check(struct stat_validity *sv, const char *path);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update the stat_validity from a file opened at descriptor fd. If
|
|
|
|
* the file is missing, inaccessible, or not a regular file, then
|
|
|
|
* future calls to stat_validity_check will match iff one of those
|
|
|
|
* conditions continues to be true.
|
|
|
|
*/
|
|
|
|
void stat_validity_update(struct stat_validity *sv, int fd);
|
|
|
|
|
2014-02-27 13:56:52 +01:00
|
|
|
int versioncmp(const char *s1, const char *s2);
|
2015-06-05 21:45:05 +02:00
|
|
|
void sleep_millisec(int millisec);
|
2014-02-27 13:56:52 +01:00
|
|
|
|
2015-11-10 12:42:38 +01:00
|
|
|
/*
|
|
|
|
* Create a directory and (if share is nonzero) adjust its permissions
|
|
|
|
* according to the shared_repository setting. Only use this for
|
|
|
|
* directories under $GIT_DIR. Don't use it for working tree
|
|
|
|
* directories.
|
|
|
|
*/
|
|
|
|
void safe_create_dir(const char *dir, int share);
|
|
|
|
|
2017-12-03 22:27:39 +01:00
|
|
|
/*
|
|
|
|
* Should we print an ellipsis after an abbreviated SHA-1 value
|
|
|
|
* when doing diff-raw output or indicating a detached HEAD?
|
|
|
|
*/
|
|
|
|
int print_sha1_ellipsis(void);
|
|
|
|
|
2005-04-08 00:13:13 +02:00
|
|
|
#endif /* CACHE_H */
|