2006-03-30 08:55:43 +02:00
|
|
|
#ifndef TREE_WALK_H
|
|
|
|
#define TREE_WALK_H
|
|
|
|
|
2023-02-24 01:09:25 +01:00
|
|
|
#include "hash.h"
|
|
|
|
|
|
|
|
struct index_state;
|
2018-08-15 19:54:05 +02:00
|
|
|
|
2020-02-01 12:39:22 +01:00
|
|
|
#define MAX_TRAVERSE_TREES 8
|
|
|
|
|
2019-11-17 22:04:57 +01:00
|
|
|
/**
 * The tree walking API is used to traverse and inspect trees.
 */
|
|
|
|
|
|
|
|
/**
|
|
|
|
* An entry in a tree. Each entry has a sha1 identifier, pathname, and mode.
|
|
|
|
*/
|
2006-03-30 08:55:43 +02:00
|
|
|
struct name_entry {
|
2019-01-15 01:39:44 +01:00
|
|
|
struct object_id oid;
|
2006-03-30 08:55:43 +02:00
|
|
|
const char *path;
|
2019-01-15 01:39:44 +01:00
|
|
|
int pathlen;
|
2006-03-30 08:55:43 +02:00
|
|
|
unsigned int mode;
|
|
|
|
};
|
|
|
|
|
2019-11-17 22:04:57 +01:00
|
|
|
/**
|
|
|
|
* A semi-opaque data structure used to maintain the current state of the walk.
|
|
|
|
*/
|
2007-03-21 18:09:56 +01:00
|
|
|
struct tree_desc {
|
2019-11-17 22:04:57 +01:00
|
|
|
/*
|
|
|
|
* pointer into the memory representation of the tree. It always
|
|
|
|
* points at the current entry being visited.
|
|
|
|
*/
|
2007-03-21 18:09:56 +01:00
|
|
|
const void *buffer;
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/* points to the current entry being visited. */
|
2007-03-21 18:09:56 +01:00
|
|
|
struct name_entry entry;
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/* counts the number of bytes left in the `buffer`. */
|
2007-03-21 18:09:56 +01:00
|
|
|
unsigned int size;
|
tree-walk: add a mechanism for getting non-canonicalized modes
When using init_tree_desc() and tree_entry() to iterate over a tree, we
always canonicalize the modes coming out of the tree. This is a good
thing to prevent bugs or oddities in normal code paths, but it's
counter-productive for tools like fsck that want to see the exact
contents.
We can address this by adding an option to avoid the extra
canonicalization. A few notes on the implementation:
- I've attached the new option to the tree_desc struct itself. The
actual code change is in decode_tree_entry(), which is in turn
called by the public update_tree_entry(), tree_entry(), and
init_tree_desc() functions, plus their "gently" counterparts.
By letting it ride along in the struct, we can avoid changing the
signature of those functions, which are called many times. Plus it's
conceptually simpler: you really want a particular iteration of a
tree to be "raw" or not, rather than individual calls.
- We still have to set the new option somewhere. The struct is
initialized by init_tree_desc(). I added the new flags field only to
the "gently" version. That avoids disturbing the much more numerous
non-gentle callers, and it makes sense that anybody being careful
about looking at raw modes would also be careful about bogus trees
(i.e., the caller will be something like fsck in the first place).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-10 23:01:17 +02:00
|
|
|
|
|
|
|
/* option flags passed via init_tree_desc_gently() */
|
|
|
|
enum tree_desc_flags {
|
|
|
|
TREE_DESC_RAW_MODES = (1 << 0),
|
|
|
|
} flags;
|
2007-03-21 18:09:56 +01:00
|
|
|
};
|
|
|
|
|
2019-11-17 22:04:57 +01:00
|
|
|
/**
|
|
|
|
* Decode the entry currently being visited (the one pointed to by
|
|
|
|
* `tree_desc's` `entry` member) and return the sha1 of the entry. The
|
|
|
|
* `pathp` and `modep` arguments are set to the entry's pathname and mode
|
|
|
|
* respectively.
|
|
|
|
*/
|
2019-04-05 17:00:12 +02:00
|
|
|
static inline const struct object_id *tree_entry_extract(struct tree_desc *desc, const char **pathp, unsigned short *modep)
|
2007-03-21 18:09:56 +01:00
|
|
|
{
|
|
|
|
*pathp = desc->entry.path;
|
tree-walk: finally switch over tree descriptors to contain a pre-parsed entry
This continues 4651ece8 (Switch over tree descriptors to contain a
pre-parsed entry) and moves the only rest computational part
mode = canon_mode(mode)
from tree_entry_extract() to tree entry decode phase - to
decode_tree_entry().
The reason to do it, is that canon_mode() is at least 2 conditional
jumps for regular files, and that could be noticeable should canon_mode()
be invoked several times.
That does not matter for current Git codebase, where typical tree
traversal is
while (t->size) {
sha1 = tree_entry_extract(t, &path, &mode);
...
update_tree_entry(t);
}
i.e. we do t -> sha1,path.mode "extraction" only once per entry. In such
cases, it does not matter performance-wise, where that mode
canonicalization is done - either once in tree_entry_extract(), or once
in decode_tree_entry() called by update_tree_entry() - it is
approximately the same.
But for future code, which could need to work with several tree_desc's
in parallel, it could be handy to operate on tree_desc descriptors, and
do "extracts" only when needed, or at all, access only relevant part of
it through structure fields directly.
And for such situations, having canon_mode() be done once in decode
phase is better - we won't need to pay the performance price of 2 extra
conditional jumps on every t->mode access.
So let's move mode canonicalization to decode_tree_entry(). That was the
final bit. Now after tree entry is decoded, it is fully ready and could
be accessed either directly via field, or through tree_entry_extract()
which this time got really "totally trivial".
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-02-06 12:36:31 +01:00
|
|
|
*modep = desc->entry.mode;
|
2019-01-15 01:39:44 +01:00
|
|
|
return &desc->entry.oid;
|
2007-03-21 18:09:56 +01:00
|
|
|
}
|
|
|
|
|
2019-11-17 22:04:57 +01:00
|
|
|
/**
|
|
|
|
* Calculate the length of a tree entry's pathname. This utilizes the
|
|
|
|
* memory structure of a tree entry to avoid the overhead of using a
|
|
|
|
* generic strlen().
|
|
|
|
*/
|
2011-10-24 08:36:09 +02:00
|
|
|
static inline int tree_entry_len(const struct name_entry *ne)
|
2007-03-18 04:06:24 +01:00
|
|
|
{
|
2019-01-15 01:39:44 +01:00
|
|
|
return ne->pathlen;
|
2007-03-18 04:06:24 +01:00
|
|
|
}
|
|
|
|
|
2016-09-27 22:59:51 +02:00
|
|
|
/*
 * The _gently versions of these functions warn and return false on a
 * corrupt tree entry rather than dying.
 */
|
|
|
|
|
2019-11-17 22:04:57 +01:00
|
|
|
/**
 * Walk to the next entry in a tree. This is commonly used in conjunction
 * with `tree_entry_extract` to inspect the current entry.
 */
void update_tree_entry(struct tree_desc *);
|
2019-11-17 22:04:57 +01:00
|
|
|
|
2016-09-27 22:59:51 +02:00
|
|
|
/*
 * The _gently variant of update_tree_entry(): per the note on the _gently
 * functions, it warns and returns false on a corrupt tree entry rather
 * than dying.
 */
int update_tree_entry_gently(struct tree_desc *);
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/**
 * Initialize a `tree_desc` and decode its first entry. The buffer and
 * size parameters are assumed to be the same as the buffer and size
 * members of `struct tree`.
 */
void init_tree_desc(struct tree_desc *desc, const void *buf, unsigned long size);
|
2019-11-17 22:04:57 +01:00
|
|
|
|
tree-walk: add a mechanism for getting non-canonicalized modes
When using init_tree_desc() and tree_entry() to iterate over a tree, we
always canonicalize the modes coming out of the tree. This is a good
thing to prevent bugs or oddities in normal code paths, but it's
counter-productive for tools like fsck that want to see the exact
contents.
We can address this by adding an option to avoid the extra
canonicalization. A few notes on the implementation:
- I've attached the new option to the tree_desc struct itself. The
actual code change is in decode_tree_entry(), which is in turn
called by the public update_tree_entry(), tree_entry(), and
init_tree_desc() functions, plus their "gently" counterparts.
By letting it ride along in the struct, we can avoid changing the
signature of those functions, which are called many times. Plus it's
conceptually simpler: you really want a particular iteration of a
tree to be "raw" or not, rather than individual calls.
- We still have to set the new option somewhere. The struct is
initialized by init_tree_desc(). I added the new flags field only to
the "gently" version. That avoids disturbing the much more numerous
non-gentle callers, and it makes sense that anybody being careful
about looking at raw modes would also be careful about bogus trees
(i.e., the caller will be something like fsck in the first place).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-10 23:01:17 +02:00
|
|
|
/*
 * The _gently variant of init_tree_desc(): it warns and returns false on a
 * corrupt tree entry rather than dying. It additionally takes the option
 * `flags` for the descriptor (e.g. TREE_DESC_RAW_MODES); only this variant
 * accepts flags.
 */
int init_tree_desc_gently(struct tree_desc *desc, const void *buf, unsigned long size,
			  enum tree_desc_flags flags);
|
2006-03-30 08:55:43 +02:00
|
|
|
|
2010-08-25 04:53:11 +02:00
|
|
|
/*
 * Visit the next entry in a tree. Returns 1 when there are more entries
 * left to visit and 0 when all entries have been visited. This is
 * commonly used in the test of a while loop.
 *
 * It combines the common sequence of tree_entry_extract() followed by
 * update_tree_entry(); the `name_entry` argument is the structure that
 * gets filled in.
 */
int tree_entry(struct tree_desc *, struct name_entry *);
|
2019-11-17 22:04:57 +01:00
|
|
|
|
2016-09-27 22:59:51 +02:00
|
|
|
/*
 * The _gently variant of tree_entry(): per the note on the _gently
 * functions, it warns and returns false on a corrupt tree entry rather
 * than dying.
 */
int tree_entry_gently(struct tree_desc *, struct name_entry *);
|
tree_entry(): new tree-walking helper function
This adds a "tree_entry()" function that combines the common operation of
doing a "tree_entry_extract()" + "update_tree_entry()".
It also has a simplified calling convention, designed for simple loops
that traverse over a whole tree: the arguments are pointers to the tree
descriptor and a name_entry structure to fill in, and it returns a boolean
"true" if there was an entry left to be gotten in the tree.
This allows tree traversal with
struct tree_desc desc;
struct name_entry entry;
desc.buf = tree->buffer;
desc.size = tree->size;
while (tree_entry(&desc, &entry) {
... use "entry.{path, sha1, mode, pathlen}" ...
}
which is not only shorter than writing it out in full, it's hopefully less
error prone too.
[ It's actually a tad faster too - we don't need to recalculate the entry
pathlength in both extract and update, but need to do it only once.
Also, some callers can avoid doing a "strlen()" on the result, since
it's returned as part of the name_entry structure.
However, by now we're talking just 1% speedup on "git-rev-list --objects
--all", and we're definitely at the point where tree walking is no
longer the issue any more. ]
NOTE! Not everybody wants to use this new helper function, since some of
the tree walkers very much on purpose do the descriptor update separately
from the entry extraction. So the "extract + update" sequence still
remains as the core sequence, this is just a simplified interface.
We should probably add a silly two-line inline helper function for
initializing the descriptor from the "struct tree" too, just to cut down
on the noise from that common "desc" initializer.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-30 18:45:45 +02:00
|
|
|
|
2019-11-17 22:04:57 +01:00
|
|
|
/**
 * Initialize a `tree_desc` and decode its first entry given the
 * object ID of a tree. Returns the `buffer` member if the latter
 * is a valid tree identifier and NULL otherwise.
 */
void *fill_tree_descriptor(struct repository *r,
			   struct tree_desc *desc,
			   const struct object_id *oid);
|
2006-03-30 08:55:43 +02:00
|
|
|
|
2008-03-06 03:59:29 +01:00
|
|
|
struct traverse_info;
|
2008-03-06 05:06:18 +01:00
|
|
|
/*
 * Type of the callback stored in the `fn` member of `struct traverse_info`.
 * `n` counts the trees being traversed, `mask` and `dirmask` are per-tree
 * bitmasks, `entry` is an array of `n` entries, and the last argument is
 * the state of the traversal. A negative return value terminates the
 * traversal; any other value is treated as an update mask.
 */
typedef int (*traverse_callback_t)(int n, unsigned long mask, unsigned long dirmask, struct name_entry *entry, struct traverse_info *);
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/**
 * Traverse `n` number of trees in parallel. The `fn` callback member of
 * `traverse_info` is called once for each tree entry.
 */
int traverse_trees(struct index_state *istate, int n, struct tree_desc *t, struct traverse_info *info);
|
2006-03-30 08:55:43 +02:00
|
|
|
|
2019-06-27 11:28:50 +02:00
|
|
|
/*
 * NOTE(review): only the declaration is visible here. Judging by the name
 * and signature, this presumably looks up `name` starting from `tree_oid`
 * while following symlinks, and reports through `result`, `result_path`
 * and `mode` -- confirm against the implementation.
 */
enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r, struct object_id *tree_oid, const char *name, struct object_id *result, struct strbuf *result_path, unsigned short *mode);
|
2015-05-20 19:03:38 +02:00
|
|
|
|
2019-11-17 22:04:57 +01:00
|
|
|
/**
|
|
|
|
* A structure used to maintain the state of a traversal.
|
|
|
|
*/
|
2008-03-06 03:59:29 +01:00
|
|
|
struct traverse_info {
|
2015-12-21 23:34:20 +01:00
|
|
|
const char *traverse_path;
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* points to the traverse_info which was used to descend into the
|
|
|
|
* current tree. If this is the top-level tree `prev` will point to
|
|
|
|
* a dummy traverse_info.
|
|
|
|
*/
|
2008-03-06 03:59:29 +01:00
|
|
|
struct traverse_info *prev;
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/* is the entry for the current tree (if the tree is a subtree). */
|
2019-07-31 06:38:15 +02:00
|
|
|
const char *name;
|
2019-11-17 22:04:57 +01:00
|
|
|
|
2019-07-31 06:38:15 +02:00
|
|
|
size_t namelen;
|
|
|
|
unsigned mode;
|
|
|
|
|
2019-11-17 22:04:57 +01:00
|
|
|
/* is the length of the full path for the current tree. */
|
2019-07-31 06:38:18 +02:00
|
|
|
size_t pathlen;
|
2019-11-17 22:04:57 +01:00
|
|
|
|
2011-08-29 21:26:05 +02:00
|
|
|
struct pathspec *pathspec;
|
2008-03-06 03:59:29 +01:00
|
|
|
|
2019-11-17 22:04:57 +01:00
|
|
|
/* can be used by callbacks to maintain directory-file conflicts. */
|
2013-06-16 01:44:43 +02:00
|
|
|
unsigned long df_conflicts;
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/* a callback called for each entry in the tree.
|
|
|
|
*
|
|
|
|
* The arguments passed to the traverse callback are as follows:
|
|
|
|
*
|
|
|
|
* - `n` counts the number of trees being traversed.
|
|
|
|
*
|
|
|
|
* - `mask` has its nth bit set if something exists in the nth entry.
|
|
|
|
*
|
|
|
|
* - `dirmask` has its nth bit set if the nth tree's entry is a directory.
|
|
|
|
*
|
|
|
|
* - `entry` is an array of size `n` where the nth entry is from the nth tree.
|
|
|
|
*
|
|
|
|
* - `info` maintains the state of the traversal.
|
|
|
|
*
|
|
|
|
* Returning a negative value will terminate the traversal. Otherwise the
|
|
|
|
* return value is treated as an update mask. If the nth bit is set the nth tree
|
|
|
|
* will be updated and if the bit is not set the nth tree entry will be the
|
|
|
|
* same in the next callback invocation.
|
|
|
|
*/
|
2008-03-06 03:59:29 +01:00
|
|
|
traverse_callback_t fn;
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/* can be anything the `fn` callback would want to use. */
|
2008-03-06 03:59:29 +01:00
|
|
|
void *data;
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/* tells whether to stop at the first error or not. */
|
2010-08-11 10:38:07 +02:00
|
|
|
int show_all_errors;
|
2008-03-06 03:59:29 +01:00
|
|
|
};
|
2006-03-30 08:55:43 +02:00
|
|
|
|
2019-11-17 22:04:57 +01:00
|
|
|
/**
 * Find an entry in a tree given a pathname and the object ID of a tree to
 * search. Returns 0 if the entry is found and -1 otherwise. The fourth
 * and fifth parameters are set to the entry's object ID and mode
 * respectively.
 */
int get_tree_entry(struct repository *, const struct object_id *, const char *, struct object_id *, unsigned short *);
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/**
 * Generate the full pathname of a tree entry based from the root of the
 * traversal. For example, if the traversal has recursed into another
 * tree named "bar" the pathname of an entry "baz" in the "bar"
 * tree would be "bar/baz".
 */
char *make_traverse_path(char *path, size_t pathlen, const struct traverse_info *info,
			 const char *name, size_t namelen);
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/**
 * Convenience wrapper to `make_traverse_path` into a strbuf.
 */
void strbuf_make_traverse_path(struct strbuf *out,
			       const struct traverse_info *info,
			       const char *name, size_t namelen);
|
2019-11-17 22:04:57 +01:00
|
|
|
|
|
|
|
/**
 * Initialize a `traverse_info` given the pathname of the tree to start
 * traversing from.
 */
void setup_traverse_info(struct traverse_info *info, const char *base);
|
2008-03-06 03:59:29 +01:00
|
|
|
|
2019-11-17 22:04:57 +01:00
|
|
|
/**
|
|
|
|
* Calculate the length of a pathname returned by `make_traverse_path`.
|
|
|
|
* This utilizes the memory structure of a tree entry to avoid the
|
|
|
|
* overhead of using a generic strlen().
|
|
|
|
*/
|
2019-07-31 06:38:20 +02:00
|
|
|
static inline size_t traverse_path_len(const struct traverse_info *info,
|
|
|
|
size_t namelen)
|
2008-03-06 03:59:29 +01:00
|
|
|
{
|
2019-07-31 06:38:20 +02:00
|
|
|
return st_add(info->pathlen, namelen);
|
2008-03-06 03:59:29 +01:00
|
|
|
}
|
2006-04-19 23:05:47 +02:00
|
|
|
|
2011-10-24 08:36:10 +02:00
|
|
|
/* in general, positive means "kind of interesting" */
enum interesting {
	all_entries_not_interesting = -1, /* no, and no subsequent entries will be either */
	entry_not_interesting = 0,
	entry_interesting = 1,
	all_entries_interesting = 2 /* yes, and all subsequent entries will be */
};
|
|
|
|
|
2018-11-18 17:47:57 +01:00
|
|
|
/*
 * Report how interesting a tree entry is with respect to the pathspec
 * `ps`; see `enum interesting` for the meaning of the result.
 *
 * NOTE(review): the roles of the `struct strbuf *` and `int` parameters
 * are not visible from this declaration -- presumably the base path and an
 * offset into it; confirm against the implementation.
 */
enum interesting tree_entry_interesting(struct index_state *istate,
					const struct name_entry *,
					struct strbuf *, int,
					const struct pathspec *ps);
|
2010-12-15 16:02:40 +01:00
|
|
|
|
2006-03-30 08:55:43 +02:00
|
|
|
#endif
|