2023-04-11 09:41:50 +02:00
|
|
|
#include "git-compat-util.h"
|
2017-06-14 20:07:36 +02:00
|
|
|
#include "config.h"
|
2023-03-21 07:26:03 +01:00
|
|
|
#include "environment.h"
|
2023-02-24 01:09:27 +01:00
|
|
|
#include "hex.h"
|
2009-10-09 12:21:57 +02:00
|
|
|
#include "notes.h"
|
2023-04-11 09:41:49 +02:00
|
|
|
#include "object-name.h"
|
2018-05-16 01:42:15 +02:00
|
|
|
#include "object-store.h"
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
#include "blob.h"
|
2010-02-13 22:28:17 +01:00
|
|
|
#include "tree.h"
|
2009-10-09 12:21:57 +02:00
|
|
|
#include "utf8.h"
|
|
|
|
#include "strbuf.h"
|
2009-10-09 12:21:59 +02:00
|
|
|
#include "tree-walk.h"
|
2010-03-12 18:04:26 +01:00
|
|
|
#include "string-list.h"
|
|
|
|
#include "refs.h"
|
2009-10-09 12:21:59 +02:00
|
|
|
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
/*
|
|
|
|
* Use a non-balancing simple 16-tree structure with struct int_node as
|
|
|
|
* internal nodes, and struct leaf_node as leaf nodes. Each int_node has a
|
|
|
|
* 16-array of pointers to its children.
|
|
|
|
* The bottom 2 bits of each pointer are used to identify the pointer type
|
|
|
|
* - ptr & 3 == 0 - NULL pointer, assert(ptr == NULL)
|
|
|
|
* - ptr & 3 == 1 - pointer to next internal node - cast to struct int_node *
|
|
|
|
* - ptr & 3 == 2 - pointer to note entry - cast to struct leaf_node *
|
|
|
|
* - ptr & 3 == 3 - pointer to subtree entry - cast to struct leaf_node *
|
|
|
|
*
|
|
|
|
* The root node is a statically allocated struct int_node.
|
|
|
|
*/
|
|
|
|
/*
 * Internal node of the 16-tree: sixteen child slots. Each slot holds a
 * type-tagged pointer whose low 2 bits carry one of the PTR_TYPE_* values;
 * the tag must be cleared before the pointer is dereferenced.
 */
struct int_node {
	void *a[16];
};
|
|
|
|
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
/*
|
|
|
|
* Leaf nodes come in two variants, note entries and subtree entries,
|
|
|
|
* distinguished by the LSb of the leaf node pointer (see above).
|
2010-02-13 22:28:10 +01:00
|
|
|
* As a note entry, the key is the SHA1 of the referenced object, and the
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
* value is the SHA1 of the note object.
|
|
|
|
* As a subtree entry, the key is the prefix SHA1 (w/trailing NULs) of the
|
2010-02-13 22:28:10 +01:00
|
|
|
* referenced object, using the last byte of the key to store the length of
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
* the prefix. The value is the SHA1 of the tree object containing the notes
|
|
|
|
* subtree.
|
|
|
|
*/
|
|
|
|
struct leaf_node {
|
2017-05-30 19:30:37 +02:00
|
|
|
struct object_id key_oid;
|
|
|
|
struct object_id val_oid;
|
2009-10-09 12:21:59 +02:00
|
|
|
};
|
2009-10-09 12:21:57 +02:00
|
|
|
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
/*
|
|
|
|
* A notes tree may contain entries that are not notes, and that do not follow
|
|
|
|
* the naming conventions of notes. There are typically none/few of these, but
|
|
|
|
* we still need to keep track of them. Keep a simple linked list sorted alpha-
|
|
|
|
* betically on the non-note path. The list is populated when parsing tree
|
|
|
|
* objects in load_subtree(), and the non-notes are correctly written back into
|
|
|
|
* the tree objects produced by write_notes_tree().
|
|
|
|
*/
|
|
|
|
struct non_note {
|
|
|
|
struct non_note *next; /* grounded (last->next == NULL) */
|
|
|
|
char *path;
|
|
|
|
unsigned int mode;
|
2017-05-30 19:30:37 +02:00
|
|
|
struct object_id oid;
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
};
|
|
|
|
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
/*
 * Type tags stored in the bottom 2 bits of every child pointer held in a
 * struct int_node.
 */
#define PTR_TYPE_NULL 0		/* empty slot; the pointer itself is NULL */
#define PTR_TYPE_INTERNAL 1	/* points to a struct int_node */
#define PTR_TYPE_NOTE 2		/* points to a struct leaf_node (note entry) */
#define PTR_TYPE_SUBTREE 3	/* points to a struct leaf_node (subtree entry) */
|
2009-10-09 12:21:59 +02:00
|
|
|
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
/*
 * Helpers for the 2-bit type tag kept in the low bits of a pointer.
 *
 * GET_PTR_TYPE(ptr)       - extract the tag (0..3) from a tagged pointer.
 * CLR_PTR_TYPE(ptr)       - strip the tag, yielding the real address as void *.
 * SET_PTR_TYPE(ptr, type) - OR the tag into an untagged pointer; 'ptr' must
 *                           have its low 2 bits clear and 'type' must be one
 *                           of the PTR_TYPE_* values.
 *
 * The clear mask is built in uintptr_t so that its width matches the
 * pointer value it is applied to.
 */
#define GET_PTR_TYPE(ptr) ((uintptr_t) (ptr) & 3)
#define CLR_PTR_TYPE(ptr) ((void *) ((uintptr_t) (ptr) & ~(uintptr_t) 3))
#define SET_PTR_TYPE(ptr, type) ((void *) ((uintptr_t) (ptr) | (type)))
|
2009-10-09 12:21:59 +02:00
|
|
|
|
2017-08-26 10:28:01 +02:00
|
|
|
/* Nibble number 'n' of the byte array 'sha1'; even n selects the high half of byte n/2, odd n the low half. */
#define GET_NIBBLE(n, sha1) ((((sha1)[(n) >> 1]) >> (((n) & 0x01) ? 0 : 4)) & 0x0f)
|
2009-10-09 12:21:59 +02:00
|
|
|
|
2019-02-19 01:05:01 +01:00
|
|
|
/*
 * KEY_INDEX: index of the last byte of a raw object id for the hash
 * algorithm in use. SUBTREE_SHA1_PREFIXCMP() reads the prefix length of a
 * subtree key from this byte.
 *
 * FANOUT_PATH_SEPARATORS: raw hash size minus one.
 * NOTE(review): presumably the largest number of '/' separators a fully
 * fanned-out note path can contain -- confirm against the users.
 *
 * FANOUT_PATH_SEPARATORS_MAX: the same quantity computed from the
 * compile-time constant GIT_MAX_HEXSZ instead of the run-time hash size.
 */
#define KEY_INDEX (the_hash_algo->rawsz - 1)
#define FANOUT_PATH_SEPARATORS (the_hash_algo->rawsz - 1)
#define FANOUT_PATH_SEPARATORS_MAX ((GIT_MAX_HEXSZ / 2) - 1)
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
/*
 * Compare the leading bytes of 'key_sha1' with the prefix stored in
 * 'subtree_sha1'. The number of bytes compared is read from
 * subtree_sha1[KEY_INDEX]. Evaluates to the memcmp() result: 0 when the
 * key starts with the subtree prefix.
 *
 * Both arguments are parenthesized so that expression arguments (e.g.
 * 'buf + off') index and compare as intended. 'subtree_sha1' is evaluated
 * twice, so it must not have side effects.
 */
#define SUBTREE_SHA1_PREFIXCMP(key_sha1, subtree_sha1) \
	(memcmp((key_sha1), (subtree_sha1), (subtree_sha1)[KEY_INDEX]))
|
2009-10-09 12:21:59 +02:00
|
|
|
|
2010-02-13 22:28:18 +01:00
|
|
|
/*
 * Notes tree object defined at file scope with external linkage.
 * NOTE(review): presumably the tree used when a caller does not supply its
 * own notes tree -- confirm against the functions that reference it.
 */
struct notes_tree default_notes_tree;
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
|
2016-06-13 12:04:20 +02:00
|
|
|
/*
 * File-scope string list, initialized with STRING_LIST_INIT_NODUP.
 * NOTE(review): going by the name, this presumably holds the notes refs
 * selected for display -- confirm against the code that fills it.
 */
static struct string_list display_notes_refs = STRING_LIST_INIT_NODUP;
|
2010-03-12 18:04:26 +01:00
|
|
|
/*
 * File-scope pointer to pointers to struct notes_tree; being static, it
 * starts out as NULL.
 * NOTE(review): presumably an array of the notes trees used for display --
 * confirm how it is allocated and terminated where it is populated.
 */
static struct notes_tree **display_notes_trees;
|
|
|
|
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
/*
 * Forward declaration: note_tree_search() below calls load_subtree() to
 * unpack a matching subtree entry into the given int_node, passing the
 * current nibble position of the search as n.
 */
static void load_subtree(struct notes_tree *t, struct leaf_node *subtree,
|
|
|
|
struct int_node *node, unsigned int n);
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
|
|
|
|
/*
|
2009-10-09 12:22:09 +02:00
|
|
|
* Search the tree until the appropriate location for the given key is found:
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
* 1. Start at the root node, with n = 0
|
2009-10-09 12:22:09 +02:00
|
|
|
* 2. If a[0] at the current level is a matching subtree entry, unpack that
|
|
|
|
* subtree entry and remove it; restart search at the current level.
|
|
|
|
* 3. Use the nth nibble of the key as an index into a:
|
|
|
|
* - If a[n] is an int_node, recurse from #2 into that node and increment n
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
* - If a matching subtree entry, unpack that subtree entry (and remove it);
|
|
|
|
* restart search at the current level.
|
2009-10-09 12:22:09 +02:00
|
|
|
* - Otherwise, we have found one of the following:
|
|
|
|
* - a subtree entry which does not match the key
|
|
|
|
* - a note entry which may or may not match the key
|
|
|
|
* - an unused leaf node (NULL)
|
|
|
|
* In any case, set *tree and *n, and return pointer to the tree location.
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
*/
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
static void **note_tree_search(struct notes_tree *t, struct int_node **tree,
|
2009-10-09 12:22:09 +02:00
|
|
|
unsigned char *n, const unsigned char *key_sha1)
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
{
|
|
|
|
struct leaf_node *l;
|
2009-10-09 12:22:09 +02:00
|
|
|
unsigned char i;
|
|
|
|
void *p = (*tree)->a[0];
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
|
2009-10-09 12:22:09 +02:00
|
|
|
if (GET_PTR_TYPE(p) == PTR_TYPE_SUBTREE) {
|
|
|
|
l = (struct leaf_node *) CLR_PTR_TYPE(p);
|
2017-05-30 19:30:37 +02:00
|
|
|
if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_oid.hash)) {
|
2009-10-09 12:22:09 +02:00
|
|
|
/* unpack tree and resume search */
|
|
|
|
(*tree)->a[0] = NULL;
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
load_subtree(t, l, *tree, *n);
|
2009-10-09 12:22:09 +02:00
|
|
|
free(l);
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
return note_tree_search(t, tree, n, key_sha1);
|
2009-10-09 12:22:09 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
i = GET_NIBBLE(*n, key_sha1);
|
|
|
|
p = (*tree)->a[i];
|
2010-02-13 22:28:09 +01:00
|
|
|
switch (GET_PTR_TYPE(p)) {
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
case PTR_TYPE_INTERNAL:
|
2009-10-09 12:22:09 +02:00
|
|
|
*tree = CLR_PTR_TYPE(p);
|
|
|
|
(*n)++;
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
return note_tree_search(t, tree, n, key_sha1);
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
case PTR_TYPE_SUBTREE:
|
|
|
|
l = (struct leaf_node *) CLR_PTR_TYPE(p);
|
2017-05-30 19:30:37 +02:00
|
|
|
if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_oid.hash)) {
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
/* unpack tree and resume search */
|
2009-10-09 12:22:09 +02:00
|
|
|
(*tree)->a[i] = NULL;
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
load_subtree(t, l, *tree, *n);
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
free(l);
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
return note_tree_search(t, tree, n, key_sha1);
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
}
|
2009-10-09 12:22:09 +02:00
|
|
|
/* fall through */
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
default:
|
2009-10-09 12:22:09 +02:00
|
|
|
return &((*tree)->a[i]);
|
2009-10-09 12:21:59 +02:00
|
|
|
}
|
2009-10-09 12:22:09 +02:00
|
|
|
}
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
|
2009-10-09 12:22:09 +02:00
|
|
|
/*
|
|
|
|
* To find a leaf_node:
|
|
|
|
* Search to the tree location appropriate for the given key:
|
|
|
|
* If a note entry with matching key, return the note entry, else return NULL.
|
|
|
|
*/
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
static struct leaf_node *note_tree_find(struct notes_tree *t,
|
|
|
|
struct int_node *tree, unsigned char n,
|
2009-10-09 12:22:09 +02:00
|
|
|
const unsigned char *key_sha1)
|
|
|
|
{
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
void **p = note_tree_search(t, &tree, &n, key_sha1);
|
2009-10-09 12:22:09 +02:00
|
|
|
if (GET_PTR_TYPE(*p) == PTR_TYPE_NOTE) {
|
|
|
|
struct leaf_node *l = (struct leaf_node *) CLR_PTR_TYPE(*p);
|
2018-08-28 23:22:44 +02:00
|
|
|
if (hasheq(key_sha1, l->key_oid.hash))
|
2009-10-09 12:22:09 +02:00
|
|
|
return l;
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects are used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there are more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree have
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must references a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
}
|
|
|
|
return NULL;
|
2009-10-09 12:21:59 +02:00
|
|
|
}
|
|
|
|
|
2010-02-13 22:28:14 +01:00
|
|
|
/*
|
|
|
|
* How to consolidate an int_node:
|
|
|
|
* If there are > 1 non-NULL entries, give up and return non-zero.
|
|
|
|
* Otherwise replace the int_node at the given index in the given parent node
|
notes: do not break note_tree structure in note_tree_consolidate()
After a note is removed, note_tree_consolidate is called to eliminate
some useless nodes. The typical case is that if you had an int_node
with 2 PTR_TYPE_NOTEs in it, and remove one of them, then the
PTR_TYPE_INTERNAL pointer in the parent tree can be replaced with the
remaining PTR_TYPE_NOTE.
This works fine when PTR_TYPE_NOTEs are involved, but falls flat when
other types are involved.
To put things in more practical terms, let's say we start from an empty
notes tree, and add 3 notes:
- one for a sha1 that starts with 424
- one for a sha1 that starts with 428
- one for a sha1 that starts with 4c
To keep track of this, note_tree.root will have a PTR_TYPE_INTERNAL at
a[4], pointing to an int_node*.
In turn, that int_node* will have a PTR_TYPE_NOTE at a[0xc], pointing to
the leaf_node* with the key and value, and a PTR_TYPE_INTERNAL at a[2],
pointing to another int_node*.
That other int_node* will have 2 PTR_TYPE_NOTE, one at a[4] and the
other at a[8].
When looking for the note for the sha1 starting with 428, get_note() will
recurse through (simplified) root.a[4].a[2].a[8].
Now, if we remove the note for the sha1 that starts with 4c, we're left
with a int_node* with only one PTR_TYPE_INTERNAL entry in it. After
note_tree_consolidate runs, root.a[4] now points to what used to be
pointed at by root.a[4].a[2].
Which means looking up for the note for the sha1 starting with 428 now
fails because there is nothing at root.a[4].a[2] anymore: there is only
root.a[4].a[4] and root.a[4].a[8], which don't match the expected
structure for the lookup.
So if all there is left in an int_node* is a PTR_TYPE_INTERNAL pointer,
we can't safely remove it. I think the same applies for PTR_TYPE_SUBTREE
pointers. IOW, only PTR_TYPE_NOTEs are safe to be moved to the parent
int_node*.
This doesn't have a practical effect on git because all that happens
after a remove_note is a write_notes_tree, which just iterates the entire
note tree, but this affects anything using libgit.a that would try to do
lookups after removing notes.
Signed-off-by: Mike Hommey <mh@glandium.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-26 03:52:12 +02:00
|
|
|
* with the only NOTE entry (or a NULL entry if no entries) from the given
|
|
|
|
* tree, and return 0.
|
2010-02-13 22:28:14 +01:00
|
|
|
*/
|
|
|
|
static int note_tree_consolidate(struct int_node *tree,
|
|
|
|
struct int_node *parent, unsigned char index)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
void *p = NULL;
|
|
|
|
|
|
|
|
assert(tree && parent);
|
|
|
|
assert(CLR_PTR_TYPE(parent->a[index]) == tree);
|
|
|
|
|
|
|
|
for (i = 0; i < 16; i++) {
|
|
|
|
if (GET_PTR_TYPE(tree->a[i]) != PTR_TYPE_NULL) {
|
|
|
|
if (p) /* more than one entry */
|
|
|
|
return -2;
|
|
|
|
p = tree->a[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
notes: do not break note_tree structure in note_tree_consolidate()
After a note is removed, note_tree_consolidate is called to eliminate
some useless nodes. The typical case is that if you had an int_node
with 2 PTR_TYPE_NOTEs in it, and remove one of them, then the
PTR_TYPE_INTERNAL pointer in the parent tree can be replaced with the
remaining PTR_TYPE_NOTE.
This works fine when PTR_TYPE_NOTEs are involved, but falls flat when
other types are involved.
To put things in more practical terms, let's say we start from an empty
notes tree, and add 3 notes:
- one for a sha1 that starts with 424
- one for a sha1 that starts with 428
- one for a sha1 that starts with 4c
To keep track of this, note_tree.root will have a PTR_TYPE_INTERNAL at
a[4], pointing to an int_node*.
In turn, that int_node* will have a PTR_TYPE_NOTE at a[0xc], pointing to
the leaf_node* with the key and value, and a PTR_TYPE_INTERNAL at a[2],
pointing to another int_node*.
That other int_node* will have 2 PTR_TYPE_NOTE, one at a[4] and the
other at a[8].
When looking for the note for the sha1 starting with 428, get_note() will
recurse through (simplified) root.a[4].a[2].a[8].
Now, if we remove the note for the sha1 that starts with 4c, we're left
with a int_node* with only one PTR_TYPE_INTERNAL entry in it. After
note_tree_consolidate runs, root.a[4] now points to what used to be
pointed at by root.a[4].a[2].
Which means looking up for the note for the sha1 starting with 428 now
fails because there is nothing at root.a[4].a[2] anymore: there is only
root.a[4].a[4] and root.a[4].a[8], which don't match the expected
structure for the lookup.
So if all there is left in an int_node* is a PTR_TYPE_INTERNAL pointer,
we can't safely remove it. I think the same applies for PTR_TYPE_SUBTREE
pointers. IOW, only PTR_TYPE_NOTEs are safe to be moved to the parent
int_node*.
This doesn't have a practical effect on git because all that happens
after a remove_note is a write_notes_tree, which just iterates the entire
note tree, but this affects anything using libgit.a that would try to do
lookups after removing notes.
Signed-off-by: Mike Hommey <mh@glandium.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-26 03:52:12 +02:00
|
|
|
if (p && (GET_PTR_TYPE(p) != PTR_TYPE_NOTE))
|
|
|
|
return -2;
|
2010-02-13 22:28:14 +01:00
|
|
|
/* replace tree with p in parent[index] */
|
|
|
|
parent->a[index] = p;
|
|
|
|
free(tree);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* To remove a leaf_node:
|
|
|
|
* Search to the tree location appropriate for the given leaf_node's key:
|
|
|
|
* - If location does not hold a matching entry, abort and do nothing.
|
2010-08-31 17:56:50 +02:00
|
|
|
* - Copy the matching entry's value into the given entry.
|
2010-02-13 22:28:14 +01:00
|
|
|
* - Replace the matching leaf_node with a NULL entry (and free the leaf_node).
|
|
|
|
* - Consolidate int_nodes repeatedly, while walking up the tree towards root.
|
|
|
|
*/
|
2010-08-31 17:56:50 +02:00
|
|
|
static void note_tree_remove(struct notes_tree *t,
|
|
|
|
struct int_node *tree, unsigned char n,
|
|
|
|
struct leaf_node *entry)
|
2010-02-13 22:28:14 +01:00
|
|
|
{
|
|
|
|
struct leaf_node *l;
|
2019-02-19 01:05:01 +01:00
|
|
|
struct int_node *parent_stack[GIT_MAX_RAWSZ];
|
2010-02-13 22:28:14 +01:00
|
|
|
unsigned char i, j;
|
2017-05-30 19:30:37 +02:00
|
|
|
void **p = note_tree_search(t, &tree, &n, entry->key_oid.hash);
|
2010-02-13 22:28:14 +01:00
|
|
|
|
|
|
|
assert(GET_PTR_TYPE(entry) == 0); /* no type bits set */
|
|
|
|
if (GET_PTR_TYPE(*p) != PTR_TYPE_NOTE)
|
|
|
|
return; /* type mismatch, nothing to remove */
|
|
|
|
l = (struct leaf_node *) CLR_PTR_TYPE(*p);
|
2018-08-28 23:22:48 +02:00
|
|
|
if (!oideq(&l->key_oid, &entry->key_oid))
|
2010-02-13 22:28:14 +01:00
|
|
|
return; /* key mismatch, nothing to remove */
|
|
|
|
|
|
|
|
/* we have found a matching entry */
|
2017-05-30 19:30:37 +02:00
|
|
|
oidcpy(&entry->val_oid, &l->val_oid);
|
2010-02-13 22:28:14 +01:00
|
|
|
free(l);
|
|
|
|
*p = SET_PTR_TYPE(NULL, PTR_TYPE_NULL);
|
|
|
|
|
|
|
|
/* consolidate this tree level, and parent levels, if possible */
|
|
|
|
if (!n)
|
|
|
|
return; /* cannot consolidate top level */
|
|
|
|
/* first, build stack of ancestors between root and current node */
|
2010-02-13 22:28:18 +01:00
|
|
|
parent_stack[0] = t->root;
|
2010-02-13 22:28:14 +01:00
|
|
|
for (i = 0; i < n; i++) {
|
2017-05-30 19:30:37 +02:00
|
|
|
j = GET_NIBBLE(i, entry->key_oid.hash);
|
2010-02-13 22:28:14 +01:00
|
|
|
parent_stack[i + 1] = CLR_PTR_TYPE(parent_stack[i]->a[j]);
|
|
|
|
}
|
|
|
|
assert(i == n && parent_stack[i] == tree);
|
|
|
|
/* next, unwind stack until note_tree_consolidate() is done */
|
|
|
|
while (i > 0 &&
|
|
|
|
!note_tree_consolidate(parent_stack[i], parent_stack[i - 1],
|
2017-05-30 19:30:37 +02:00
|
|
|
GET_NIBBLE(i - 1, entry->key_oid.hash)))
|
2010-02-13 22:28:14 +01:00
|
|
|
i--;
|
|
|
|
}
|
|
|
|
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
/*
|
|
|
|
* To insert a leaf_node:
|
2009-10-09 12:22:09 +02:00
|
|
|
* Search to the tree location appropriate for the given leaf_node's key:
|
|
|
|
* - If location is unused (NULL), store the tweaked pointer directly there
|
|
|
|
* - If location holds a note entry that matches the note-to-be-inserted, then
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
* combine the two notes (by calling the given combine_notes function).
|
2009-10-09 12:22:09 +02:00
|
|
|
* - If location holds a note entry that matches the subtree-to-be-inserted,
|
|
|
|
* then unpack the subtree-to-be-inserted into the location.
|
|
|
|
* - If location holds a matching subtree entry, unpack the subtree at that
|
|
|
|
* location, and restart the insert operation from that level.
|
|
|
|
* - Else, create a new int_node, holding both the node-at-location and the
|
|
|
|
* node-to-be-inserted, and store the new int_node into the location.
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
*/
|
2010-11-15 00:52:26 +01:00
|
|
|
static int note_tree_insert(struct notes_tree *t, struct int_node *tree,
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
unsigned char n, struct leaf_node *entry, unsigned char type,
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
combine_notes_fn combine_notes)
|
2009-10-09 12:21:59 +02:00
|
|
|
{
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
struct int_node *new_node;
|
2009-10-09 12:22:09 +02:00
|
|
|
struct leaf_node *l;
|
2017-05-30 19:30:37 +02:00
|
|
|
void **p = note_tree_search(t, &tree, &n, entry->key_oid.hash);
|
2010-11-15 00:52:26 +01:00
|
|
|
int ret = 0;
|
2009-10-09 12:22:09 +02:00
|
|
|
|
|
|
|
assert(GET_PTR_TYPE(entry) == 0); /* no type bits set */
|
|
|
|
l = (struct leaf_node *) CLR_PTR_TYPE(*p);
|
2010-02-13 22:28:09 +01:00
|
|
|
switch (GET_PTR_TYPE(*p)) {
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
case PTR_TYPE_NULL:
|
2009-10-09 12:22:09 +02:00
|
|
|
assert(!*p);
|
2017-05-30 19:30:37 +02:00
|
|
|
if (is_null_oid(&entry->val_oid))
|
2010-11-09 22:49:41 +01:00
|
|
|
free(entry);
|
|
|
|
else
|
|
|
|
*p = SET_PTR_TYPE(entry, type);
|
2010-11-15 00:52:26 +01:00
|
|
|
return 0;
|
2009-10-09 12:22:09 +02:00
|
|
|
case PTR_TYPE_NOTE:
|
|
|
|
switch (type) {
|
|
|
|
case PTR_TYPE_NOTE:
|
convert "oidcmp() == 0" to oideq()
Using the more restrictive oideq() should, in the long run,
give the compiler more opportunities to optimize these
callsites. For now, this conversion should be a complete
noop with respect to the generated code.
The result is also perhaps a little more readable, as it
avoids the "zero is equal" idiom. Since it's so prevalent in
C, I think seasoned programmers tend not to even notice it
anymore, but it can sometimes make for awkward double
negations (e.g., we can drop a few !!oidcmp() instances
here).
This patch was generated almost entirely by the included
coccinelle patch. This mechanical conversion should be
completely safe, because we check explicitly for cases where
oidcmp() is compared to 0, which is what oideq() is doing
under the hood. Note that we don't have to catch "!oidcmp()"
separately; coccinelle's standard isomorphisms make sure the
two are treated equivalently.
I say "almost" because I did hand-edit the coccinelle output
to fix up a few style violations (it mostly keeps the
original formatting, but sometimes unwraps long lines).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-28 23:22:40 +02:00
|
|
|
if (oideq(&l->key_oid, &entry->key_oid)) {
|
2009-10-09 12:22:09 +02:00
|
|
|
/* skip concatenation if l == entry */
|
2019-08-25 07:18:18 +02:00
|
|
|
if (oideq(&l->val_oid, &entry->val_oid)) {
|
|
|
|
free(entry);
|
2010-11-15 00:52:26 +01:00
|
|
|
return 0;
|
2019-08-25 07:18:18 +02:00
|
|
|
}
|
2009-10-09 12:22:09 +02:00
|
|
|
|
2018-01-28 01:13:17 +01:00
|
|
|
ret = combine_notes(&l->val_oid,
|
|
|
|
&entry->val_oid);
|
2017-05-30 19:30:37 +02:00
|
|
|
if (!ret && is_null_oid(&l->val_oid))
|
2010-11-09 22:49:41 +01:00
|
|
|
note_tree_remove(t, tree, n, entry);
|
2009-10-09 12:22:09 +02:00
|
|
|
free(entry);
|
2010-11-15 00:52:26 +01:00
|
|
|
return ret;
|
2009-10-09 12:22:09 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case PTR_TYPE_SUBTREE:
|
2017-05-30 19:30:37 +02:00
|
|
|
if (!SUBTREE_SHA1_PREFIXCMP(l->key_oid.hash,
|
|
|
|
entry->key_oid.hash)) {
|
2009-10-09 12:22:09 +02:00
|
|
|
/* unpack 'entry' */
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
load_subtree(t, entry, tree, n);
|
2009-10-09 12:22:09 +02:00
|
|
|
free(entry);
|
2010-11-15 00:52:26 +01:00
|
|
|
return 0;
|
2009-10-09 12:22:09 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case PTR_TYPE_SUBTREE:
|
2017-05-30 19:30:37 +02:00
|
|
|
if (!SUBTREE_SHA1_PREFIXCMP(entry->key_oid.hash, l->key_oid.hash)) {
|
2009-10-09 12:22:09 +02:00
|
|
|
/* unpack 'l' and restart insert */
|
|
|
|
*p = NULL;
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
load_subtree(t, l, tree, n);
|
2009-10-09 12:22:09 +02:00
|
|
|
free(l);
|
2010-11-15 00:52:26 +01:00
|
|
|
return note_tree_insert(t, tree, n, entry, type,
|
|
|
|
combine_notes);
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
}
|
2009-10-09 12:22:09 +02:00
|
|
|
break;
|
2009-10-09 12:21:59 +02:00
|
|
|
}
|
2009-10-09 12:22:09 +02:00
|
|
|
|
|
|
|
/* non-matching leaf_node */
|
|
|
|
assert(GET_PTR_TYPE(*p) == PTR_TYPE_NOTE ||
|
|
|
|
GET_PTR_TYPE(*p) == PTR_TYPE_SUBTREE);
|
2017-05-30 19:30:37 +02:00
|
|
|
if (is_null_oid(&entry->val_oid)) { /* skip insertion of empty note */
|
2010-11-09 22:49:41 +01:00
|
|
|
free(entry);
|
2010-11-15 00:52:26 +01:00
|
|
|
return 0;
|
2010-11-09 22:49:41 +01:00
|
|
|
}
|
2014-05-26 17:33:52 +02:00
|
|
|
new_node = (struct int_node *) xcalloc(1, sizeof(struct int_node));
|
2010-11-15 00:52:26 +01:00
|
|
|
ret = note_tree_insert(t, new_node, n + 1, l, GET_PTR_TYPE(*p),
|
|
|
|
combine_notes);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2009-10-09 12:22:09 +02:00
|
|
|
*p = SET_PTR_TYPE(new_node, PTR_TYPE_INTERNAL);
|
2010-11-15 00:52:26 +01:00
|
|
|
return note_tree_insert(t, new_node, n + 1, entry, type, combine_notes);
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
}
|
2009-10-09 12:21:59 +02:00
|
|
|
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
/* Free the entire notes data contained in the given tree */
|
|
|
|
static void note_tree_free(struct int_node *tree)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < 16; i++) {
|
|
|
|
void *p = tree->a[i];
|
2010-02-13 22:28:09 +01:00
|
|
|
switch (GET_PTR_TYPE(p)) {
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects are used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there are more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree have
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must references a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
case PTR_TYPE_INTERNAL:
|
|
|
|
note_tree_free(CLR_PTR_TYPE(p));
|
|
|
|
/* fall through */
|
|
|
|
case PTR_TYPE_NOTE:
|
|
|
|
case PTR_TYPE_SUBTREE:
|
|
|
|
free(CLR_PTR_TYPE(p));
|
|
|
|
}
|
2009-10-09 12:21:59 +02:00
|
|
|
}
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects are used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there are more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree have
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must references a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
}
|
2009-10-09 12:21:59 +02:00
|
|
|
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
static int non_note_cmp(const struct non_note *a, const struct non_note *b)
|
|
|
|
{
|
|
|
|
return strcmp(a->path, b->path);
|
|
|
|
}
|
|
|
|
|
2015-08-19 20:12:41 +02:00
|
|
|
/* note: takes ownership of path string */
|
|
|
|
static void add_non_note(struct notes_tree *t, char *path,
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
unsigned int mode, const unsigned char *sha1)
|
|
|
|
{
|
|
|
|
struct non_note *p = t->prev_non_note, *n;
|
|
|
|
n = (struct non_note *) xmalloc(sizeof(struct non_note));
|
|
|
|
n->next = NULL;
|
2015-08-19 20:12:41 +02:00
|
|
|
n->path = path;
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
n->mode = mode;
|
2021-04-26 03:02:50 +02:00
|
|
|
oidread(&n->oid, sha1);
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
t->prev_non_note = n;
|
|
|
|
|
|
|
|
if (!t->first_non_note) {
|
|
|
|
t->first_non_note = n;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (non_note_cmp(p, n) < 0)
|
|
|
|
; /* do nothing */
|
|
|
|
else if (non_note_cmp(t->first_non_note, n) <= 0)
|
|
|
|
p = t->first_non_note;
|
|
|
|
else {
|
|
|
|
/* n sorts before t->first_non_note */
|
|
|
|
n->next = t->first_non_note;
|
|
|
|
t->first_non_note = n;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* n sorts equal or after p */
|
|
|
|
while (p->next && non_note_cmp(p->next, n) <= 0)
|
|
|
|
p = p->next;
|
|
|
|
|
|
|
|
if (non_note_cmp(p, n) == 0) { /* n ~= p; overwrite p with n */
|
|
|
|
assert(strcmp(p->path, n->path) == 0);
|
|
|
|
p->mode = n->mode;
|
2017-05-30 19:30:37 +02:00
|
|
|
oidcpy(&p->oid, &n->oid);
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
free(n);
|
|
|
|
t->prev_non_note = p;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* n sorts between p and p->next */
|
|
|
|
n->next = p->next;
|
|
|
|
p->next = n;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void load_subtree(struct notes_tree *t, struct leaf_node *subtree,
|
|
|
|
struct int_node *node, unsigned int n)
|
2009-10-09 12:21:59 +02:00
|
|
|
{
|
2017-05-30 19:30:38 +02:00
|
|
|
struct object_id object_oid;
|
2017-08-26 10:28:12 +02:00
|
|
|
size_t prefix_len;
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
void *buf;
|
2009-10-09 12:21:59 +02:00
|
|
|
struct tree_desc desc;
|
|
|
|
struct name_entry entry;
|
2019-02-19 01:05:01 +01:00
|
|
|
const unsigned hashsz = the_hash_algo->rawsz;
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
|
2019-06-27 11:28:48 +02:00
|
|
|
buf = fill_tree_descriptor(the_repository, &desc, &subtree->val_oid);
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
if (!buf)
|
|
|
|
die("Could not read %s for notes-index",
|
2017-05-30 19:30:37 +02:00
|
|
|
oid_to_hex(&subtree->val_oid));
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
|
2017-05-30 19:30:38 +02:00
|
|
|
prefix_len = subtree->key_oid.hash[KEY_INDEX];
|
2019-02-19 01:05:01 +01:00
|
|
|
if (prefix_len >= hashsz)
|
2017-09-08 18:10:10 +02:00
|
|
|
BUG("prefix_len (%"PRIuMAX") is out of range", (uintmax_t)prefix_len);
|
|
|
|
if (prefix_len * 2 < n)
|
|
|
|
BUG("prefix_len (%"PRIuMAX") is too small", (uintmax_t)prefix_len);
|
2017-05-30 19:30:38 +02:00
|
|
|
memcpy(object_oid.hash, subtree->key_oid.hash, prefix_len);
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
while (tree_entry(&desc, &entry)) {
|
2017-08-26 10:28:03 +02:00
|
|
|
unsigned char type;
|
|
|
|
struct leaf_node *l;
|
2017-08-26 10:28:12 +02:00
|
|
|
size_t path_len = strlen(entry.path);
|
2017-08-26 10:28:05 +02:00
|
|
|
|
2019-02-19 01:05:01 +01:00
|
|
|
if (path_len == 2 * (hashsz - prefix_len)) {
|
2017-08-26 10:28:05 +02:00
|
|
|
/* This is potentially the remainder of the SHA-1 */
|
2017-08-26 10:28:07 +02:00
|
|
|
|
|
|
|
if (!S_ISREG(entry.mode))
|
|
|
|
/* notes must be blobs */
|
|
|
|
goto handle_non_note;
|
|
|
|
|
2017-08-26 10:28:11 +02:00
|
|
|
if (hex_to_bytes(object_oid.hash + prefix_len, entry.path,
|
2019-02-19 01:05:01 +01:00
|
|
|
hashsz - prefix_len))
|
2017-08-26 10:28:05 +02:00
|
|
|
goto handle_non_note; /* entry.path is not a SHA1 */
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
type = PTR_TYPE_NOTE;
|
2017-08-26 10:28:05 +02:00
|
|
|
} else if (path_len == 2) {
|
|
|
|
/* This is potentially an internal node */
|
2017-08-26 10:28:10 +02:00
|
|
|
size_t len = prefix_len;
|
2017-08-26 10:28:06 +02:00
|
|
|
|
|
|
|
if (!S_ISDIR(entry.mode))
|
|
|
|
/* internal nodes must be trees */
|
|
|
|
goto handle_non_note;
|
|
|
|
|
2017-08-26 10:28:11 +02:00
|
|
|
if (hex_to_bytes(object_oid.hash + len++, entry.path, 1))
|
2017-08-26 10:28:05 +02:00
|
|
|
goto handle_non_note; /* entry.path is not a SHA1 */
|
2017-08-26 10:28:03 +02:00
|
|
|
|
2017-08-26 10:28:10 +02:00
|
|
|
/*
|
|
|
|
* Pad the rest of the SHA-1 with zeros,
|
|
|
|
* except for the last byte, where we write
|
|
|
|
* the length:
|
|
|
|
*/
|
2019-02-19 01:05:01 +01:00
|
|
|
memset(object_oid.hash + len, 0, hashsz - len - 1);
|
2017-08-26 10:28:10 +02:00
|
|
|
object_oid.hash[KEY_INDEX] = (unsigned char)len;
|
2017-08-26 10:28:09 +02:00
|
|
|
|
2017-08-26 10:28:02 +02:00
|
|
|
type = PTR_TYPE_SUBTREE;
|
2017-08-26 10:28:05 +02:00
|
|
|
} else {
|
|
|
|
/* This can't be part of a note */
|
|
|
|
goto handle_non_note;
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
}
|
2017-08-26 10:28:05 +02:00
|
|
|
|
2021-03-13 17:17:22 +01:00
|
|
|
CALLOC_ARRAY(l, 1);
|
2017-08-26 10:28:09 +02:00
|
|
|
oidcpy(&l->key_oid, &object_oid);
|
2019-01-15 01:39:44 +01:00
|
|
|
oidcpy(&l->val_oid, &entry.oid);
|
2021-04-26 03:02:55 +02:00
|
|
|
oid_set_algo(&l->key_oid, the_hash_algo);
|
|
|
|
oid_set_algo(&l->val_oid, the_hash_algo);
|
2017-08-26 10:28:02 +02:00
|
|
|
if (note_tree_insert(t, node, n, l, type,
|
|
|
|
combine_notes_concatenate))
|
|
|
|
die("Failed to load %s %s into notes tree "
|
|
|
|
"from %s",
|
|
|
|
type == PTR_TYPE_NOTE ? "note" : "subtree",
|
2019-08-25 09:19:51 +02:00
|
|
|
oid_to_hex(&object_oid), t->ref);
|
2017-08-26 10:28:02 +02:00
|
|
|
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
handle_non_note:
|
|
|
|
/*
|
2017-08-26 10:28:04 +02:00
|
|
|
* Determine full path for this non-note entry. The
|
|
|
|
* filename is already found in entry.path, but the
|
|
|
|
* directory part of the path must be deduced from the
|
|
|
|
* subtree containing this entry based on our
|
|
|
|
* knowledge that the overall notes tree follows a
|
|
|
|
* strict byte-based progressive fanout structure
|
|
|
|
* (i.e. using 2/38, 2/2/36, etc. fanouts).
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
*/
|
|
|
|
{
|
2015-08-19 20:12:41 +02:00
|
|
|
struct strbuf non_note_path = STRBUF_INIT;
|
2017-05-30 19:30:37 +02:00
|
|
|
const char *q = oid_to_hex(&subtree->key_oid);
|
2017-08-26 10:28:12 +02:00
|
|
|
size_t i;
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
for (i = 0; i < prefix_len; i++) {
|
2015-08-19 20:12:41 +02:00
|
|
|
strbuf_addch(&non_note_path, *q++);
|
|
|
|
strbuf_addch(&non_note_path, *q++);
|
|
|
|
strbuf_addch(&non_note_path, '/');
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
}
|
2015-08-19 20:12:41 +02:00
|
|
|
strbuf_addstr(&non_note_path, entry.path);
|
2021-04-26 03:02:55 +02:00
|
|
|
oid_set_algo(&entry.oid, the_hash_algo);
|
2015-08-19 20:12:41 +02:00
|
|
|
add_non_note(t, strbuf_detach(&non_note_path, NULL),
|
2019-01-15 01:39:44 +01:00
|
|
|
entry.mode, entry.oid.hash);
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
}
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory datastructure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
}
|
|
|
|
free(buf);
|
|
|
|
}
|
|
|
|
|
2010-02-13 22:28:16 +01:00
|
|
|
/*
|
|
|
|
* Determine optimal on-disk fanout for this part of the notes tree
|
|
|
|
*
|
|
|
|
* Given a (sub)tree and the level in the internal tree structure, determine
|
|
|
|
* whether or not the given existing fanout should be expanded for this
|
|
|
|
* (sub)tree.
|
|
|
|
*
|
|
|
|
* Values of the 'fanout' variable:
|
|
|
|
* - 0: No fanout (all notes are stored directly in the root notes tree)
|
|
|
|
* - 1: 2/38 fanout
|
|
|
|
* - 2: 2/2/36 fanout
|
|
|
|
* - 3: 2/2/2/34 fanout
|
|
|
|
* etc.
|
|
|
|
*/
|
|
|
|
static unsigned char determine_fanout(struct int_node *tree, unsigned char n,
|
|
|
|
unsigned char fanout)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* The following is a simple heuristic that works well in practice:
|
|
|
|
* For each even-numbered 16-tree level (remember that each on-disk
|
|
|
|
* fanout level corresponds to _two_ 16-tree levels), peek at all 16
|
|
|
|
* entries at that tree level. If all of them are either int_nodes or
|
|
|
|
* subtree entries, then there are likely plenty of notes below this
|
|
|
|
* level, so we return an incremented fanout.
|
|
|
|
*/
|
|
|
|
unsigned int i;
|
|
|
|
if ((n % 2) || (n > 2 * fanout))
|
|
|
|
return fanout;
|
|
|
|
for (i = 0; i < 16; i++) {
|
|
|
|
switch (GET_PTR_TYPE(tree->a[i])) {
|
|
|
|
case PTR_TYPE_SUBTREE:
|
|
|
|
case PTR_TYPE_INTERNAL:
|
|
|
|
continue;
|
|
|
|
default:
|
|
|
|
return fanout;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return fanout + 1;
|
|
|
|
}
|
|
|
|
|
2019-02-19 01:05:01 +01:00
|
|
|
/*
 * hex oid + '/' between each pair of hex digits + NUL
 *
 * The replacement list is parenthesized so that the macro expands to a
 * single value regardless of the operators surrounding it at the use site.
 */
#define FANOUT_PATH_MAX (GIT_MAX_HEXSZ + FANOUT_PATH_SEPARATORS_MAX + 1)
|
2015-09-24 23:08:24 +02:00
|
|
|
|
2019-02-19 01:05:02 +01:00
|
|
|
static void construct_path_with_fanout(const unsigned char *hash,
|
2010-02-13 22:28:16 +01:00
|
|
|
unsigned char fanout, char *path)
|
|
|
|
{
|
|
|
|
unsigned int i = 0, j = 0;
|
2019-02-19 01:05:02 +01:00
|
|
|
const char *hex_hash = hash_to_hex(hash);
|
2019-02-19 01:05:01 +01:00
|
|
|
assert(fanout < the_hash_algo->rawsz);
|
2010-02-13 22:28:16 +01:00
|
|
|
while (fanout) {
|
2019-02-19 01:05:02 +01:00
|
|
|
path[i++] = hex_hash[j++];
|
|
|
|
path[i++] = hex_hash[j++];
|
2010-02-13 22:28:16 +01:00
|
|
|
path[i++] = '/';
|
|
|
|
fanout--;
|
|
|
|
}
|
2019-02-19 01:05:02 +01:00
|
|
|
xsnprintf(path + i, FANOUT_PATH_MAX - i, "%s", hex_hash + j);
|
2010-02-13 22:28:16 +01:00
|
|
|
}
|
|
|
|
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
static int for_each_note_helper(struct notes_tree *t, struct int_node *tree,
|
|
|
|
unsigned char n, unsigned char fanout, int flags,
|
|
|
|
each_note_fn fn, void *cb_data)
|
2010-02-13 22:28:16 +01:00
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
void *p;
|
|
|
|
int ret = 0;
|
|
|
|
struct leaf_node *l;
|
2015-09-24 23:08:24 +02:00
|
|
|
static char path[FANOUT_PATH_MAX];
|
2010-02-13 22:28:16 +01:00
|
|
|
|
|
|
|
fanout = determine_fanout(tree, n, fanout);
|
|
|
|
for (i = 0; i < 16; i++) {
|
|
|
|
redo:
|
|
|
|
p = tree->a[i];
|
|
|
|
switch (GET_PTR_TYPE(p)) {
|
|
|
|
case PTR_TYPE_INTERNAL:
|
|
|
|
/* recurse into int_node */
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
ret = for_each_note_helper(t, CLR_PTR_TYPE(p), n + 1,
|
2010-02-13 22:28:16 +01:00
|
|
|
fanout, flags, fn, cb_data);
|
|
|
|
break;
|
|
|
|
case PTR_TYPE_SUBTREE:
|
|
|
|
l = (struct leaf_node *) CLR_PTR_TYPE(p);
|
|
|
|
/*
|
|
|
|
* Subtree entries in the note tree represent parts of
|
|
|
|
* the note tree that have not yet been explored. There
|
|
|
|
* is a direct relationship between subtree entries at
|
|
|
|
* level 'n' in the tree, and the 'fanout' variable:
|
notes.c: fix off-by-one error when decreasing notes fanout
As noted in the previous commit, the nature of the fanout heuristic
in the notes code causes the exact point at which we increase or
decrease the notes fanout to vary with the objects being annotated.
Since the object ids generated by the test environment are
deterministic (by design), the notes generated and tested by t3305
are always the same, and we therefore happen to see the same fanout
behavior from one run to the next.
Coincidentally, if we were to change the test environment slightly
(say by making a test commit on an unrelated branch before we start
the t3305 test proper), we not only see the fanout switch happen at
different points, we also manage to trigger a _bug_ in the notes
code where the fanout 1 -> 0 switch is not applied uniformly across
the notes tree, but instead yields a notes tree like this:
...
bdeafb301e44b0e4db0f738a2d2a7beefdb70b70
bff2d39b4f7122bd4c5caee3de353a774d1e632a
d3/8ec8f851adf470131178085bfbaab4b12ad2a7
e0b173960431a3e692ae929736df3c9b73a11d5b
eb3c3aede523d729990ac25c62a93eb47c21e2e3
...
The bug occurs when we are writing out a notes tree with a newly
decreased fanout, and the notes tree contains unexpanded subtrees
that should be consolidated into the parent tree as a consequence of
the decreased fanout):
Subtrees that happen to sit at an _even_ level in the internal notes
16-tree structure (in other words: subtrees whose path - "d3" in the
example above - is unique in the first nibble - i.e. there are no
other note paths that start with "d") are _not_ unpacked as part of
the tree writeout. This error will repeat itself in subsequent note
trees until the subtree is forced to be unpacked. In t3305 this only
happens when the d38ec8f8 note is itself removed from the tree.
The error is not severe (no information is lost, and the notes code
is able to read/decode this tree and manipulate it correctly), but
this is nonetheless a bug in the current implementation that should
be fixed.
That said, fixing the off-by-one error is not without complications:
We must take into account that the load_subtree() call from
for_each_note_helper() (that is now done to correctly unpack the
subtree while we're writing out the notes tree) may end up inserting
unpacked non-notes into the linked list of non_note entries held by
the struct notes_tree. Since we are in the process of writing out the
notes tree, this linked list is currently in the process of being
traversed by write_each_non_note_until(). The unpacked non-notes are
necessarily inserted between the last non-note we wrote out, and the
next non-note to be written. Hence, we cannot simply hold the
next_non_note to write in struct write_each_note_data (as we would
then silently skip these newly inserted notes), but must instead
always follow the ->next pointer from the last non-note we wrote.
(This part was caught by an existing test in t3304.)
Cc: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Brian M. Carlson <sandals@crustytoothpaste.net>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-03 22:04:45 +01:00
|
|
|
* Subtree entries at level 'n < 2 * fanout' should be
|
2010-02-13 22:28:16 +01:00
|
|
|
* preserved, since they correspond exactly to a fanout
|
|
|
|
* directory in the on-disk structure. However, subtree
|
notes.c: fix off-by-one error when decreasing notes fanout
As noted in the previous commit, the nature of the fanout heuristic
in the notes code causes the exact point at which we increase or
decrease the notes fanout to vary with the objects being annotated.
Since the object ids generated by the test environment are
deterministic (by design), the notes generated and tested by t3305
are always the same, and we therefore happen to see the same fanout
behavior from one run to the next.
Coincidentally, if we were to change the test environment slightly
(say by making a test commit on an unrelated branch before we start
the t3305 test proper), we not only see the fanout switch happen at
different points, we also manage to trigger a _bug_ in the notes
code where the fanout 1 -> 0 switch is not applied uniformly across
the notes tree, but instead yields a notes tree like this:
...
bdeafb301e44b0e4db0f738a2d2a7beefdb70b70
bff2d39b4f7122bd4c5caee3de353a774d1e632a
d3/8ec8f851adf470131178085bfbaab4b12ad2a7
e0b173960431a3e692ae929736df3c9b73a11d5b
eb3c3aede523d729990ac25c62a93eb47c21e2e3
...
The bug occurs when we are writing out a notes tree with a newly
decreased fanout, and the notes tree contains unexpanded subtrees
that should be consolidated into the parent tree as a consequence of
the decreased fanout):
Subtrees that happen to sit at an _even_ level in the internal notes
16-tree structure (in other words: subtrees whose path - "d3" in the
example above - is unique in the first nibble - i.e. there are no
other note paths that start with "d") are _not_ unpacked as part of
the tree writeout. This error will repeat itself in subsequent note
trees until the subtree is forced to be unpacked. In t3305 this only
happens when the d38ec8f8 note is itself removed from the tree.
The error is not severe (no information is lost, and the notes code
is able to read/decode this tree and manipulate it correctly), but
this is nonetheless a bug in the current implementation that should
be fixed.
That said, fixing the off-by-one error is not without complications:
We must take into account that the load_subtree() call from
for_each_note_helper() (that is now done to correctly unpack the
subtree while we're writing out the notes tree) may end up inserting
unpacked non-notes into the linked list of non_note entries held by
the struct notes_tree. Since we are in the process of writing out the
notes tree, this linked list is currently in the process of being
traversed by write_each_non_note_until(). The unpacked non-notes are
necessarily inserted between the last non-note we wrote out, and the
next non-note to be written. Hence, we cannot simply hold the
next_non_note to write in struct write_each_note_data (as we would
then silently skip these newly inserted notes), but must instead
always follow the ->next pointer from the last non-note we wrote.
(This part was caught by an existing test in t3304.)
Cc: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Brian M. Carlson <sandals@crustytoothpaste.net>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-03 22:04:45 +01:00
|
|
|
* entries at level 'n >= 2 * fanout' should NOT be
|
2010-02-13 22:28:16 +01:00
|
|
|
* preserved, but rather consolidated into the above
|
|
|
|
* notes tree level. We achieve this by unconditionally
|
|
|
|
* unpacking subtree entries that exist below the
|
|
|
|
* threshold level at 'n = 2 * fanout'.
|
|
|
|
*/
|
notes.c: fix off-by-one error when decreasing notes fanout
As noted in the previous commit, the nature of the fanout heuristic
in the notes code causes the exact point at which we increase or
decrease the notes fanout to vary with the objects being annotated.
Since the object ids generated by the test environment are
deterministic (by design), the notes generated and tested by t3305
are always the same, and we therefore happen to see the same fanout
behavior from one run to the next.
Coincidentally, if we were to change the test environment slightly
(say by making a test commit on an unrelated branch before we start
the t3305 test proper), we not only see the fanout switch happen at
different points, we also manage to trigger a _bug_ in the notes
code where the fanout 1 -> 0 switch is not applied uniformly across
the notes tree, but instead yields a notes tree like this:
...
bdeafb301e44b0e4db0f738a2d2a7beefdb70b70
bff2d39b4f7122bd4c5caee3de353a774d1e632a
d3/8ec8f851adf470131178085bfbaab4b12ad2a7
e0b173960431a3e692ae929736df3c9b73a11d5b
eb3c3aede523d729990ac25c62a93eb47c21e2e3
...
The bug occurs when we are writing out a notes tree with a newly
decreased fanout, and the notes tree contains unexpanded subtrees
that should be consolidated into the parent tree as a consequence of
the decreased fanout):
Subtrees that happen to sit at an _even_ level in the internal notes
16-tree structure (in other words: subtrees whose path - "d3" in the
example above - is unique in the first nibble - i.e. there are no
other note paths that start with "d") are _not_ unpacked as part of
the tree writeout. This error will repeat itself in subsequent note
trees until the subtree is forced to be unpacked. In t3305 this only
happens when the d38ec8f8 note is itself removed from the tree.
The error is not severe (no information is lost, and the notes code
is able to read/decode this tree and manipulate it correctly), but
this is nonetheless a bug in the current implementation that should
be fixed.
That said, fixing the off-by-one error is not without complications:
We must take into account that the load_subtree() call from
for_each_note_helper() (that is now done to correctly unpack the
subtree while we're writing out the notes tree) may end up inserting
unpacked non-notes into the linked list of non_note entries held by
the struct notes_tree. Since we are in the process of writing out the
notes tree, this linked list is currently in the process of being
traversed by write_each_non_note_until(). The unpacked non-notes are
necessarily inserted between the last non-note we wrote out, and the
next non-note to be written. Hence, we cannot simply hold the
next_non_note to write in struct write_each_note_data (as we would
then silently skip these newly inserted notes), but must instead
always follow the ->next pointer from the last non-note we wrote.
(This part was caught by an existing test in t3304.)
Cc: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Brian M. Carlson <sandals@crustytoothpaste.net>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-03 22:04:45 +01:00
|
|
|
if (n < 2 * fanout &&
|
2010-02-13 22:28:16 +01:00
|
|
|
flags & FOR_EACH_NOTE_YIELD_SUBTREES) {
|
|
|
|
/* invoke callback with subtree */
|
|
|
|
unsigned int path_len =
|
2017-05-30 19:30:38 +02:00
|
|
|
l->key_oid.hash[KEY_INDEX] * 2 + fanout;
|
2015-09-24 23:08:24 +02:00
|
|
|
assert(path_len < FANOUT_PATH_MAX - 1);
|
2017-05-30 19:30:37 +02:00
|
|
|
construct_path_with_fanout(l->key_oid.hash,
|
|
|
|
fanout,
|
2010-02-13 22:28:16 +01:00
|
|
|
path);
|
|
|
|
/* Create trailing slash, if needed */
|
|
|
|
if (path[path_len - 1] != '/')
|
|
|
|
path[path_len++] = '/';
|
|
|
|
path[path_len] = '\0';
|
2017-05-30 19:30:39 +02:00
|
|
|
ret = fn(&l->key_oid, &l->val_oid,
|
2017-05-30 19:30:37 +02:00
|
|
|
path,
|
2010-02-13 22:28:16 +01:00
|
|
|
cb_data);
|
|
|
|
}
|
notes.c: fix off-by-one error when decreasing notes fanout
As noted in the previous commit, the nature of the fanout heuristic
in the notes code causes the exact point at which we increase or
decrease the notes fanout to vary with the objects being annotated.
Since the object ids generated by the test environment are
deterministic (by design), the notes generated and tested by t3305
are always the same, and we therefore happen to see the same fanout
behavior from one run to the next.
Coincidentally, if we were to change the test environment slightly
(say by making a test commit on an unrelated branch before we start
the t3305 test proper), we not only see the fanout switch happen at
different points, we also manage to trigger a _bug_ in the notes
code where the fanout 1 -> 0 switch is not applied uniformly across
the notes tree, but instead yields a notes tree like this:
...
bdeafb301e44b0e4db0f738a2d2a7beefdb70b70
bff2d39b4f7122bd4c5caee3de353a774d1e632a
d3/8ec8f851adf470131178085bfbaab4b12ad2a7
e0b173960431a3e692ae929736df3c9b73a11d5b
eb3c3aede523d729990ac25c62a93eb47c21e2e3
...
The bug occurs when we are writing out a notes tree with a newly
decreased fanout, and the notes tree contains unexpanded subtrees
that should be consolidated into the parent tree as a consequence of
the decreased fanout):
Subtrees that happen to sit at an _even_ level in the internal notes
16-tree structure (in other words: subtrees whose path - "d3" in the
example above - is unique in the first nibble - i.e. there are no
other note paths that start with "d") are _not_ unpacked as part of
the tree writeout. This error will repeat itself in subsequent note
trees until the subtree is forced to be unpacked. In t3305 this only
happens when the d38ec8f8 note is itself removed from the tree.
The error is not severe (no information is lost, and the notes code
is able to read/decode this tree and manipulate it correctly), but
this is nonetheless a bug in the current implementation that should
be fixed.
That said, fixing the off-by-one error is not without complications:
We must take into account that the load_subtree() call from
for_each_note_helper() (that is now done to correctly unpack the
subtree while we're writing out the notes tree) may end up inserting
unpacked non-notes into the linked list of non_note entries held by
the struct notes_tree. Since we are in the process of writing out the
notes tree, this linked list is currently in the process of being
traversed by write_each_non_note_until(). The unpacked non-notes are
necessarily inserted between the last non-note we wrote out, and the
next non-note to be written. Hence, we cannot simply hold the
next_non_note to write in struct write_each_note_data (as we would
then silently skip these newly inserted notes), but must instead
always follow the ->next pointer from the last non-note we wrote.
(This part was caught by an existing test in t3304.)
Cc: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Brian M. Carlson <sandals@crustytoothpaste.net>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-03 22:04:45 +01:00
|
|
|
if (n >= 2 * fanout ||
|
2010-02-13 22:28:16 +01:00
|
|
|
!(flags & FOR_EACH_NOTE_DONT_UNPACK_SUBTREES)) {
|
|
|
|
/* unpack subtree and resume traversal */
|
|
|
|
tree->a[i] = NULL;
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
load_subtree(t, l, tree, n);
|
2010-02-13 22:28:16 +01:00
|
|
|
free(l);
|
|
|
|
goto redo;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case PTR_TYPE_NOTE:
|
|
|
|
l = (struct leaf_node *) CLR_PTR_TYPE(p);
|
2017-05-30 19:30:37 +02:00
|
|
|
construct_path_with_fanout(l->key_oid.hash, fanout,
|
|
|
|
path);
|
2017-05-30 19:30:39 +02:00
|
|
|
ret = fn(&l->key_oid, &l->val_oid, path,
|
2017-05-30 19:30:37 +02:00
|
|
|
cb_data);
|
2010-02-13 22:28:16 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-02-13 22:28:17 +01:00
|
|
|
struct tree_write_stack {
|
|
|
|
struct tree_write_stack *next;
|
|
|
|
struct strbuf buf;
|
|
|
|
char path[2]; /* path to subtree in next, if any */
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline int matches_tree_write_stack(struct tree_write_stack *tws,
|
|
|
|
const char *full_path)
|
|
|
|
{
|
|
|
|
return full_path[0] == tws->path[0] &&
|
|
|
|
full_path[1] == tws->path[1] &&
|
|
|
|
full_path[2] == '/';
|
|
|
|
}
|
|
|
|
|
|
|
|
static void write_tree_entry(struct strbuf *buf, unsigned int mode,
|
|
|
|
const char *path, unsigned int path_len, const
|
2019-02-19 01:05:01 +01:00
|
|
|
unsigned char *hash)
|
2010-02-13 22:28:17 +01:00
|
|
|
{
|
2010-02-25 06:39:06 +01:00
|
|
|
strbuf_addf(buf, "%o %.*s%c", mode, path_len, path, '\0');
|
2019-02-19 01:05:01 +01:00
|
|
|
strbuf_add(buf, hash, the_hash_algo->rawsz);
|
2010-02-13 22:28:17 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void tree_write_stack_init_subtree(struct tree_write_stack *tws,
|
|
|
|
const char *path)
|
|
|
|
{
|
|
|
|
struct tree_write_stack *n;
|
|
|
|
assert(!tws->next);
|
|
|
|
assert(tws->path[0] == '\0' && tws->path[1] == '\0');
|
|
|
|
n = (struct tree_write_stack *)
|
|
|
|
xmalloc(sizeof(struct tree_write_stack));
|
|
|
|
n->next = NULL;
|
2019-02-19 01:05:01 +01:00
|
|
|
strbuf_init(&n->buf, 256 * (32 + the_hash_algo->hexsz)); /* assume 256 entries per tree */
|
2010-02-13 22:28:17 +01:00
|
|
|
n->path[0] = n->path[1] = '\0';
|
|
|
|
tws->next = n;
|
|
|
|
tws->path[0] = path[0];
|
|
|
|
tws->path[1] = path[1];
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tree_write_stack_finish_subtree(struct tree_write_stack *tws)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct tree_write_stack *n = tws->next;
|
2017-05-30 19:30:38 +02:00
|
|
|
struct object_id s;
|
2010-02-13 22:28:17 +01:00
|
|
|
if (n) {
|
|
|
|
ret = tree_write_stack_finish_subtree(n);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2022-02-05 00:48:26 +01:00
|
|
|
ret = write_object_file(n->buf.buf, n->buf.len, OBJ_TREE, &s);
|
2010-02-13 22:28:17 +01:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
strbuf_release(&n->buf);
|
|
|
|
free(n);
|
|
|
|
tws->next = NULL;
|
2017-05-30 19:30:38 +02:00
|
|
|
write_tree_entry(&tws->buf, 040000, tws->path, 2, s.hash);
|
2010-02-13 22:28:17 +01:00
|
|
|
tws->path[0] = tws->path[1] = '\0';
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int write_each_note_helper(struct tree_write_stack *tws,
|
|
|
|
const char *path, unsigned int mode,
|
2017-05-30 19:30:39 +02:00
|
|
|
const struct object_id *oid)
|
2010-02-13 22:28:17 +01:00
|
|
|
{
|
|
|
|
size_t path_len = strlen(path);
|
|
|
|
unsigned int n = 0;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* Determine common part of tree write stack */
|
|
|
|
while (tws && 3 * n < path_len &&
|
|
|
|
matches_tree_write_stack(tws, path + 3 * n)) {
|
|
|
|
n++;
|
|
|
|
tws = tws->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* tws point to last matching tree_write_stack entry */
|
|
|
|
ret = tree_write_stack_finish_subtree(tws);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
/* Start subtrees needed to satisfy path */
|
|
|
|
while (3 * n + 2 < path_len && path[3 * n + 2] == '/') {
|
|
|
|
tree_write_stack_init_subtree(tws, path + 3 * n);
|
|
|
|
n++;
|
|
|
|
tws = tws->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* There should be no more directory components in the given path */
|
|
|
|
assert(memchr(path + 3 * n, '/', path_len - (3 * n)) == NULL);
|
|
|
|
|
|
|
|
/* Finally add given entry to the current tree object */
|
|
|
|
write_tree_entry(&tws->buf, mode, path + 3 * n, path_len - (3 * n),
|
2017-05-30 19:30:39 +02:00
|
|
|
oid->hash);
|
2010-02-13 22:28:17 +01:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * State carried along while the notes tree is written out.
 */
struct write_each_note_data {
	/* Outermost level of the tree write stack. */
	struct tree_write_stack *root;

	/*
	 * Head of the list of non-notes; the next non-note to write is
	 * taken from here as long as 'nn_prev' is NULL.
	 */
	struct non_note **nn_list;

	/*
	 * Non-note whose ->next is followed to find the next non-note to
	 * write, or NULL to start from '*nn_list'.
	 */
	struct non_note *nn_prev;
};
|
|
|
|
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
static int write_each_non_note_until(const char *note_path,
|
|
|
|
struct write_each_note_data *d)
|
|
|
|
{
|
notes.c: fix off-by-one error when decreasing notes fanout
As noted in the previous commit, the nature of the fanout heuristic
in the notes code causes the exact point at which we increase or
decrease the notes fanout to vary with the objects being annotated.
Since the object ids generated by the test environment are
deterministic (by design), the notes generated and tested by t3305
are always the same, and we therefore happen to see the same fanout
behavior from one run to the next.
Coincidentally, if we were to change the test environment slightly
(say by making a test commit on an unrelated branch before we start
the t3305 test proper), we not only see the fanout switch happen at
different points, we also manage to trigger a _bug_ in the notes
code where the fanout 1 -> 0 switch is not applied uniformly across
the notes tree, but instead yields a notes tree like this:
...
bdeafb301e44b0e4db0f738a2d2a7beefdb70b70
bff2d39b4f7122bd4c5caee3de353a774d1e632a
d3/8ec8f851adf470131178085bfbaab4b12ad2a7
e0b173960431a3e692ae929736df3c9b73a11d5b
eb3c3aede523d729990ac25c62a93eb47c21e2e3
...
The bug occurs when we are writing out a notes tree with a newly
decreased fanout, and the notes tree contains unexpanded subtrees
that should be consolidated into the parent tree as a consequence of
the decreased fanout:
Subtrees that happen to sit at an _even_ level in the internal notes
16-tree structure (in other words: subtrees whose path - "d3" in the
example above - is unique in the first nibble - i.e. there are no
other note paths that start with "d") are _not_ unpacked as part of
the tree writeout. This error will repeat itself in subsequent note
trees until the subtree is forced to be unpacked. In t3305 this only
happens when the d38ec8f8 note is itself removed from the tree.
The error is not severe (no information is lost, and the notes code
is able to read/decode this tree and manipulate it correctly), but
this is nonetheless a bug in the current implementation that should
be fixed.
That said, fixing the off-by-one error is not without complications:
We must take into account that the load_subtree() call from
for_each_note_helper() (that is now done to correctly unpack the
subtree while we're writing out the notes tree) may end up inserting
unpacked non-notes into the linked list of non_note entries held by
the struct notes_tree. Since we are in the process of writing out the
notes tree, this linked list is currently in the process of being
traversed by write_each_non_note_until(). The unpacked non-notes are
necessarily inserted between the last non-note we wrote out, and the
next non-note to be written. Hence, we cannot simply hold the
next_non_note to write in struct write_each_note_data (as we would
then silently skip these newly inserted notes), but must instead
always follow the ->next pointer from the last non-note we wrote.
(This part was caught by an existing test in t3304.)
Cc: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Brian M. Carlson <sandals@crustytoothpaste.net>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-03 22:04:45 +01:00
|
|
|
struct non_note *p = d->nn_prev;
|
|
|
|
struct non_note *n = p ? p->next : *d->nn_list;
|
2010-06-21 20:52:29 +02:00
|
|
|
int cmp = 0, ret;
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
while (n && (!note_path || (cmp = strcmp(n->path, note_path)) <= 0)) {
|
|
|
|
if (note_path && cmp == 0)
|
|
|
|
; /* do nothing, prefer note to non-note */
|
|
|
|
else {
|
|
|
|
ret = write_each_note_helper(d->root, n->path, n->mode,
|
2017-05-30 19:30:39 +02:00
|
|
|
&n->oid);
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
notes.c: fix off-by-one error when decreasing notes fanout
As noted in the previous commit, the nature of the fanout heuristic
in the notes code causes the exact point at which we increase or
decrease the notes fanout to vary with the objects being annotated.
Since the object ids generated by the test environment are
deterministic (by design), the notes generated and tested by t3305
are always the same, and we therefore happen to see the same fanout
behavior from one run to the next.
Coincidentally, if we were to change the test environment slightly
(say by making a test commit on an unrelated branch before we start
the t3305 test proper), we not only see the fanout switch happen at
different points, we also manage to trigger a _bug_ in the notes
code where the fanout 1 -> 0 switch is not applied uniformly across
the notes tree, but instead yields a notes tree like this:
...
bdeafb301e44b0e4db0f738a2d2a7beefdb70b70
bff2d39b4f7122bd4c5caee3de353a774d1e632a
d3/8ec8f851adf470131178085bfbaab4b12ad2a7
e0b173960431a3e692ae929736df3c9b73a11d5b
eb3c3aede523d729990ac25c62a93eb47c21e2e3
...
The bug occurs when we are writing out a notes tree with a newly
decreased fanout, and the notes tree contains unexpanded subtrees
that should be consolidated into the parent tree as a consequence of
the decreased fanout:
Subtrees that happen to sit at an _even_ level in the internal notes
16-tree structure (in other words: subtrees whose path - "d3" in the
example above - is unique in the first nibble - i.e. there are no
other note paths that start with "d") are _not_ unpacked as part of
the tree writeout. This error will repeat itself in subsequent note
trees until the subtree is forced to be unpacked. In t3305 this only
happens when the d38ec8f8 note is itself removed from the tree.
The error is not severe (no information is lost, and the notes code
is able to read/decode this tree and manipulate it correctly), but
this is nonetheless a bug in the current implementation that should
be fixed.
That said, fixing the off-by-one error is not without complications:
We must take into account that the load_subtree() call from
for_each_note_helper() (that is now done to correctly unpack the
subtree while we're writing out the notes tree) may end up inserting
unpacked non-notes into the linked list of non_note entries held by
the struct notes_tree. Since we are in the process of writing out the
notes tree, this linked list is currently in the process of being
traversed by write_each_non_note_until(). The unpacked non-notes are
necessarily inserted between the last non-note we wrote out, and the
next non-note to be written. Hence, we cannot simply hold the
next_non_note to write in struct write_each_note_data (as we would
then silently skip these newly inserted notes), but must instead
always follow the ->next pointer from the last non-note we wrote.
(This part was caught by an existing test in t3304.)
Cc: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Brian M. Carlson <sandals@crustytoothpaste.net>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-03 22:04:45 +01:00
|
|
|
p = n;
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
n = n->next;
|
|
|
|
}
|
notes.c: fix off-by-one error when decreasing notes fanout
As noted in the previous commit, the nature of the fanout heuristic
in the notes code causes the exact point at which we increase or
decrease the notes fanout to vary with the objects being annotated.
Since the object ids generated by the test environment are
deterministic (by design), the notes generated and tested by t3305
are always the same, and we therefore happen to see the same fanout
behavior from one run to the next.
Coincidentally, if we were to change the test environment slightly
(say by making a test commit on an unrelated branch before we start
the t3305 test proper), we not only see the fanout switch happen at
different points, we also manage to trigger a _bug_ in the notes
code where the fanout 1 -> 0 switch is not applied uniformly across
the notes tree, but instead yields a notes tree like this:
...
bdeafb301e44b0e4db0f738a2d2a7beefdb70b70
bff2d39b4f7122bd4c5caee3de353a774d1e632a
d3/8ec8f851adf470131178085bfbaab4b12ad2a7
e0b173960431a3e692ae929736df3c9b73a11d5b
eb3c3aede523d729990ac25c62a93eb47c21e2e3
...
The bug occurs when we are writing out a notes tree with a newly
decreased fanout, and the notes tree contains unexpanded subtrees
that should be consolidated into the parent tree as a consequence of
the decreased fanout:
Subtrees that happen to sit at an _even_ level in the internal notes
16-tree structure (in other words: subtrees whose path - "d3" in the
example above - is unique in the first nibble - i.e. there are no
other note paths that start with "d") are _not_ unpacked as part of
the tree writeout. This error will repeat itself in subsequent note
trees until the subtree is forced to be unpacked. In t3305 this only
happens when the d38ec8f8 note is itself removed from the tree.
The error is not severe (no information is lost, and the notes code
is able to read/decode this tree and manipulate it correctly), but
this is nonetheless a bug in the current implementation that should
be fixed.
That said, fixing the off-by-one error is not without complications:
We must take into account that the load_subtree() call from
for_each_note_helper() (that is now done to correctly unpack the
subtree while we're writing out the notes tree) may end up inserting
unpacked non-notes into the linked list of non_note entries held by
the struct notes_tree. Since we are in the process of writing out the
notes tree, this linked list is currently in the process of being
traversed by write_each_non_note_until(). The unpacked non-notes are
necessarily inserted between the last non-note we wrote out, and the
next non-note to be written. Hence, we cannot simply hold the
next_non_note to write in struct write_each_note_data (as we would
then silently skip these newly inserted notes), but must instead
always follow the ->next pointer from the last non-note we wrote.
(This part was caught by an existing test in t3304.)
Cc: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Brian M. Carlson <sandals@crustytoothpaste.net>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-03 22:04:45 +01:00
|
|
|
d->nn_prev = p;
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-02-24 07:39:31 +01:00
|
|
|
/*
 * Per-note callback that emits one note entry; its state arrives through
 * cb_data as a struct write_each_note_data.
 *
 * note_oid  - object id written for this entry.
 * note_path - path of the entry; a trailing '/' marks a subtree entry and
 *             is stripped IN PLACE before the path is used.
 * cb_data   - the struct write_each_note_data for the current writeout.
 *
 * Before the entry itself is written, write_each_non_note_until() is
 * called with the same path so that non-note entries are woven in ahead
 * of it.
 *
 * Returns 0 when both steps return 0 and 1 otherwise: the results are
 * joined with '||', so the callees' own non-zero codes are not passed on.
 */
static int write_each_note(const struct object_id *object_oid UNUSED,
		const struct object_id *note_oid, char *note_path,
		void *cb_data)
{
	struct write_each_note_data *d = cb_data;
	size_t note_path_len = strlen(note_path);
	unsigned int mode = 0100644;

	/*
	 * Check the length first: with an empty path, note_path_len - 1
	 * would wrap around and index far outside the buffer.
	 */
	if (note_path_len && note_path[note_path_len - 1] == '/') {
		/* subtree entry */
		note_path_len--;
		note_path[note_path_len] = '\0';
		mode = 040000;
	}
	assert(note_path_len <= GIT_MAX_HEXSZ + FANOUT_PATH_SEPARATORS);

	/* Weave non-note entries into note entries */
	return write_each_non_note_until(note_path, d) ||
		write_each_note_helper(d->root, note_path, mode, note_oid);
}
|
|
|
|
|
2010-02-13 22:28:27 +01:00
|
|
|
/*
 * Singly linked list node recording one object name.
 *
 * The node only stores the sha1 pointer; it does not copy the hash bytes,
 * so the pointed-to storage must outlive the node.
 */
struct note_delete_list {
	/* Next node in the list, or NULL at the end. */
	struct note_delete_list *next;
	/* Borrowed pointer to the raw hash bytes of the recorded object. */
	const unsigned char *sha1;
};
|
|
|
|
|
2017-05-30 19:30:39 +02:00
|
|
|
static int prune_notes_helper(const struct object_id *object_oid,
|
2023-02-24 07:39:31 +01:00
|
|
|
const struct object_id *note_oid UNUSED,
|
|
|
|
char *note_path UNUSED,
|
|
|
|
void *cb_data)
|
2010-02-13 22:28:27 +01:00
|
|
|
{
|
|
|
|
struct note_delete_list **l = (struct note_delete_list **) cb_data;
|
|
|
|
struct note_delete_list *n;
|
|
|
|
|
2023-03-28 15:58:50 +02:00
|
|
|
if (repo_has_object_file(the_repository, object_oid))
|
2010-02-13 22:28:27 +01:00
|
|
|
return 0; /* nothing to do for this note */
|
|
|
|
|
|
|
|
/* failed to find object => prune this note */
|
|
|
|
n = (struct note_delete_list *) xmalloc(sizeof(*n));
|
|
|
|
n->next = *l;
|
2017-05-30 19:30:39 +02:00
|
|
|
n->sha1 = object_oid->hash;
|
2010-02-13 22:28:27 +01:00
|
|
|
*l = n;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-01-28 01:13:17 +01:00
|
|
|
int combine_notes_concatenate(struct object_id *cur_oid,
|
|
|
|
const struct object_id *new_oid)
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
{
|
|
|
|
char *cur_msg = NULL, *new_msg = NULL, *buf;
|
|
|
|
unsigned long cur_len, new_len, buf_len;
|
|
|
|
enum object_type cur_type, new_type;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* read in both note blob objects */
|
2018-01-28 01:13:17 +01:00
|
|
|
if (!is_null_oid(new_oid))
|
2023-03-28 15:58:50 +02:00
|
|
|
new_msg = repo_read_object_file(the_repository, new_oid,
|
|
|
|
&new_type, &new_len);
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
if (!new_msg || !new_len || new_type != OBJ_BLOB) {
|
|
|
|
free(new_msg);
|
|
|
|
return 0;
|
|
|
|
}
|
2018-01-28 01:13:17 +01:00
|
|
|
if (!is_null_oid(cur_oid))
|
2023-03-28 15:58:50 +02:00
|
|
|
cur_msg = repo_read_object_file(the_repository, cur_oid,
|
|
|
|
&cur_type, &cur_len);
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
if (!cur_msg || !cur_len || cur_type != OBJ_BLOB) {
|
|
|
|
free(cur_msg);
|
|
|
|
free(new_msg);
|
2018-01-28 01:13:17 +01:00
|
|
|
oidcpy(cur_oid, new_oid);
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-11-09 22:49:44 +01:00
|
|
|
/* we will separate the notes by two newlines anyway */
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
if (cur_msg[cur_len - 1] == '\n')
|
|
|
|
cur_len--;
|
|
|
|
|
|
|
|
/* concatenate cur_msg and new_msg into buf */
|
2010-11-09 22:49:44 +01:00
|
|
|
buf_len = cur_len + 2 + new_len;
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
buf = (char *) xmalloc(buf_len);
|
|
|
|
memcpy(buf, cur_msg, cur_len);
|
|
|
|
buf[cur_len] = '\n';
|
2010-11-09 22:49:44 +01:00
|
|
|
buf[cur_len + 1] = '\n';
|
|
|
|
memcpy(buf + cur_len + 2, new_msg, new_len);
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
free(cur_msg);
|
|
|
|
free(new_msg);
|
|
|
|
|
|
|
|
/* create a new blob object from buf */
|
2022-02-05 00:48:26 +01:00
|
|
|
ret = write_object_file(buf, buf_len, OBJ_BLOB, cur_oid);
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
free(buf);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-01-28 01:13:17 +01:00
|
|
|
/*
 * Combine two notes by discarding the current one: new_oid is copied into
 * cur_oid unconditionally. Always returns 0.
 */
int combine_notes_overwrite(struct object_id *cur_oid,
			    const struct object_id *new_oid)
{
	/* the new note simply replaces whatever was there */
	oidcpy(cur_oid, new_oid);

	return 0;
}
|
|
|
|
|
2023-02-24 07:39:31 +01:00
|
|
|
int combine_notes_ignore(struct object_id *cur_oid UNUSED,
|
|
|
|
const struct object_id *new_oid UNUSED)
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-11-04 08:07:08 +01:00
|
|
|
/*
|
|
|
|
* Add the lines from the named object to list, with trailing
|
|
|
|
* newlines removed.
|
|
|
|
*/
|
|
|
|
static int string_list_add_note_lines(struct string_list *list,
|
2018-01-28 01:13:17 +01:00
|
|
|
const struct object_id *oid)
|
2010-11-15 00:57:17 +01:00
|
|
|
{
|
|
|
|
char *data;
|
|
|
|
unsigned long len;
|
|
|
|
enum object_type t;
|
|
|
|
|
2018-01-28 01:13:17 +01:00
|
|
|
if (is_null_oid(oid))
|
2010-11-15 00:57:17 +01:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* repo_read_object_file NUL-terminates */
|
2023-03-28 15:58:50 +02:00
|
|
|
data = repo_read_object_file(the_repository, oid, &t, &len);
|
2010-11-15 00:57:17 +01:00
|
|
|
if (t != OBJ_BLOB || !data || !len) {
|
|
|
|
free(data);
|
|
|
|
return t != OBJ_BLOB || !data;
|
|
|
|
}
|
|
|
|
|
2012-11-04 08:07:08 +01:00
|
|
|
/*
|
|
|
|
* If the last line of the file is EOL-terminated, this will
|
|
|
|
* add an empty string to the list. But it will be removed
|
|
|
|
* later, along with any empty strings that came from empty
|
|
|
|
* lines within the file.
|
|
|
|
*/
|
|
|
|
string_list_split(list, data, '\n', -1);
|
|
|
|
free(data);
|
2010-11-15 00:57:17 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int string_list_join_lines_helper(struct string_list_item *item,
|
|
|
|
void *cb_data)
|
|
|
|
{
|
|
|
|
struct strbuf *buf = cb_data;
|
|
|
|
strbuf_addstr(buf, item->string);
|
|
|
|
strbuf_addch(buf, '\n');
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-01-28 01:13:17 +01:00
|
|
|
int combine_notes_cat_sort_uniq(struct object_id *cur_oid,
|
|
|
|
const struct object_id *new_oid)
|
2010-11-15 00:57:17 +01:00
|
|
|
{
|
2012-11-04 08:07:07 +01:00
|
|
|
struct string_list sort_uniq_list = STRING_LIST_INIT_DUP;
|
2010-11-15 00:57:17 +01:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
int ret = 1;
|
|
|
|
|
|
|
|
/* read both note blob objects into sort_uniq_list */
|
2018-01-28 01:13:17 +01:00
|
|
|
if (string_list_add_note_lines(&sort_uniq_list, cur_oid))
|
2010-11-15 00:57:17 +01:00
|
|
|
goto out;
|
2018-01-28 01:13:17 +01:00
|
|
|
if (string_list_add_note_lines(&sort_uniq_list, new_oid))
|
2010-11-15 00:57:17 +01:00
|
|
|
goto out;
|
2012-11-04 08:07:08 +01:00
|
|
|
string_list_remove_empty_items(&sort_uniq_list, 0);
|
2014-11-25 09:02:35 +01:00
|
|
|
string_list_sort(&sort_uniq_list);
|
2012-11-04 08:07:08 +01:00
|
|
|
string_list_remove_duplicates(&sort_uniq_list, 0);
|
2010-11-15 00:57:17 +01:00
|
|
|
|
|
|
|
/* create a new blob object from sort_uniq_list */
|
|
|
|
if (for_each_string_list(&sort_uniq_list,
|
|
|
|
string_list_join_lines_helper, &buf))
|
|
|
|
goto out;
|
|
|
|
|
2022-02-05 00:48:26 +01:00
|
|
|
ret = write_object_file(buf.buf, buf.len, OBJ_BLOB, cur_oid);
|
2010-11-15 00:57:17 +01:00
|
|
|
|
|
|
|
out:
|
|
|
|
strbuf_release(&buf);
|
|
|
|
string_list_clear(&sort_uniq_list, 0);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2022-08-19 12:08:32 +02:00
|
|
|
static int string_list_add_one_ref(const char *refname,
|
2022-08-25 19:09:48 +02:00
|
|
|
const struct object_id *oid UNUSED,
|
|
|
|
int flag UNUSED, void *cb)
|
2010-03-12 18:04:26 +01:00
|
|
|
{
|
|
|
|
struct string_list *refs = cb;
|
2013-05-25 11:08:20 +02:00
|
|
|
if (!unsorted_string_list_has_string(refs, refname))
|
|
|
|
string_list_append(refs, refname);
|
2010-03-12 18:04:26 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-05-25 11:08:21 +02:00
|
|
|
/*
|
|
|
|
* The list argument must have strdup_strings set on it.
|
|
|
|
*/
|
2010-03-12 18:04:26 +01:00
|
|
|
void string_list_add_refs_by_glob(struct string_list *list, const char *glob)
|
|
|
|
{
|
2013-05-25 11:08:21 +02:00
|
|
|
assert(list->strdup_strings);
|
2010-03-12 18:04:26 +01:00
|
|
|
if (has_glob_specials(glob)) {
|
2015-05-25 20:38:59 +02:00
|
|
|
for_each_glob_ref(string_list_add_one_ref, glob, list);
|
2010-03-12 18:04:26 +01:00
|
|
|
} else {
|
2017-05-30 19:30:38 +02:00
|
|
|
struct object_id oid;
|
2023-03-28 15:58:46 +02:00
|
|
|
if (repo_get_oid(the_repository, glob, &oid))
|
2010-03-12 18:04:26 +01:00
|
|
|
warning("notes ref %s is invalid", glob);
|
|
|
|
if (!unsorted_string_list_has_string(list, glob))
|
2010-06-26 01:41:38 +02:00
|
|
|
string_list_append(list, glob);
|
2010-03-12 18:04:26 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void string_list_add_refs_from_colon_sep(struct string_list *list,
|
|
|
|
const char *globs)
|
|
|
|
{
|
2012-11-04 08:07:10 +01:00
|
|
|
struct string_list split = STRING_LIST_INIT_NODUP;
|
|
|
|
char *globs_copy = xstrdup(globs);
|
2010-03-12 18:04:26 +01:00
|
|
|
int i;
|
|
|
|
|
2012-11-04 08:07:10 +01:00
|
|
|
string_list_split_in_place(&split, globs_copy, ':', -1);
|
|
|
|
string_list_remove_empty_items(&split, 0);
|
2010-03-12 18:04:26 +01:00
|
|
|
|
2012-11-04 08:07:10 +01:00
|
|
|
for (i = 0; i < split.nr; i++)
|
|
|
|
string_list_add_refs_by_glob(list, split.items[i].string);
|
2010-03-12 18:04:26 +01:00
|
|
|
|
2012-11-04 08:07:10 +01:00
|
|
|
string_list_clear(&split, 0);
|
|
|
|
free(globs_copy);
|
2010-03-12 18:04:26 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static int notes_display_config(const char *k, const char *v, void *cb)
|
|
|
|
{
|
|
|
|
int *load_refs = cb;
|
|
|
|
|
|
|
|
if (*load_refs && !strcmp(k, "notes.displayref")) {
|
|
|
|
if (!v)
|
2020-11-23 04:23:41 +01:00
|
|
|
return config_error_nonbool(k);
|
2010-03-12 18:04:26 +01:00
|
|
|
string_list_add_refs_by_glob(&display_notes_refs, v);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-11-09 22:49:39 +01:00
|
|
|
const char *default_notes_ref(void)
|
2010-03-12 18:04:26 +01:00
|
|
|
{
|
|
|
|
const char *notes_ref = NULL;
|
|
|
|
if (!notes_ref)
|
|
|
|
notes_ref = getenv(GIT_NOTES_REF_ENVIRONMENT);
|
|
|
|
if (!notes_ref)
|
|
|
|
notes_ref = notes_ref_name; /* value of core.notesRef config */
|
|
|
|
if (!notes_ref)
|
|
|
|
notes_ref = GIT_NOTES_DEFAULT_REF;
|
|
|
|
return notes_ref;
|
|
|
|
}
|
|
|
|
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
void init_notes(struct notes_tree *t, const char *notes_ref,
|
|
|
|
combine_notes_fn combine_notes, int flags)
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory data structure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
{
|
2016-09-05 22:08:05 +02:00
|
|
|
struct object_id oid, object_oid;
|
2019-04-05 17:00:12 +02:00
|
|
|
unsigned short mode;
|
Teach the notes lookup code to parse notes trees with various fanout schemes
The semantics used when parsing notes trees (with regards to fanout subtrees)
follow Dscho's proposal fairly closely:
- No concatenation/merging of notes is performed. If there are several notes
objects referencing a given commit, only one of those objects is used.
- If a notes object for a given commit is present in the "root" notes tree,
no subtrees are consulted; the object in the root tree is used directly.
- If there is more than one subtree that prefix-matches the given commit,
only the subtree with the longest matching prefix is consulted. This
means that if the given commit is e.g. "deadbeef", and the notes tree has
subtrees "de" and "dead", then the following paths in the notes tree are
searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched.
- Fanout directories (subtrees) must reference a whole number of bytes
from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are
acceptable; "d" and "dea" are not.
- Multiple levels of fanout are allowed. All the above rules apply
recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc.
This patch changes the in-memory data structure for holding parsed notes:
Instead of holding all note (and subtree) entries in a hash table, a
simple 16-tree structure is used instead. The tree structure consists of
16-arrays as internal nodes, and note/subtree entries as leaf nodes. The
tree is traversed by indexing subsequent nibbles of the search key until
a leaf node is encountered. If a subtree entry is encountered while
searching for a note, the subtree is unpacked into the 16-tree structure,
and the search continues into that subtree.
The new algorithm performs significantly better in the cases where only
a fraction of the notes need to be looked up (this is assumed to be the
common case for notes lookup). The new code even performs marginally
better in the worst case (where _all_ the notes are looked up).
In addition to this, comes the massive performance win associated with
organizing the notes tree according to some fanout scheme. Even a simple
2/38 fanout scheme is dramatically quicker to traverse (going from tens of
seconds to sub-second runtimes).
As for memory usage, the new code is marginally better than the old code in
the worst case, but in the case of looking up only some notes from a notes
tree with proper fanout, the new code uses only a small fraction of the
memory needed to hold the entire notes tree.
However, there is one casualty of this patch. The old notes lookup code was
able to parse notes that were associated with non-SHA1s (e.g. refs). The new
code requires the referenced object to be named by a SHA1 sum. Still, this
is not considered a major setback, since the notes infrastructure was not
originally intended to annotate objects outside the Git object database.
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-09 12:22:07 +02:00
|
|
|
struct leaf_node root_tree;
|
2009-10-09 12:21:59 +02:00
|
|
|
|
2010-02-13 22:28:18 +01:00
|
|
|
if (!t)
|
|
|
|
t = &default_notes_tree;
|
|
|
|
assert(!t->initialized);
|
2010-02-13 22:28:12 +01:00
|
|
|
|
|
|
|
if (!notes_ref)
|
2010-03-12 18:04:26 +01:00
|
|
|
notes_ref = default_notes_ref();
|
2022-08-05 19:58:36 +02:00
|
|
|
update_ref_namespace(NAMESPACE_NOTES, xstrdup(notes_ref));
|
2010-02-13 22:28:12 +01:00
|
|
|
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
if (!combine_notes)
|
|
|
|
combine_notes = combine_notes_concatenate;
|
|
|
|
|
2014-05-26 17:33:52 +02:00
|
|
|
t->root = (struct int_node *) xcalloc(1, sizeof(struct int_node));
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
t->first_non_note = NULL;
|
|
|
|
t->prev_non_note = NULL;
|
2015-01-13 02:59:09 +01:00
|
|
|
t->ref = xstrdup_or_null(notes_ref);
|
2015-10-08 04:54:43 +02:00
|
|
|
t->update_ref = (flags & NOTES_INIT_WRITABLE) ? t->ref : NULL;
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
t->combine_notes = combine_notes;
|
2010-02-13 22:28:18 +01:00
|
|
|
t->initialized = 1;
|
2010-03-12 18:04:36 +01:00
|
|
|
t->dirty = 0;
|
2010-02-13 22:28:18 +01:00
|
|
|
|
2010-02-13 22:28:12 +01:00
|
|
|
if (flags & NOTES_INIT_EMPTY || !notes_ref ||
|
2023-03-28 15:58:46 +02:00
|
|
|
repo_get_oid_treeish(the_repository, notes_ref, &object_oid))
|
2009-10-09 12:21:59 +02:00
|
|
|
return;
|
2017-10-16 00:06:56 +02:00
|
|
|
if (flags & NOTES_INIT_WRITABLE && read_ref(notes_ref, &object_oid))
|
2015-10-08 04:54:43 +02:00
|
|
|
die("Cannot use notes ref %s", notes_ref);
|
2019-06-27 11:28:49 +02:00
|
|
|
if (get_tree_entry(the_repository, &object_oid, "", &oid, &mode))
|
2010-02-13 22:28:12 +01:00
|
|
|
die("Failed to read notes tree referenced by %s (%s)",
|
2016-09-05 22:08:05 +02:00
|
|
|
notes_ref, oid_to_hex(&object_oid));
|
2009-10-09 12:21:59 +02:00
|
|
|
|
2017-05-30 19:30:37 +02:00
|
|
|
oidclr(&root_tree.key_oid);
|
|
|
|
oidcpy(&root_tree.val_oid, &oid);
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
load_subtree(t, &root_tree, t->root, 0);
|
2009-10-09 12:21:59 +02:00
|
|
|
}
|
|
|
|
|
2015-10-08 04:54:43 +02:00
|
|
|
struct notes_tree **load_notes_trees(struct string_list *refs, int flags)
|
2010-03-12 18:04:26 +01:00
|
|
|
{
|
2010-07-03 14:41:54 +02:00
|
|
|
struct string_list_item *item;
|
|
|
|
int counter = 0;
|
2010-03-12 18:04:26 +01:00
|
|
|
struct notes_tree **trees;
|
2016-02-22 23:44:25 +01:00
|
|
|
ALLOC_ARRAY(trees, refs->nr + 1);
|
2010-07-03 14:41:54 +02:00
|
|
|
for_each_string_list_item(item, refs) {
|
|
|
|
struct notes_tree *t = xcalloc(1, sizeof(struct notes_tree));
|
2015-10-08 04:54:43 +02:00
|
|
|
init_notes(t, item->string, combine_notes_ignore, flags);
|
2010-07-03 14:41:54 +02:00
|
|
|
trees[counter++] = t;
|
|
|
|
}
|
|
|
|
trees[counter] = NULL;
|
2010-03-12 18:04:26 +01:00
|
|
|
return trees;
|
|
|
|
}
|
|
|
|
|
|
|
|
void init_display_notes(struct display_notes_opt *opt)
|
2019-12-09 14:10:41 +01:00
|
|
|
{
|
|
|
|
memset(opt, 0, sizeof(*opt));
|
|
|
|
opt->use_default_notes = -1;
|
|
|
|
}
|
|
|
|
|
notes: break set_display_notes() into smaller functions
In 8164c961e1 (format-patch: use --notes behavior for format.notes,
2019-12-09), we introduced set_display_notes() which was a monolithic
function with three mutually exclusive branches. Break the function up
into three small and simple functions that each are only responsible for
one task.
This family of functions accepts an `int *show_notes` instead of
returning a value suitable for assignment to `show_notes`. This is for
two reasons. First of all, this guarantees that the external
`show_notes` variable changes in lockstep with the
`struct display_notes_opt`. Second, this prompts future developers to be
careful about doing something meaningful with this value. In fact, a
NULL check is intentionally omitted because causing a segfault here
would tell the future developer that they are meant to use the value for
something meaningful.
One alternative was making the family of functions accept a
`struct rev_info *` instead of the `struct display_notes_opt *`, since
the former contains the `show_notes` field as well. This does not work
because we have to call git_config() before repo_init_revisions().
However, if we had a `struct rev_info`, we'd need to initialize it before
it gets assigned values from git_config(). As a result, we break the
circular dependency by having standalone `int show_notes` and
`struct display_notes_opt notes_opt` variables which temporarily hold
values from git_config() until the values are copied over to `rev`.
To implement this change, we need to get a pointer to
`rev_info::show_notes`. Unfortunately, this is not possible with
bitfields and only direct-assignment is possible. Change
`rev_info::show_notes` to a non-bitfield int so that we can get its
address.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-12-12 01:49:50 +01:00
|
|
|
void enable_default_display_notes(struct display_notes_opt *opt, int *show_notes)
|
2019-12-09 14:10:44 +01:00
|
|
|
{
|
notes: break set_display_notes() into smaller functions
In 8164c961e1 (format-patch: use --notes behavior for format.notes,
2019-12-09), we introduced set_display_notes() which was a monolithic
function with three mutually exclusive branches. Break the function up
into three small and simple functions that each are only responsible for
one task.
This family of functions accepts an `int *show_notes` instead of
returning a value suitable for assignment to `show_notes`. This is for
two reasons. First of all, this guarantees that the external
`show_notes` variable changes in lockstep with the
`struct display_notes_opt`. Second, this prompts future developers to be
careful about doing something meaningful with this value. In fact, a
NULL check is intentionally omitted because causing a segfault here
would tell the future developer that they are meant to use the value for
something meaningful.
One alternative was making the family of functions accept a
`struct rev_info *` instead of the `struct display_notes_opt *`, since
the former contains the `show_notes` field as well. This does not work
because we have to call git_config() before repo_init_revisions().
However, if we had a `struct rev_info`, we'd need to initialize it before
it gets assigned values from git_config(). As a result, we break the
circular dependency by having standalone `int show_notes` and
`struct display_notes_opt notes_opt` variables which temporarily hold
values from git_config() until the values are copied over to `rev`.
To implement this change, we need to get a pointer to
`rev_info::show_notes`. Unfortunately, this is not possible with
bitfields and only direct-assignment is possible. Change
`rev_info::show_notes` to a non-bitfield int so that we can get its
address.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-12-12 01:49:50 +01:00
|
|
|
opt->use_default_notes = 1;
|
|
|
|
*show_notes = 1;
|
|
|
|
}
|
2019-12-09 14:10:44 +01:00
|
|
|
|
notes: break set_display_notes() into smaller functions
In 8164c961e1 (format-patch: use --notes behavior for format.notes,
2019-12-09), we introduced set_display_notes() which was a monolithic
function with three mutually exclusive branches. Break the function up
into three small and simple functions that each are only responsible for
one task.
This family of functions accepts an `int *show_notes` instead of
returning a value suitable for assignment to `show_notes`. This is for
two reasons. First of all, this guarantees that the external
`show_notes` variable changes in lockstep with the
`struct display_notes_opt`. Second, this prompts future developers to be
careful about doing something meaningful with this value. In fact, a
NULL check is intentionally omitted because causing a segfault here
would tell the future developer that they are meant to use the value for
something meaningful.
One alternative was making the family of functions accept a
`struct rev_info *` instead of the `struct display_notes_opt *`, since
the former contains the `show_notes` field as well. This does not work
because we have to call git_config() before repo_init_revisions().
However, if we had a `struct rev_info`, we'd need to initialize it before
it gets assigned values from git_config(). As a result, we break the
circular dependency by having standalone `int show_notes` and
`struct display_notes_opt notes_opt` variables which temporarily hold
values from git_config() until the values are copied over to `rev`.
To implement this change, we need to get a pointer to
`rev_info::show_notes`. Unfortunately, this is not possible with
bitfields and only direct-assignment is possible. Change
`rev_info::show_notes` to a non-bitfield int so that we can get its
address.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-12-12 01:49:50 +01:00
|
|
|
void enable_ref_display_notes(struct display_notes_opt *opt, int *show_notes,
|
|
|
|
const char *ref) {
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
strbuf_addstr(&buf, ref);
|
|
|
|
expand_notes_ref(&buf);
|
|
|
|
string_list_append(&opt->extra_notes_refs,
|
|
|
|
strbuf_detach(&buf, NULL));
|
|
|
|
*show_notes = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
void disable_display_notes(struct display_notes_opt *opt, int *show_notes)
|
|
|
|
{
|
|
|
|
opt->use_default_notes = -1;
|
|
|
|
/* we have been strdup'ing ourselves, so trick
|
|
|
|
* string_list into free()ing strings */
|
|
|
|
opt->extra_notes_refs.strdup_strings = 1;
|
|
|
|
string_list_clear(&opt->extra_notes_refs, 0);
|
|
|
|
opt->extra_notes_refs.strdup_strings = 0;
|
|
|
|
*show_notes = 0;
|
2019-12-09 14:10:44 +01:00
|
|
|
}
|
|
|
|
|
2019-12-09 14:10:39 +01:00
|
|
|
void load_display_notes(struct display_notes_opt *opt)
|
2010-03-12 18:04:26 +01:00
|
|
|
{
|
|
|
|
char *display_ref_env;
|
|
|
|
int load_config_refs = 0;
|
|
|
|
display_notes_refs.strdup_strings = 1;
|
|
|
|
|
|
|
|
assert(!display_notes_trees);
|
|
|
|
|
2011-03-29 22:57:27 +02:00
|
|
|
if (!opt || opt->use_default_notes > 0 ||
|
|
|
|
(opt->use_default_notes == -1 && !opt->extra_notes_refs.nr)) {
|
2010-06-26 01:41:38 +02:00
|
|
|
string_list_append(&display_notes_refs, default_notes_ref());
|
2010-03-12 18:04:26 +01:00
|
|
|
display_ref_env = getenv(GIT_NOTES_DISPLAY_REF_ENVIRONMENT);
|
|
|
|
if (display_ref_env) {
|
|
|
|
string_list_add_refs_from_colon_sep(&display_notes_refs,
|
|
|
|
display_ref_env);
|
|
|
|
load_config_refs = 0;
|
|
|
|
} else
|
|
|
|
load_config_refs = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
git_config(notes_display_config, &load_config_refs);
|
|
|
|
|
2011-03-29 22:56:53 +02:00
|
|
|
if (opt) {
|
2010-07-03 14:41:54 +02:00
|
|
|
struct string_list_item *item;
|
2011-03-29 22:56:53 +02:00
|
|
|
for_each_string_list_item(item, &opt->extra_notes_refs)
|
2010-07-03 14:41:54 +02:00
|
|
|
string_list_add_refs_by_glob(&display_notes_refs,
|
|
|
|
item->string);
|
|
|
|
}
|
2010-03-12 18:04:26 +01:00
|
|
|
|
2015-10-08 04:54:43 +02:00
|
|
|
display_notes_trees = load_notes_trees(&display_notes_refs, 0);
|
2010-03-12 18:04:26 +01:00
|
|
|
string_list_clear(&display_notes_refs, 0);
|
|
|
|
}
|
|
|
|
|
2017-05-30 19:30:43 +02:00
|
|
|
/*
 * Add the note `note_oid` for object `object_oid` to notes tree `t`
 * (the default notes tree when `t` is NULL; the tree must be initialized).
 *
 * The tree is marked dirty before the insertion is attempted. When
 * `combine_notes` is NULL, the tree's own combine_notes function is used.
 * Returns whatever note_tree_insert() returns.
 */
int add_note(struct notes_tree *t, const struct object_id *object_oid,
	     const struct object_id *note_oid, combine_notes_fn combine_notes)
{
	struct leaf_node *leaf;

	if (!t)
		t = &default_notes_tree;
	assert(t->initialized);

	t->dirty = 1;

	/* Fall back to the combine function registered for this tree. */
	if (!combine_notes)
		combine_notes = t->combine_notes;

	leaf = xmalloc(sizeof(*leaf));
	oidcpy(&leaf->key_oid, object_oid);
	oidcpy(&leaf->val_oid, note_oid);

	return note_tree_insert(t, t->root, 0, leaf, PTR_TYPE_NOTE,
				combine_notes);
}
|
|
|
|
|
2010-08-31 17:56:50 +02:00
|
|
|
int remove_note(struct notes_tree *t, const unsigned char *object_sha1)
|
2010-02-13 22:28:14 +01:00
|
|
|
{
|
|
|
|
struct leaf_node l;
|
|
|
|
|
2010-02-13 22:28:18 +01:00
|
|
|
if (!t)
|
|
|
|
t = &default_notes_tree;
|
|
|
|
assert(t->initialized);
|
2021-04-26 03:02:50 +02:00
|
|
|
oidread(&l.key_oid, object_sha1);
|
2017-05-30 19:30:37 +02:00
|
|
|
oidclr(&l.val_oid);
|
2010-03-18 16:03:43 +01:00
|
|
|
note_tree_remove(t, t->root, 0, &l);
|
2017-05-30 19:30:37 +02:00
|
|
|
if (is_null_oid(&l.val_oid)) /* no note was removed */
|
2010-08-31 17:56:50 +02:00
|
|
|
return 1;
|
|
|
|
t->dirty = 1;
|
|
|
|
return 0;
|
2010-02-13 22:28:14 +01:00
|
|
|
}
|
|
|
|
|
2017-05-30 19:30:40 +02:00
|
|
|
const struct object_id *get_note(struct notes_tree *t,
|
2017-05-30 19:30:43 +02:00
|
|
|
const struct object_id *oid)
|
2009-10-09 12:21:59 +02:00
|
|
|
{
|
2010-02-13 22:28:15 +01:00
|
|
|
struct leaf_node *found;
|
|
|
|
|
2010-02-13 22:28:18 +01:00
|
|
|
if (!t)
|
|
|
|
t = &default_notes_tree;
|
|
|
|
assert(t->initialized);
|
2017-05-30 19:30:43 +02:00
|
|
|
found = note_tree_find(t, t->root, 0, oid->hash);
|
2017-05-30 19:30:40 +02:00
|
|
|
return found ? &found->val_oid : NULL;
|
2009-10-09 12:21:59 +02:00
|
|
|
}
|
2009-10-09 12:21:57 +02:00
|
|
|
|
2010-02-13 22:28:18 +01:00
|
|
|
/*
 * Invoke `fn` with `cb_data` for the notes in tree `t` (the default notes
 * tree when `t` is NULL; the tree must be initialized), starting the
 * traversal at the root. `flags` is passed through to the helper.
 *
 * Returns the result of for_each_note_helper().
 */
int for_each_note(struct notes_tree *t, int flags, each_note_fn fn,
		  void *cb_data)
{
	int result;

	if (!t)
		t = &default_notes_tree;
	assert(t->initialized);

	result = for_each_note_helper(t, t->root, 0, 0, flags, fn, cb_data);
	return result;
}
|
|
|
|
|
2018-01-28 01:13:18 +01:00
|
|
|
int write_notes_tree(struct notes_tree *t, struct object_id *result)
|
2010-02-13 22:28:17 +01:00
|
|
|
{
|
|
|
|
struct tree_write_stack root;
|
|
|
|
struct write_each_note_data cb_data;
|
|
|
|
int ret;
|
2018-01-28 01:13:18 +01:00
|
|
|
int flags;
|
2010-02-13 22:28:17 +01:00
|
|
|
|
2010-02-13 22:28:18 +01:00
|
|
|
if (!t)
|
|
|
|
t = &default_notes_tree;
|
|
|
|
assert(t->initialized);
|
2010-02-13 22:28:17 +01:00
|
|
|
|
|
|
|
/* Prepare for traversal of current notes tree */
|
|
|
|
root.next = NULL; /* last forward entry in list is grounded */
|
2019-02-19 01:05:01 +01:00
|
|
|
strbuf_init(&root.buf, 256 * (32 + the_hash_algo->hexsz)); /* assume 256 entries */
|
2010-02-13 22:28:17 +01:00
|
|
|
root.path[0] = root.path[1] = '\0';
|
|
|
|
cb_data.root = &root;
|
notes.c: fix off-by-one error when decreasing notes fanout
As noted in the previous commit, the nature of the fanout heuristic
in the notes code causes the exact point at which we increase or
decrease the notes fanout to vary with the objects being annotated.
Since the object ids generated by the test environment are
deterministic (by design), the notes generated and tested by t3305
are always the same, and we therefore happen to see the same fanout
behavior from one run to the next.
Coincidentally, if we were to change the test environment slightly
(say by making a test commit on an unrelated branch before we start
the t3305 test proper), we not only see the fanout switch happen at
different points, we also manage to trigger a _bug_ in the notes
code where the fanout 1 -> 0 switch is not applied uniformly across
the notes tree, but instead yields a notes tree like this:
...
bdeafb301e44b0e4db0f738a2d2a7beefdb70b70
bff2d39b4f7122bd4c5caee3de353a774d1e632a
d3/8ec8f851adf470131178085bfbaab4b12ad2a7
e0b173960431a3e692ae929736df3c9b73a11d5b
eb3c3aede523d729990ac25c62a93eb47c21e2e3
...
The bug occurs when we are writing out a notes tree with a newly
decreased fanout, and the notes tree contains unexpanded subtrees
that should be consolidated into the parent tree as a consequence of
the decreased fanout):
Subtrees that happen to sit at an _even_ level in the internal notes
16-tree structure (in other words: subtrees whose path - "d3" in the
example above - is unique in the first nibble - i.e. there are no
other note paths that start with "d") are _not_ unpacked as part of
the tree writeout. This error will repeat itself in subsequent note
trees until the subtree is forced to be unpacked. In t3305 this only
happens when the d38ec8f8 note is itself removed from the tree.
The error is not severe (no information is lost, and the notes code
is able to read/decode this tree and manipulate it correctly), but
this is nonetheless a bug in the current implementation that should
be fixed.
That said, fixing the off-by-one error is not without complications:
We must take into account that the load_subtree() call from
for_each_note_helper() (that is now done to correctly unpack the
subtree while we're writing out the notes tree) may end up inserting
unpacked non-notes into the linked list of non_note entries held by
the struct notes_tree. Since we are in the process of writing out the
notes tree, this linked list is currently in the process of being
traversed by write_each_non_note_until(). The unpacked non-notes are
necessarily inserted between the last non-note we wrote out, and the
next non-note to be written. Hence, we cannot simply hold the
next_non_note to write in struct write_each_note_data (as we would
then silently skip these newly inserted notes), but must instead
always follow the ->next pointer from the last non-note we wrote.
(This part was caught by an existing test in t3304.)
Cc: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Brian M. Carlson <sandals@crustytoothpaste.net>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-03 22:04:45 +01:00
|
|
|
cb_data.nn_list = &(t->first_non_note);
|
|
|
|
cb_data.nn_prev = NULL;
|
2010-02-13 22:28:17 +01:00
|
|
|
|
|
|
|
/* Write tree objects representing current notes tree */
|
2018-01-28 01:13:18 +01:00
|
|
|
flags = FOR_EACH_NOTE_DONT_UNPACK_SUBTREES |
|
|
|
|
FOR_EACH_NOTE_YIELD_SUBTREES;
|
|
|
|
ret = for_each_note(t, flags, write_each_note, &cb_data) ||
|
|
|
|
write_each_non_note_until(NULL, &cb_data) ||
|
|
|
|
tree_write_stack_finish_subtree(&root) ||
|
2022-02-05 00:48:26 +01:00
|
|
|
write_object_file(root.buf.buf, root.buf.len, OBJ_TREE, result);
|
2010-02-13 22:28:17 +01:00
|
|
|
strbuf_release(&root.buf);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2010-05-14 23:42:07 +02:00
|
|
|
void prune_notes(struct notes_tree *t, int flags)
|
2010-02-13 22:28:27 +01:00
|
|
|
{
|
|
|
|
struct note_delete_list *l = NULL;
|
|
|
|
|
|
|
|
if (!t)
|
|
|
|
t = &default_notes_tree;
|
|
|
|
assert(t->initialized);
|
|
|
|
|
|
|
|
for_each_note(t, 0, prune_notes_helper, &l);
|
|
|
|
|
|
|
|
while (l) {
|
2010-05-14 23:42:07 +02:00
|
|
|
if (flags & NOTES_PRUNE_VERBOSE)
|
2019-02-19 01:05:02 +01:00
|
|
|
printf("%s\n", hash_to_hex(l->sha1));
|
2010-05-14 23:42:07 +02:00
|
|
|
if (!(flags & NOTES_PRUNE_DRYRUN))
|
|
|
|
remove_note(t, l->sha1);
|
2010-02-13 22:28:27 +01:00
|
|
|
l = l->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-02-13 22:28:18 +01:00
|
|
|
void free_notes(struct notes_tree *t)
|
2009-10-09 12:22:06 +02:00
|
|
|
{
|
2010-02-13 22:28:18 +01:00
|
|
|
if (!t)
|
|
|
|
t = &default_notes_tree;
|
|
|
|
if (t->root)
|
|
|
|
note_tree_free(t->root);
|
|
|
|
free(t->root);
|
Teach notes code to properly preserve non-notes in the notes tree
The note tree structure allows for non-note entries to coexist with note
entries in a notes tree. Although we certainly expect there to be very
few non-notes in a notes tree, we should still support them to a certain
degree.
This patch teaches the notes code to preserve non-notes when updating the
notes tree with write_notes_tree(). Non-notes are not affected by fanout
restructuring.
For non-notes to be handled correctly, we can no longer allow subtree
entries that do not match the fanout structure produced by the notes code
itself. This means that fanouts like 4/36, 6/34, 8/32, 4/4/32, etc. are
no longer recognized as note subtrees; only 2-based fanouts are allowed
(2/38, 2/2/36, 2/2/2/34, etc.). Since the notes code has never at any point
_produced_ non-2-based fanouts, it is highly unlikely that this change will
cause problems for anyone.
The patch also adds some tests verifying the correct handling of non-notes
in a notes tree.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:23 +01:00
|
|
|
while (t->first_non_note) {
|
|
|
|
t->prev_non_note = t->first_non_note->next;
|
|
|
|
free(t->first_non_note->path);
|
|
|
|
free(t->first_non_note);
|
|
|
|
t->first_non_note = t->prev_non_note;
|
|
|
|
}
|
2010-02-13 22:28:18 +01:00
|
|
|
free(t->ref);
|
|
|
|
memset(t, 0, sizeof(struct notes_tree));
|
2009-10-09 12:22:06 +02:00
|
|
|
}
|
|
|
|
|
2012-09-15 23:08:39 +02:00
|
|
|
/*
|
|
|
|
* Fill the given strbuf with the notes associated with the given object.
|
|
|
|
*
|
|
|
|
* If the given notes_tree structure is not initialized, it will be auto-
|
|
|
|
* initialized to the default value (see documentation for init_notes() above).
|
|
|
|
* If the given notes_tree is NULL, the internal/default notes_tree will be
|
|
|
|
* used instead.
|
|
|
|
*
|
2012-10-18 06:41:54 +02:00
|
|
|
* (raw != 0) gives the %N userformat; otherwise, the note message is given
|
|
|
|
* for human consumption.
|
2012-09-15 23:08:39 +02:00
|
|
|
*/
|
2017-05-30 19:30:41 +02:00
|
|
|
static void format_note(struct notes_tree *t, const struct object_id *object_oid,
|
2012-10-18 06:41:54 +02:00
|
|
|
struct strbuf *sb, const char *output_encoding, int raw)
|
2009-10-09 12:21:57 +02:00
|
|
|
{
|
|
|
|
static const char utf8[] = "utf-8";
|
2017-05-30 19:30:40 +02:00
|
|
|
const struct object_id *oid;
|
2009-10-09 12:21:57 +02:00
|
|
|
char *msg, *msg_p;
|
|
|
|
unsigned long linelen, msglen;
|
|
|
|
enum object_type type;
|
|
|
|
|
2010-02-13 22:28:18 +01:00
|
|
|
if (!t)
|
|
|
|
t = &default_notes_tree;
|
|
|
|
if (!t->initialized)
|
Refactor notes concatenation into a flexible interface for combining notes
When adding a note to an object that already has an existing note, the
current solution is to concatenate the contents of the two notes. However,
the caller may instead wish to _overwrite_ the existing note with the new
note, or maybe even _ignore_ the new note, and keep the existing one. There
might also be other ways of combining notes that are only known to the
caller.
Therefore, instead of unconditionally concatenating notes, we let the caller
specify how to combine notes, by passing in a pointer to a function for
combining notes. The caller may choose to implement its own function for
notes combining, but normally one of the following three conveniently
supplied notes combination functions will be sufficient:
- combine_notes_concatenate() combines the two notes by appending the
contents of the new note to the contents of the existing note.
- combine_notes_overwrite() replaces the existing note with the new note.
- combine_notes_ignore() keeps the existing note, and ignores the new note.
A combine_notes function can be passed to init_notes() to choose a default
combine_notes function for that notes tree. If NULL is given, the notes tree
falls back to combine_notes_concatenate() as the ultimate default.
A combine_notes function can also be passed directly to add_note(), to
control the notes combining behaviour for a note addition in particular.
If NULL is passed, the combine_notes function registered for the given
notes tree is used.
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-13 22:28:19 +01:00
|
|
|
init_notes(t, NULL, NULL, 0);
|
2009-10-09 12:21:57 +02:00
|
|
|
|
2017-05-30 19:30:43 +02:00
|
|
|
oid = get_note(t, object_oid);
|
2017-05-30 19:30:40 +02:00
|
|
|
if (!oid)
|
2009-10-09 12:21:57 +02:00
|
|
|
return;
|
|
|
|
|
2023-03-28 15:58:50 +02:00
|
|
|
if (!(msg = repo_read_object_file(the_repository, oid, &type, &msglen)) || type != OBJ_BLOB) {
|
2009-10-09 12:21:57 +02:00
|
|
|
free(msg);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (output_encoding && *output_encoding &&
|
2012-10-19 07:41:56 +02:00
|
|
|
!is_encoding_utf8(output_encoding)) {
|
2009-10-09 12:21:57 +02:00
|
|
|
char *reencoded = reencode_string(msg, output_encoding, utf8);
|
|
|
|
if (reencoded) {
|
|
|
|
free(msg);
|
|
|
|
msg = reencoded;
|
|
|
|
msglen = strlen(msg);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we will end the annotation by a newline anyway */
|
|
|
|
if (msglen && msg[msglen - 1] == '\n')
|
|
|
|
msglen--;
|
|
|
|
|
2012-10-18 06:41:54 +02:00
|
|
|
if (!raw) {
|
2010-03-12 18:04:26 +01:00
|
|
|
const char *ref = t->ref;
|
|
|
|
if (!ref || !strcmp(ref, GIT_NOTES_DEFAULT_REF)) {
|
|
|
|
strbuf_addstr(sb, "\nNotes:\n");
|
|
|
|
} else {
|
2020-01-30 20:35:46 +01:00
|
|
|
skip_prefix(ref, "refs/", &ref);
|
|
|
|
skip_prefix(ref, "notes/", &ref);
|
2010-03-12 18:04:26 +01:00
|
|
|
strbuf_addf(sb, "\nNotes (%s):\n", ref);
|
|
|
|
}
|
|
|
|
}
|
2009-10-09 12:21:57 +02:00
|
|
|
|
|
|
|
for (msg_p = msg; msg_p < msg + msglen; msg_p += linelen + 1) {
|
|
|
|
linelen = strchrnul(msg_p, '\n') - msg_p;
|
|
|
|
|
2012-10-18 06:41:54 +02:00
|
|
|
if (!raw)
|
2009-10-09 12:22:04 +02:00
|
|
|
strbuf_addstr(sb, " ");
|
2009-10-09 12:21:57 +02:00
|
|
|
strbuf_add(sb, msg_p, linelen);
|
|
|
|
strbuf_addch(sb, '\n');
|
|
|
|
}
|
|
|
|
|
|
|
|
free(msg);
|
|
|
|
}
|
2010-03-12 18:04:26 +01:00
|
|
|
|
2017-05-30 19:30:41 +02:00
|
|
|
void format_display_notes(const struct object_id *object_oid,
|
2012-10-18 06:41:54 +02:00
|
|
|
struct strbuf *sb, const char *output_encoding, int raw)
|
2010-03-12 18:04:26 +01:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
assert(display_notes_trees);
|
|
|
|
for (i = 0; display_notes_trees[i]; i++)
|
2017-05-30 19:30:41 +02:00
|
|
|
format_note(display_notes_trees[i], object_oid, sb,
|
2012-10-18 06:41:54 +02:00
|
|
|
output_encoding, raw);
|
2010-03-12 18:04:26 +01:00
|
|
|
}
|
2010-03-12 18:04:31 +01:00
|
|
|
|
|
|
|
/*
 * Copy the note of `from_obj` onto `to_obj` within notes tree `t`.
 *
 * Returns 1 without touching anything when `to_obj` already has a note and
 * `force` is zero. When `from_obj` has a note it is added to `to_obj`;
 * when it has none but `to_obj` has one, the null object id is added in
 * its place. In both of those cases the result of add_note() is returned.
 * Returns 0 when neither object has a note.
 */
int copy_note(struct notes_tree *t,
	      const struct object_id *from_obj, const struct object_id *to_obj,
	      int force, combine_notes_fn combine_notes)
{
	const struct object_id *src_note = get_note(t, from_obj);
	const struct object_id *dst_note = get_note(t, to_obj);

	/* Refuse to clobber an existing note unless forced. */
	if (dst_note && !force)
		return 1;

	/* Nothing to copy and nothing to replace. */
	if (!src_note && !dst_note)
		return 0;

	return add_note(t, to_obj, src_note ? src_note : null_oid(),
			combine_notes);
}
|
2011-03-29 22:55:32 +02:00
|
|
|
|
|
|
|
void expand_notes_ref(struct strbuf *sb)
|
|
|
|
{
|
2013-11-30 21:55:40 +01:00
|
|
|
if (starts_with(sb->buf, "refs/notes/"))
|
2011-03-29 22:55:32 +02:00
|
|
|
return; /* we're happy */
|
2013-11-30 21:55:40 +01:00
|
|
|
else if (starts_with(sb->buf, "notes/"))
|
2020-02-09 14:44:23 +01:00
|
|
|
strbuf_insertstr(sb, 0, "refs/");
|
2011-03-29 22:55:32 +02:00
|
|
|
else
|
2020-02-09 14:44:23 +01:00
|
|
|
strbuf_insertstr(sb, 0, "refs/notes/");
|
2011-03-29 22:55:32 +02:00
|
|
|
}
|
2015-12-29 23:40:28 +01:00
|
|
|
|
|
|
|
void expand_loose_notes_ref(struct strbuf *sb)
|
|
|
|
{
|
2017-05-30 19:30:38 +02:00
|
|
|
struct object_id object;
|
2015-12-29 23:40:28 +01:00
|
|
|
|
2023-03-28 15:58:46 +02:00
|
|
|
if (repo_get_oid(the_repository, sb->buf, &object)) {
|
2015-12-29 23:40:28 +01:00
|
|
|
/* fallback to expand_notes_ref */
|
|
|
|
expand_notes_ref(sb);
|
|
|
|
}
|
|
|
|
}
|