From dd1055ed594f8fef18779cce3cd921c4ac66cf9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C3=85gren?= Date: Sat, 23 Sep 2017 01:34:49 +0200 Subject: [PATCH 1/6] builtin/commit: fix memory leak in `prepare_index()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Release `pathspec` and the string list `partial`. When we clear the string list, make sure we do not free the `util` pointers. That would result in double-freeing, since we set them up as `item->util = item` in `list_paths()`. Initialize the string list early, so that we can always release it. That introduces some unnecessary overhead in various code paths, but means there is one and only one way out of the function. If we ever accumulate more things we need to free, it should be straightforward to do so. Signed-off-by: Martin Ågren Reviewed-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/commit.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 1a0da71a43..ce76b6f960 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -336,7 +336,7 @@ static void refresh_cache_or_die(int refresh_flags) static const char *prepare_index(int argc, const char **argv, const char *prefix, const struct commit *current_head, int is_status) { - struct string_list partial; + struct string_list partial = STRING_LIST_INIT_DUP; struct pathspec pathspec; int refresh_flags = REFRESH_QUIET; const char *ret; @@ -381,7 +381,8 @@ static const char *prepare_index(int argc, const char **argv, const char *prefix warning(_("Failed to update main cache tree")); commit_style = COMMIT_NORMAL; - return get_lock_file_path(&index_lock); + ret = get_lock_file_path(&index_lock); + goto out; } /* @@ -404,7 +405,8 @@ static const char *prepare_index(int argc, const char **argv, const char *prefix if (write_locked_index(&the_index, &index_lock, CLOSE_LOCK)) die(_("unable to write new_index file")); commit_style = COMMIT_NORMAL; - return get_lock_file_path(&index_lock); + ret = get_lock_file_path(&index_lock); + goto out; } /* @@ -430,7 +432,8 @@ static const char *prepare_index(int argc, const char **argv, const char *prefix rollback_lock_file(&index_lock); } commit_style = COMMIT_AS_IS; - return get_index_file(); + ret = get_index_file(); + goto out; } /* @@ -461,7 +464,6 @@ static const char *prepare_index(int argc, const char **argv, const char *prefix die(_("cannot do a partial commit during a cherry-pick.")); } - string_list_init(&partial, 1); if (list_paths(&partial, !current_head ? NULL : "HEAD", prefix, &pathspec)) exit(1); @@ -491,6 +493,9 @@ static const char *prepare_index(int argc, const char **argv, const char *prefix discard_cache(); ret = get_lock_file_path(&false_lock); read_cache_from(ret); +out: + string_list_clear(&partial, 0); + clear_pathspec(&pathspec); return ret; } From cb7b29eb67772d08e2365ed07ede9d954d0344c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C3=85gren?= Date: Sat, 23 Sep 2017 01:34:50 +0200 Subject: [PATCH 2/6] commit: fix memory leak in `reduce_heads()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't free the temporary scratch space we use with `remove_redundant()`. Free it similar to how we do it in `get_merge_bases_many_0()`. Signed-off-by: Martin Ågren Reviewed-by: Jeff King Signed-off-by: Junio C Hamano --- commit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/commit.c b/commit.c index d3150d6270..f73976bcc9 100644 --- a/commit.c +++ b/commit.c @@ -1080,6 +1080,7 @@ struct commit_list *reduce_heads(struct commit_list *heads) num_head = remove_redundant(array, num_head); for (i = 0; i < num_head; i++) tail = &commit_list_insert(array[i], tail)->next; + free(array); return result; } From b2ccdf7fc15e866a883b706540055b5d05fb9aef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C3=85gren?= Date: Sat, 23 Sep 2017 01:34:51 +0200 Subject: [PATCH 3/6] leak_pending: use `object_array_clear()`, not `free()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting `leak_pending = 1` tells `prepare_revision_walk()` not to release the `pending` array, and makes that the caller's responsibility. See 4a43d374f (revision: add leak_pending flag, 2011-10-01) and 353f5657a (bisect: use leak_pending flag, 2011-10-01). Commit 1da1e07c8 (clean up name allocation in prepare_revision_walk, 2014-10-15) fixed a memory leak in `prepare_revision_walk()` by switching from `free()` to `object_array_clear()`. However, where we use the `leak_pending`-mechanism, we're still only calling `free()`. Use `object_array_clear()` instead. Copy some helpful comments from 353f5657a to the other callers that we update to clarify the memory responsibilities, and to highlight that the commits are not affected when we clear the array -- it is indeed correct to both tidy up the commit flags and clear the object array. Document `leak_pending` in revision.h to help future users get this right. Signed-off-by: Martin Ågren Reviewed-by: Jeff King Signed-off-by: Junio C Hamano --- bisect.c | 3 ++- builtin/checkout.c | 9 ++++++++- bundle.c | 9 ++++++++- revision.h | 11 +++++++++++ 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/bisect.c b/bisect.c index a9fd9fbc61..fc797f6aea 100644 --- a/bisect.c +++ b/bisect.c @@ -826,7 +826,8 @@ static int check_ancestors(const char *prefix) /* Clean up objects used, as they will be reused. */ clear_commit_marks_for_object_array(&pending_copy, ALL_REV_FLAGS); - free(pending_copy.objects); + + object_array_clear(&pending_copy); return res; } diff --git a/builtin/checkout.c b/builtin/checkout.c index 2d75ac66c7..52f1b67708 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -796,9 +796,14 @@ static void orphaned_commit_warning(struct commit *old, struct commit *new) for_each_ref(add_pending_uninteresting_ref, &revs); add_pending_oid(&revs, "HEAD", &new->object.oid, UNINTERESTING); + /* Save pending objects, so they can be cleaned up later. */ refs = revs.pending; revs.leak_pending = 1; + /* + * prepare_revision_walk (together with .leak_pending = 1) makes us + * the sole owner of the list of pending objects. + */ if (prepare_revision_walk(&revs)) die(_("internal error in revision walk")); if (!(old->object.flags & UNINTERESTING)) @@ -806,8 +811,10 @@ static void orphaned_commit_warning(struct commit *old, struct commit *new) else describe_detached_head(_("Previous HEAD position was"), old); + /* Clean up objects used, as they will be reused. */ clear_commit_marks_for_object_array(&refs, ALL_REV_FLAGS); - free(refs.objects); + + object_array_clear(&refs); } static int switch_branches(const struct checkout_opts *opts, diff --git a/bundle.c b/bundle.c index d15db03c84..c092d5d68f 100644 --- a/bundle.c +++ b/bundle.c @@ -157,9 +157,14 @@ int verify_bundle(struct bundle_header *header, int verbose) req_nr = revs.pending.nr; setup_revisions(2, argv, &revs, NULL); + /* Save pending objects, so they can be cleaned up later. */ refs = revs.pending; revs.leak_pending = 1; + /* + * prepare_revision_walk (together with .leak_pending = 1) makes us + * the sole owner of the list of pending objects. + */ if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); @@ -176,8 +181,10 @@ int verify_bundle(struct bundle_header *header, int verbose) refs.objects[i].name); } + /* Clean up objects used, as they will be reused. */ clear_commit_marks_for_object_array(&refs, ALL_REV_FLAGS); - free(refs.objects); + + object_array_clear(&refs); if (verbose) { struct ref_list *r; diff --git a/revision.h b/revision.h index bc18487d6f..3162cc78e8 100644 --- a/revision.h +++ b/revision.h @@ -149,6 +149,17 @@ struct rev_info { date_mode_explicit:1, preserve_subject:1; unsigned int disable_stdin:1; + /* + * Set `leak_pending` to prevent `prepare_revision_walk()` from clearing + * the array of pending objects (`pending`). It will still forget about + * the array and its entries, so they really are leaked. This can be + * useful if the `struct object_array` `pending` is copied before + * calling `prepare_revision_walk()`. By setting `leak_pending`, you + * effectively claim ownership of the old array, so you should most + * likely call `object_array_clear(&pending_copy)` once you are done. + * Observe that this is about ownership of the array and its entries, + * not the commits referenced by those entries. + */ unsigned int leak_pending:1; /* --show-linear-break */ unsigned int track_linear:1, From dcb572ab94f83a1a857d276fcebff5700077f2b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C3=85gren?= Date: Sat, 23 Sep 2017 01:34:52 +0200 Subject: [PATCH 4/6] object_array: use `object_array_clear()`, not `free()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of freeing `foo.objects` for an object array `foo` (sometimes conditionally), call `object_array_clear(&foo)`. This means we don't poke as much into the implementation, which is already a good thing, but also that we release the individual entries as well, thereby fixing at least one memory-leak (in diff-lib.c). If someone is holding on to a pointer to an element's `name` or `path`, that is now a dangling pointer, i.e., we'd be turning an unpleasant situation into an outright bug. To the best of my understanding no such long-term pointers are being taken. The way we handle `study` in builting/reflog.c still looks like it might leak. That will be addressed in the next commit. Signed-off-by: Martin Ågren Reviewed-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/reflog.c | 4 ++-- diff-lib.c | 3 +-- submodule.c | 4 ++-- upload-pack.c | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/builtin/reflog.c b/builtin/reflog.c index e237d927a0..6b34f23e78 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -182,8 +182,8 @@ static int commit_is_complete(struct commit *commit) found.objects[i].item->flags |= SEEN; } /* free object arrays */ - free(study.objects); - free(found.objects); + object_array_clear(&study); + object_array_clear(&found); return !is_incomplete; } diff --git a/diff-lib.c b/diff-lib.c index 2a52b07954..4e0980caa8 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -549,7 +549,6 @@ int index_differs_from(const char *def, int diff_flags, rev.diffopt.flags |= diff_flags; rev.diffopt.ita_invisible_in_index = ita_invisible_in_index; run_diff_index(&rev, 1); - if (rev.pending.alloc) - free(rev.pending.objects); + object_array_clear(&rev.pending); return (DIFF_OPT_TST(&rev.diffopt, HAS_CHANGES) != 0); } diff --git a/submodule.c b/submodule.c index 36f45f5a5a..79fd01f7b0 100644 --- a/submodule.c +++ b/submodule.c @@ -1728,7 +1728,7 @@ static int find_first_merges(struct object_array *result, const char *path, add_object_array(merges.objects[i].item, NULL, result); } - free(merges.objects); + object_array_clear(&merges); return result->nr; } @@ -1833,7 +1833,7 @@ int merge_submodule(struct object_id *result, const char *path, print_commit((struct commit *) merges.objects[i].item); } - free(merges.objects); + object_array_clear(&merges); return 0; } diff --git a/upload-pack.c b/upload-pack.c index 7efff2fbfd..ec0eee8fc4 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -888,7 +888,7 @@ static void receive_needs(void) } shallow_nr += shallows.nr; - free(shallows.objects); + object_array_clear(&shallows); } /* return non-zero if the ref is hidden, otherwise 0 */ From 719920393737b3934a168f35ab45e09104edeed8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C3=85gren?= Date: Sat, 23 Sep 2017 01:34:53 +0200 Subject: [PATCH 5/6] object_array: add and use `object_array_pop()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a couple of places, we pop objects off an object array `foo` by decreasing `foo.nr`. We access `foo.nr` in many places, but most if not all other times we do so read-only, e.g., as we iterate over the array. But when we change `foo.nr` behind the array's back, it feels a bit nasty and looks like it might leak memory. Leaks happen if the popped element has an allocated `name` or `path`. At the moment, that is not the case. Still, 1) the object array might gain more fields that want to be freed, 2) a code path where we pop might start using names or paths, 3) one of these code paths might be copied to somewhere where we do, and 4) using a dedicated function for popping is conceptually cleaner. Introduce and use `object_array_pop()` instead. Release memory in the new function. Document that popping an object leaves the associated elements in limbo. The converted places were identified by grepping for "\.nr\>" and looking for "--". Make the new function return NULL on an empty array. This is consistent with `pop_commit()` and allows the following: while ((o = object_array_pop(&foo)) != NULL) { // do something } But as noted above, we don't need to go out of our way to avoid reading `foo.nr`. This is probably more readable: while (foo.nr) { ... o = object_array_pop(&foo); // do something } The name of `object_array_pop()` does not quite align with `add_object_array()`. That is unfortunate. On the other hand, it matches `object_array_clear()`. Arguably it's `add_...` that is the odd one out, since it reads like it's used to "add" an "object array". For that reason, side with `object_array_clear()`. Signed-off-by: Martin Ågren Reviewed-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 3 +-- builtin/fsck.c | 7 +------ builtin/reflog.c | 2 +- object.c | 13 +++++++++++++ object.h | 8 ++++++++ shallow.c | 2 +- 6 files changed, 25 insertions(+), 10 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index d412c0a8f3..cff8d0fc5b 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -634,11 +634,10 @@ static void handle_tail(struct object_array *commits, struct rev_info *revs) { struct commit *commit; while (commits->nr) { - commit = (struct commit *)commits->objects[commits->nr - 1].item; + commit = (struct commit *)object_array_pop(commits); if (has_unshown_parent(commit)) return; handle_commit(commit, revs); - commits->nr--; } } diff --git a/builtin/fsck.c b/builtin/fsck.c index d18244ab54..7d4ad02733 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -181,12 +181,7 @@ static int traverse_reachable(void) if (show_progress) progress = start_progress_delay(_("Checking connectivity"), 0, 0, 2); while (pending.nr) { - struct object_array_entry *entry; - struct object *obj; - - entry = pending.objects + --pending.nr; - obj = entry->item; - result |= traverse_one_object(obj); + result |= traverse_one_object(object_array_pop(&pending)); display_progress(progress, ++nr); } stop_progress(&progress); diff --git a/builtin/reflog.c b/builtin/reflog.c index 6b34f23e78..2067cca5b1 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -126,7 +126,7 @@ static int commit_is_complete(struct commit *commit) struct commit *c; struct commit_list *parent; - c = (struct commit *)study.objects[--study.nr].item; + c = (struct commit *)object_array_pop(&study); if (!c->object.parsed && !parse_object(&c->object.oid)) c->object.flags |= INCOMPLETE; diff --git a/object.c b/object.c index 321d7e9201..b9a4a0e501 100644 --- a/object.c +++ b/object.c @@ -353,6 +353,19 @@ static void object_array_release_entry(struct object_array_entry *ent) free(ent->path); } +struct object *object_array_pop(struct object_array *array) +{ + struct object *ret; + + if (!array->nr) + return NULL; + + ret = array->objects[array->nr - 1].item; + object_array_release_entry(&array->objects[array->nr - 1]); + array->nr--; + return ret; +} + void object_array_filter(struct object_array *array, object_array_each_func_t want, void *cb_data) { diff --git a/object.h b/object.h index 0a419ba8da..df8abe96f7 100644 --- a/object.h +++ b/object.h @@ -116,6 +116,14 @@ int object_list_contains(struct object_list *list, struct object *obj); void add_object_array(struct object *obj, const char *name, struct object_array *array); void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path); +/* + * Returns NULL if the array is empty. Otherwise, returns the last object + * after removing its entry from the array. Other resources associated + * with that object are left in an unspecified state and should not be + * examined. + */ +struct object *object_array_pop(struct object_array *array); + typedef int (*object_array_each_func_t)(struct object_array_entry *, void *); /* diff --git a/shallow.c b/shallow.c index 54359d5490..901ac97917 100644 --- a/shallow.c +++ b/shallow.c @@ -99,7 +99,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth, cur_depth = 0; } else { commit = (struct commit *) - stack.objects[--stack.nr].item; + object_array_pop(&stack); cur_depth = *(int *)commit->util; } } From 4d01a7fa65c50e817a935396432e199b7a565f53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20=C3=85gren?= Date: Sat, 23 Sep 2017 01:34:54 +0200 Subject: [PATCH 6/6] pack-bitmap[-write]: use `object_array_clear()`, don't leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of setting the fields of rev->pending to 0/NULL, thereby leaking memory, call `object_array_clear(&rev->pending)`. In pack-bitmap.c, we make copies of those fields as `pending_nr` and `pending_e`. We never update the aliases and the original fields never change, so the aliases are not really needed and just make it harder than necessary to understand the code. While we're here, remove the aliases to make the code easier to follow. Signed-off-by: Martin Ågren Reviewed-by: Jeff King Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 4 +--- pack-bitmap.c | 10 +++------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 8e47a96b3b..a8df5ce2ab 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -297,9 +297,7 @@ void bitmap_writer_build(struct packing_data *to_pack) traverse_commit_list(&revs, show_commit, show_object, base); - revs.pending.nr = 0; - revs.pending.alloc = 0; - revs.pending.objects = NULL; + object_array_clear(&revs.pending); stored->bitmap = bitmap_to_ewah(base); need_reset = 0; diff --git a/pack-bitmap.c b/pack-bitmap.c index 327634cd71..0a49c1595a 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -653,8 +653,6 @@ static int in_bitmapped_pack(struct object_list *roots) int prepare_bitmap_walk(struct rev_info *revs) { unsigned int i; - unsigned int pending_nr = revs->pending.nr; - struct object_array_entry *pending_e = revs->pending.objects; struct object_list *wants = NULL; struct object_list *haves = NULL; @@ -669,8 +667,8 @@ int prepare_bitmap_walk(struct rev_info *revs) return -1; } - for (i = 0; i < pending_nr; ++i) { - struct object *object = pending_e[i].item; + for (i = 0; i < revs->pending.nr; ++i) { + struct object *object = revs->pending.objects[i].item; if (object->type == OBJ_NONE) parse_object_or_die(&object->oid, NULL); @@ -714,9 +712,7 @@ int prepare_bitmap_walk(struct rev_info *revs) if (!bitmap_git.loaded && load_pack_bitmap() < 0) return -1; - revs->pending.nr = 0; - revs->pending.alloc = 0; - revs->pending.objects = NULL; + object_array_clear(&revs->pending); if (haves) { revs->ignore_missing_links = 1;