2018-03-24 08:45:00 +01:00
|
|
|
#include "test-tool.h"
|
2012-09-12 16:04:43 +02:00
|
|
|
#include "cache.h"
|
|
|
|
#include "string-list.h"
|
|
|
|
|
2012-09-12 16:04:44 +02:00
|
|
|
/*
 * Fill a string list from a command-line argument.  The argument "-"
 * stands for an empty list and leaves "list" untouched; any other
 * argument is split on ':' (so "" produces a list holding a single
 * empty string).  The caller must have set list->strdup_strings.
 */
static void parse_string_list(struct string_list *list, const char *arg)
{
	/* Only "-" is special; everything else goes through the splitter. */
	if (strcmp(arg, "-"))
		(void)string_list_split(list, arg, ':', -1);
}
|
|
|
|
|
2012-09-15 18:18:42 +02:00
|
|
|
static void write_list(const struct string_list *list)
|
2012-09-12 16:04:43 +02:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < list->nr; i++)
|
|
|
|
printf("[%d]: \"%s\"\n", i, list->items[i].string);
|
|
|
|
}
|
|
|
|
|
2012-09-15 18:18:42 +02:00
|
|
|
static void write_list_compact(const struct string_list *list)
|
2012-09-12 16:04:44 +02:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
if (!list->nr)
|
|
|
|
printf("-\n");
|
|
|
|
else {
|
|
|
|
printf("%s", list->items[0].string);
|
|
|
|
for (i = 1; i < list->nr; i++)
|
|
|
|
printf(":%s", list->items[i].string);
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-09-15 18:18:42 +02:00
|
|
|
static int prefix_cb(struct string_list_item *item, void *cb_data)
|
2012-09-12 16:04:44 +02:00
|
|
|
{
|
|
|
|
const char *prefix = (const char *)cb_data;
|
2013-11-30 21:55:40 +01:00
|
|
|
return starts_with(item->string, prefix);
|
2012-09-12 16:04:44 +02:00
|
|
|
}
|
|
|
|
|
2018-03-24 08:45:00 +01:00
|
|
|
int cmd__string_list(int argc, const char **argv)
|
2012-09-12 16:04:43 +02:00
|
|
|
{
|
|
|
|
if (argc == 5 && !strcmp(argv[1], "split")) {
|
|
|
|
struct string_list list = STRING_LIST_INIT_DUP;
|
|
|
|
int i;
|
|
|
|
const char *s = argv[2];
|
|
|
|
int delim = *argv[3];
|
|
|
|
int maxsplit = atoi(argv[4]);
|
|
|
|
|
|
|
|
i = string_list_split(&list, s, delim, maxsplit);
|
|
|
|
printf("%d\n", i);
|
|
|
|
write_list(&list);
|
|
|
|
string_list_clear(&list, 0);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (argc == 5 && !strcmp(argv[1], "split_in_place")) {
|
|
|
|
struct string_list list = STRING_LIST_INIT_NODUP;
|
|
|
|
int i;
|
|
|
|
char *s = xstrdup(argv[2]);
|
string-list: multi-delimiter `string_list_split_in_place()`
Enhance `string_list_split_in_place()` to accept multiple characters as
delimiters instead of a single character.
Instead of using `strchr(2)` to locate the first occurrence of the given
delimiter character, `string_list_split_in_place_multi()` uses
`strcspn(2)` to move past the initial segment of characters comprised of
any characters in the delimiting set.
When only a single delimiting character is provided, `strpbrk(2)` (which
is implemented with `strcspn(2)`) has equivalent performance to
`strchr(2)`. Modern `strcspn(2)` implementations treat an empty
delimiter or the singleton delimiter as a special case and fall back to
calling strchrnul(). Both glibc[1] and musl[2] implement `strcspn(2)`
this way.
This change is one step to removing `strtok(2)` from the tree. Note that
`string_list_split_in_place()` is not a strict replacement for
`strtok()`, since it will happily turn sequential delimiter characters
into empty entries in the resulting string_list. For example:
string_list_split_in_place(&xs, "foo:;:bar:;:baz", ":;", -1)
would yield a string list of:
["foo", "", "", "bar", "", "", "baz"]
Callers that wish to emulate the behavior of strtok(2) more directly
should call `string_list_remove_empty_items()` after splitting.
To avoid regressions for the new multi-character delimter cases, update
t0063 in this patch as well.
[1]: https://sourceware.org/git/?p=glibc.git;a=blob;f=string/strcspn.c;hb=glibc-2.37#l35
[2]: https://git.musl-libc.org/cgit/musl/tree/src/string/strcspn.c?h=v1.2.3#n11
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-04-25 00:20:10 +02:00
|
|
|
const char *delim = argv[3];
|
2012-09-12 16:04:43 +02:00
|
|
|
int maxsplit = atoi(argv[4]);
|
|
|
|
|
|
|
|
i = string_list_split_in_place(&list, s, delim, maxsplit);
|
|
|
|
printf("%d\n", i);
|
|
|
|
write_list(&list);
|
|
|
|
string_list_clear(&list, 0);
|
|
|
|
free(s);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-09-12 16:04:44 +02:00
|
|
|
if (argc == 4 && !strcmp(argv[1], "filter")) {
|
|
|
|
/*
|
|
|
|
* Retain only the items that have the specified prefix.
|
|
|
|
* Arguments: list|- prefix
|
|
|
|
*/
|
|
|
|
struct string_list list = STRING_LIST_INIT_DUP;
|
|
|
|
const char *prefix = argv[3];
|
|
|
|
|
|
|
|
parse_string_list(&list, argv[2]);
|
|
|
|
filter_string_list(&list, 0, prefix_cb, (void *)prefix);
|
|
|
|
write_list_compact(&list);
|
|
|
|
string_list_clear(&list, 0);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-09-12 16:04:45 +02:00
|
|
|
if (argc == 3 && !strcmp(argv[1], "remove_duplicates")) {
|
|
|
|
struct string_list list = STRING_LIST_INIT_DUP;
|
|
|
|
|
|
|
|
parse_string_list(&list, argv[2]);
|
|
|
|
string_list_remove_duplicates(&list, 0);
|
|
|
|
write_list_compact(&list);
|
|
|
|
string_list_clear(&list, 0);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-01-22 18:53:57 +01:00
|
|
|
if (argc == 2 && !strcmp(argv[1], "sort")) {
|
|
|
|
struct string_list list = STRING_LIST_INIT_NODUP;
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
struct string_list_item *item;
|
|
|
|
|
|
|
|
strbuf_read(&sb, 0, 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Split by newline, but don't create a string_list item
|
|
|
|
* for the empty string after the last separator.
|
|
|
|
*/
|
2017-10-03 16:36:40 +02:00
|
|
|
if (sb.len && sb.buf[sb.len - 1] == '\n')
|
2017-01-22 18:53:57 +01:00
|
|
|
strbuf_setlen(&sb, sb.len - 1);
|
string-list: multi-delimiter `string_list_split_in_place()`
Enhance `string_list_split_in_place()` to accept multiple characters as
delimiters instead of a single character.
Instead of using `strchr(2)` to locate the first occurrence of the given
delimiter character, `string_list_split_in_place_multi()` uses
`strcspn(2)` to move past the initial segment of characters comprised of
any characters in the delimiting set.
When only a single delimiting character is provided, `strpbrk(2)` (which
is implemented with `strcspn(2)`) has equivalent performance to
`strchr(2)`. Modern `strcspn(2)` implementations treat an empty
delimiter or the singleton delimiter as a special case and fall back to
calling strchrnul(). Both glibc[1] and musl[2] implement `strcspn(2)`
this way.
This change is one step to removing `strtok(2)` from the tree. Note that
`string_list_split_in_place()` is not a strict replacement for
`strtok()`, since it will happily turn sequential delimiter characters
into empty entries in the resulting string_list. For example:
string_list_split_in_place(&xs, "foo:;:bar:;:baz", ":;", -1)
would yield a string list of:
["foo", "", "", "bar", "", "", "baz"]
Callers that wish to emulate the behavior of strtok(2) more directly
should call `string_list_remove_empty_items()` after splitting.
To avoid regressions for the new multi-character delimter cases, update
t0063 in this patch as well.
[1]: https://sourceware.org/git/?p=glibc.git;a=blob;f=string/strcspn.c;hb=glibc-2.37#l35
[2]: https://git.musl-libc.org/cgit/musl/tree/src/string/strcspn.c?h=v1.2.3#n11
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-04-25 00:20:10 +02:00
|
|
|
string_list_split_in_place(&list, sb.buf, "\n", -1);
|
2017-01-22 18:53:57 +01:00
|
|
|
|
|
|
|
string_list_sort(&list);
|
|
|
|
|
|
|
|
for_each_string_list_item(item, &list)
|
|
|
|
puts(item->string);
|
|
|
|
|
|
|
|
string_list_clear(&list, 0);
|
|
|
|
strbuf_release(&sb);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-09-12 16:04:43 +02:00
|
|
|
fprintf(stderr, "%s: unknown function name: %s\n", argv[0],
|
|
|
|
argv[1] ? argv[1] : "(there was none)");
|
|
|
|
return 1;
|
|
|
|
}
|