From 9b3497cab9986883ff2499722f8758795afa34b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Tue, 1 Jan 2013 09:44:05 +0700 Subject: [PATCH 1/7] wildmatch: rename constants and update prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - All exported constants now have a prefix WM_ - Do not rely on FNM_* constants, use the WM_ counterparts - Remove TRUE and FALSE to follow Git's coding style - While at it, turn flags type from int to unsigned int - Add an (unused yet) argument to carry extra information so that we don't have to change the prototype again later when we need to pass other stuff to wildmatch Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 3 +- test-wildmatch.c | 4 +-- wildmatch.c | 88 +++++++++++++++++++++++------------------------- wildmatch.h | 22 ++++++++---- 4 files changed, 62 insertions(+), 55 deletions(-) diff --git a/dir.c b/dir.c index cb7328b548..175a1827ba 100644 --- a/dir.c +++ b/dir.c @@ -595,7 +595,8 @@ int match_pathname(const char *pathname, int pathlen, } return wildmatch(pattern, name, - ignore_case ? FNM_CASEFOLD : 0) == 0; + ignore_case ? WM_CASEFOLD : 0, + NULL) == 0; } /* Scan the list and let the last match determine the fate. 
diff --git a/test-wildmatch.c b/test-wildmatch.c index e384c8edb1..4bb23b4c92 100644 --- a/test-wildmatch.c +++ b/test-wildmatch.c @@ -12,9 +12,9 @@ int main(int argc, char **argv) argv[i] += 3; } if (!strcmp(argv[1], "wildmatch")) - return !!wildmatch(argv[3], argv[2], 0); + return !!wildmatch(argv[3], argv[2], 0, NULL); else if (!strcmp(argv[1], "iwildmatch")) - return !!wildmatch(argv[3], argv[2], FNM_CASEFOLD); + return !!wildmatch(argv[3], argv[2], WM_CASEFOLD, NULL); else if (!strcmp(argv[1], "fnmatch")) return !!fnmatch(argv[3], argv[2], FNM_PATHNAME); else diff --git a/wildmatch.c b/wildmatch.c index 2d3ed84364..2a655fa7d4 100644 --- a/wildmatch.c +++ b/wildmatch.c @@ -18,9 +18,6 @@ typedef unsigned char uchar; #define NEGATE_CLASS '!' #define NEGATE_CLASS2 '^' -#define FALSE 0 -#define TRUE 1 - #define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \ && *(class) == *(litmatch) \ && strncmp((char*)class, litmatch, len) == 0) @@ -64,7 +61,7 @@ static int dowild(const uchar *p, const uchar *text, int force_lower_case) int matched, match_slash, negated; uchar t_ch, prev_ch; if ((t_ch = *text) == '\0' && p_ch != '*') - return ABORT_ALL; + return WM_ABORT_ALL; if (force_lower_case && ISUPPER(t_ch)) t_ch = tolower(t_ch); if (force_lower_case && ISUPPER(p_ch)) @@ -77,12 +74,12 @@ static int dowild(const uchar *p, const uchar *text, int force_lower_case) /* FALLTHROUGH */ default: if (t_ch != p_ch) - return NOMATCH; + return WM_NOMATCH; continue; case '?': /* Match anything but '/'. */ if (t_ch == '/') - return NOMATCH; + return WM_NOMATCH; continue; case '*': if (*++p == '*') { @@ -101,135 +98,136 @@ static int dowild(const uchar *p, const uchar *text, int force_lower_case) * both foo/bar and foo/a/bar. 
*/ if (p[0] == '/' && - dowild(p + 1, text, force_lower_case) == MATCH) - return MATCH; - match_slash = TRUE; + dowild(p + 1, text, force_lower_case) == WM_MATCH) + return WM_MATCH; + match_slash = 1; } else - return ABORT_MALFORMED; + return WM_ABORT_MALFORMED; } else - match_slash = FALSE; + match_slash = 0; if (*p == '\0') { /* Trailing "**" matches everything. Trailing "*" matches * only if there are no more slash characters. */ if (!match_slash) { if (strchr((char*)text, '/') != NULL) - return NOMATCH; + return WM_NOMATCH; } - return MATCH; + return WM_MATCH; } while (1) { if (t_ch == '\0') break; - if ((matched = dowild(p, text, force_lower_case)) != NOMATCH) { - if (!match_slash || matched != ABORT_TO_STARSTAR) + if ((matched = dowild(p, text, force_lower_case)) != WM_NOMATCH) { + if (!match_slash || matched != WM_ABORT_TO_STARSTAR) return matched; } else if (!match_slash && t_ch == '/') - return ABORT_TO_STARSTAR; + return WM_ABORT_TO_STARSTAR; t_ch = *++text; } - return ABORT_ALL; + return WM_ABORT_ALL; case '[': p_ch = *++p; #ifdef NEGATE_CLASS2 if (p_ch == NEGATE_CLASS2) p_ch = NEGATE_CLASS; #endif - /* Assign literal TRUE/FALSE because of "matched" comparison. */ - negated = p_ch == NEGATE_CLASS? TRUE : FALSE; + /* Assign literal 1/0 because of "matched" comparison. */ + negated = p_ch == NEGATE_CLASS ? 1 : 0; if (negated) { /* Inverted character class. */ p_ch = *++p; } prev_ch = 0; - matched = FALSE; + matched = 0; do { if (!p_ch) - return ABORT_ALL; + return WM_ABORT_ALL; if (p_ch == '\\') { p_ch = *++p; if (!p_ch) - return ABORT_ALL; + return WM_ABORT_ALL; if (t_ch == p_ch) - matched = TRUE; + matched = 1; } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') { p_ch = *++p; if (p_ch == '\\') { p_ch = *++p; if (!p_ch) - return ABORT_ALL; + return WM_ABORT_ALL; } if (t_ch <= p_ch && t_ch >= prev_ch) - matched = TRUE; + matched = 1; p_ch = 0; /* This makes "prev_ch" get set to 0. 
*/ } else if (p_ch == '[' && p[1] == ':') { const uchar *s; int i; for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/ if (!p_ch) - return ABORT_ALL; + return WM_ABORT_ALL; i = p - s - 1; if (i < 0 || p[-1] != ':') { /* Didn't find ":]", so treat like a normal set. */ p = s - 2; p_ch = '['; if (t_ch == p_ch) - matched = TRUE; + matched = 1; continue; } if (CC_EQ(s,i, "alnum")) { if (ISALNUM(t_ch)) - matched = TRUE; + matched = 1; } else if (CC_EQ(s,i, "alpha")) { if (ISALPHA(t_ch)) - matched = TRUE; + matched = 1; } else if (CC_EQ(s,i, "blank")) { if (ISBLANK(t_ch)) - matched = TRUE; + matched = 1; } else if (CC_EQ(s,i, "cntrl")) { if (ISCNTRL(t_ch)) - matched = TRUE; + matched = 1; } else if (CC_EQ(s,i, "digit")) { if (ISDIGIT(t_ch)) - matched = TRUE; + matched = 1; } else if (CC_EQ(s,i, "graph")) { if (ISGRAPH(t_ch)) - matched = TRUE; + matched = 1; } else if (CC_EQ(s,i, "lower")) { if (ISLOWER(t_ch)) - matched = TRUE; + matched = 1; } else if (CC_EQ(s,i, "print")) { if (ISPRINT(t_ch)) - matched = TRUE; + matched = 1; } else if (CC_EQ(s,i, "punct")) { if (ISPUNCT(t_ch)) - matched = TRUE; + matched = 1; } else if (CC_EQ(s,i, "space")) { if (ISSPACE(t_ch)) - matched = TRUE; + matched = 1; } else if (CC_EQ(s,i, "upper")) { if (ISUPPER(t_ch)) - matched = TRUE; + matched = 1; } else if (CC_EQ(s,i, "xdigit")) { if (ISXDIGIT(t_ch)) - matched = TRUE; + matched = 1; } else /* malformed [:class:] string */ - return ABORT_ALL; + return WM_ABORT_ALL; p_ch = 0; /* This makes "prev_ch" get set to 0. */ } else if (t_ch == p_ch) - matched = TRUE; + matched = 1; } while (prev_ch = p_ch, (p_ch = *++p) != ']'); if (matched == negated || t_ch == '/') - return NOMATCH; + return WM_NOMATCH; continue; } } - return *text ? NOMATCH : MATCH; + return *text ? WM_NOMATCH : WM_MATCH; } /* Match the "pattern" against the "text" string. 
*/ -int wildmatch(const char *pattern, const char *text, int flags) +int wildmatch(const char *pattern, const char *text, + unsigned int flags, struct wildopts *wo) { return dowild((const uchar*)pattern, (const uchar*)text, - flags & FNM_CASEFOLD ? 1 :0); + flags & WM_CASEFOLD ? 1 :0); } diff --git a/wildmatch.h b/wildmatch.h index 984a38cdc2..1c814fd5ff 100644 --- a/wildmatch.h +++ b/wildmatch.h @@ -1,9 +1,17 @@ -/* wildmatch.h */ +#ifndef WILDMATCH_H +#define WILDMATCH_H -#define ABORT_MALFORMED 2 -#define NOMATCH 1 -#define MATCH 0 -#define ABORT_ALL -1 -#define ABORT_TO_STARSTAR -2 +#define WM_CASEFOLD 1 -int wildmatch(const char *pattern, const char *text, int flags); +#define WM_ABORT_MALFORMED 2 +#define WM_NOMATCH 1 +#define WM_MATCH 0 +#define WM_ABORT_ALL -1 +#define WM_ABORT_TO_STARSTAR -2 + +struct wildopts; + +int wildmatch(const char *pattern, const char *text, + unsigned int flags, + struct wildopts *wo); +#endif From 0c528168dadd4209de0213a16ff811a89ee3f206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Tue, 1 Jan 2013 09:44:06 +0700 Subject: [PATCH 2/7] wildmatch: make dowild() take arbitrary flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- wildmatch.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/wildmatch.c b/wildmatch.c index 2a655fa7d4..1b5bbacf1a 100644 --- a/wildmatch.c +++ b/wildmatch.c @@ -52,7 +52,7 @@ typedef unsigned char uchar; #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c)) /* Match pattern "p" against "text" */ -static int dowild(const uchar *p, const uchar *text, int force_lower_case) +static int dowild(const uchar *p, const uchar *text, unsigned int flags) { uchar p_ch; const uchar *pattern = p; @@ -62,9 +62,9 @@ static int dowild(const uchar *p, const uchar *text, int force_lower_case) uchar t_ch, prev_ch; if ((t_ch = *text) == '\0' 
&& p_ch != '*') return WM_ABORT_ALL; - if (force_lower_case && ISUPPER(t_ch)) + if ((flags & WM_CASEFOLD) && ISUPPER(t_ch)) t_ch = tolower(t_ch); - if (force_lower_case && ISUPPER(p_ch)) + if ((flags & WM_CASEFOLD) && ISUPPER(p_ch)) p_ch = tolower(p_ch); switch (p_ch) { case '\\': @@ -98,7 +98,7 @@ static int dowild(const uchar *p, const uchar *text, int force_lower_case) * both foo/bar and foo/a/bar. */ if (p[0] == '/' && - dowild(p + 1, text, force_lower_case) == WM_MATCH) + dowild(p + 1, text, flags) == WM_MATCH) return WM_MATCH; match_slash = 1; } else @@ -117,7 +117,7 @@ static int dowild(const uchar *p, const uchar *text, int force_lower_case) while (1) { if (t_ch == '\0') break; - if ((matched = dowild(p, text, force_lower_case)) != WM_NOMATCH) { + if ((matched = dowild(p, text, flags)) != WM_NOMATCH) { if (!match_slash || matched != WM_ABORT_TO_STARSTAR) return matched; } else if (!match_slash && t_ch == '/') @@ -228,6 +228,5 @@ static int dowild(const uchar *p, const uchar *text, int force_lower_case) int wildmatch(const char *pattern, const char *text, unsigned int flags, struct wildopts *wo) { - return dowild((const uchar*)pattern, (const uchar*)text, - flags & WM_CASEFOLD ? 1 :0); + return dowild((const uchar*)pattern, (const uchar*)text, flags); } From c41244e702fd4fc1039f39a3915ae1e5f165bbf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Tue, 1 Jan 2013 09:44:07 +0700 Subject: [PATCH 3/7] wildmatch: support "no FNM_PATHNAME" mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far, wildmatch() has always honoured directory boundary and there was no way to turn it off. Make it behave more like fnmatch() by requiring all callers that want the FNM_PATHNAME behaviour to pass that in the equivalent flag WM_PATHNAME. Callers that do not specify WM_PATHNAME will get wildcards like ? 
and * in their patterns matched against '/', just like not passing FNM_PATHNAME to fnmatch(). Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 2 +- t/t3070-wildmatch.sh | 27 +++++++++++++++++++++++++++ test-wildmatch.c | 6 ++++-- wildmatch.c | 13 +++++++++---- wildmatch.h | 1 + 5 files changed, 42 insertions(+), 7 deletions(-) diff --git a/dir.c b/dir.c index 175a1827ba..6ef03961f3 100644 --- a/dir.c +++ b/dir.c @@ -595,7 +595,7 @@ int match_pathname(const char *pathname, int pathlen, } return wildmatch(pattern, name, - ignore_case ? WM_CASEFOLD : 0, + WM_PATHNAME | (ignore_case ? WM_CASEFOLD : 0), NULL) == 0; } diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh index af54c83111..5c9601a0c8 100755 --- a/t/t3070-wildmatch.sh +++ b/t/t3070-wildmatch.sh @@ -29,6 +29,18 @@ match() { fi } +pathmatch() { + if [ $1 = 1 ]; then + test_expect_success "pathmatch: match '$2' '$3'" " + test-wildmatch pathmatch '$2' '$3' + " + else + test_expect_success "pathmatch: no match '$2' '$3'" " + ! 
test-wildmatch pathmatch '$2' '$3' + " + fi +} + # Basic wildmat features match 1 1 foo foo match 0 0 foo bar @@ -192,4 +204,19 @@ match 0 0 'XXX/adobe/courier/bold/o/normal//12/120/75/75/X/70/iso8859/1' 'XXX/*/ match 1 0 'abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt' '**/*a*b*g*n*t' match 0 0 'abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txtz' '**/*a*b*g*n*t' +pathmatch 1 foo foo +pathmatch 0 foo fo +pathmatch 1 foo/bar foo/bar +pathmatch 1 foo/bar 'foo/*' +pathmatch 1 foo/bba/arr 'foo/*' +pathmatch 1 foo/bba/arr 'foo/**' +pathmatch 1 foo/bba/arr 'foo*' +pathmatch 1 foo/bba/arr 'foo**' +pathmatch 1 foo/bba/arr 'foo/*arr' +pathmatch 1 foo/bba/arr 'foo/**arr' +pathmatch 0 foo/bba/arr 'foo/*z' +pathmatch 0 foo/bba/arr 'foo/**z' +pathmatch 1 foo/bar 'foo?bar' +pathmatch 1 foo/bar 'foo[/]bar' + test_done diff --git a/test-wildmatch.c b/test-wildmatch.c index 4bb23b4c92..a5f4833770 100644 --- a/test-wildmatch.c +++ b/test-wildmatch.c @@ -12,9 +12,11 @@ int main(int argc, char **argv) argv[i] += 3; } if (!strcmp(argv[1], "wildmatch")) - return !!wildmatch(argv[3], argv[2], 0, NULL); + return !!wildmatch(argv[3], argv[2], WM_PATHNAME, NULL); else if (!strcmp(argv[1], "iwildmatch")) - return !!wildmatch(argv[3], argv[2], WM_CASEFOLD, NULL); + return !!wildmatch(argv[3], argv[2], WM_PATHNAME | WM_CASEFOLD, NULL); + else if (!strcmp(argv[1], "pathmatch")) + return !!wildmatch(argv[3], argv[2], 0, NULL); else if (!strcmp(argv[1], "fnmatch")) return !!fnmatch(argv[3], argv[2], FNM_PATHNAME); else diff --git a/wildmatch.c b/wildmatch.c index 1b5bbacf1a..536470b794 100644 --- a/wildmatch.c +++ b/wildmatch.c @@ -78,14 +78,17 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags) continue; case '?': /* Match anything but '/'. 
*/ - if (t_ch == '/') + if ((flags & WM_PATHNAME) && t_ch == '/') return WM_NOMATCH; continue; case '*': if (*++p == '*') { const uchar *prev_p = p - 2; while (*++p == '*') {} - if ((prev_p < pattern || *prev_p == '/') && + if (!(flags & WM_PATHNAME)) + /* without WM_PATHNAME, '*' == '**' */ + match_slash = 1; + else if ((prev_p < pattern || *prev_p == '/') && (*p == '\0' || *p == '/' || (p[0] == '\\' && p[1] == '/'))) { /* @@ -104,7 +107,8 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags) } else return WM_ABORT_MALFORMED; } else - match_slash = 0; + /* without WM_PATHNAME, '*' == '**' */ + match_slash = flags & WM_PATHNAME ? 0 : 1; if (*p == '\0') { /* Trailing "**" matches everything. Trailing "*" matches * only if there are no more slash characters. */ @@ -215,7 +219,8 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags) } else if (t_ch == p_ch) matched = 1; } while (prev_ch = p_ch, (p_ch = *++p) != ']'); - if (matched == negated || t_ch == '/') + if (matched == negated || + ((flags & WM_PATHNAME) && t_ch == '/')) return WM_NOMATCH; continue; } diff --git a/wildmatch.h b/wildmatch.h index 1c814fd5ff..4090c8f4bb 100644 --- a/wildmatch.h +++ b/wildmatch.h @@ -2,6 +2,7 @@ #define WILDMATCH_H #define WM_CASEFOLD 1 +#define WM_PATHNAME 2 #define WM_ABORT_MALFORMED 2 #define WM_NOMATCH 1 From 1b25892636d2f250eb2163301440ee8c8c1ac944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Tue, 1 Jan 2013 09:44:08 +0700 Subject: [PATCH 4/7] test-wildmatch: add "perf" command to compare wildmatch and fnmatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It takes a text file, a pattern, a number N and a pathname flag. Each line in the text file is matched against the pattern N times. If "pathname" is given, FNM_PATHNAME is used. test-wildmatch is built with -O2 and tested against glibc 2.14.1 (also -O2) and compat/fnmatch.
The input file is the linux-2.6.git file list. N is 2000. The complete command list is at the end. wildmatch is beaten in the following cases. Apparently it needs some improvement in the FNM_PATHNAME case: glibc, '*/*/*' with FNM_PATHNAME: wildmatch 8s 1559us fnmatch 1s 11877us or 12.65% faster compat, '*/*/*' with FNM_PATHNAME: wildmatch 7s 922458us fnmatch 2s 905111us or 36.67% faster compat, '*/*/*' without FNM_PATHNAME: wildmatch 7s 264201us fnmatch 2s 1897us or 27.56% faster compat, '[a-z]*/[a-z]*/[a-z]*' with FNM_PATHNAME: wildmatch 8s 742827us fnmatch 0s 922943us or 10.56% faster compat, '[a-z]*/[a-z]*/[a-z]*' without FNM_PATHNAME: wildmatch 8s 284520us fnmatch 0s 6936us or 0.08% faster The rest of glibc numbers ------------------------- 'Documentation/*' wildmatch 1s 529479us fnmatch 1s 98263us or 71.81% slower 'drivers/*' wildmatch 1s 988288us fnmatch 1s 192049us or 59.95% slower 'Documentation/*' pathname wildmatch 1s 557507us fnmatch 1s 93696us or 70.22% slower 'drivers/*' pathname wildmatch 2s 161626us fnmatch 1s 230372us or 56.92% slower '[Dd]ocu[Mn]entation/*' wildmatch 1s 776581us fnmatch 1s 471693us or 82.84% slower '[Dd]o?u[Mn]en?ati?n/*' wildmatch 1s 770770us fnmatch 1s 555727us or 87.86% slower '[Dd]o?u[Mn]en?ati?n/*' pathname wildmatch 1s 783507us fnmatch 1s 537029us or 86.18% slower '[A-Za-z][A-Za-z]??*' wildmatch 4s 110386us fnmatch 4s 926306us or 119.85% slower '[A-Za-z][A-Za-z]??' wildmatch 3s 918114us fnmatch 3s 686175us or 94.08% slower '[A-Za-z][A-Za-z]??*' pathname wildmatch 4s 453746us fnmatch 4s 955856us or 111.27% slower '[A-Za-z][A-Za-z]??'
pathname wildmatch 3s 896646us fnmatch 3s 733828us or 95.82% slower '*/*/*' wildmatch 7s 287985us fnmatch 1s 74083us or 14.74% slower '[a-z]*/[a-z]*/[a-z]*' pathname wildmatch 8s 796659us fnmatch 1s 568409us or 17.83% slower '[a-z]*/[a-z]*/[a-z]*' wildmatch 8s 316559us fnmatch 3s 430652us or 41.25% slower The rest of compat numbers -------------------------- 'Documentation/*' wildmatch 1s 520389us fnmatch 0s 62579us or 4.12% slower 'drivers/*' wildmatch 1s 955354us fnmatch 0s 190109us or 9.72% slower 'Documentation/*' pathname wildmatch 1s 561675us fnmatch 0s 55336us or 3.54% slower 'drivers/*' pathname wildmatch 2s 106100us fnmatch 0s 219680us or 10.43% slower '[Dd]ocu[Mn]entation/*' wildmatch 1s 750810us fnmatch 0s 542721us or 31.00% slower '[Dd]o?u[Mn]en?ati?n/*' wildmatch 1s 724791us fnmatch 0s 538948us or 31.25% slower '[Dd]o?u[Mn]en?ati?n/*' pathname wildmatch 1s 731403us fnmatch 0s 537474us or 31.04% slower '[A-Za-z][A-Za-z]??*' wildmatch 4s 28555us fnmatch 1s 67297us or 26.49% slower '[A-Za-z][A-Za-z]??' wildmatch 3s 838279us fnmatch 0s 880005us or 22.93% slower '[A-Za-z][A-Za-z]??*' pathname wildmatch 4s 379476us fnmatch 1s 55643us or 24.10% slower '[A-Za-z][A-Za-z]??' pathname wildmatch 3s 830910us fnmatch 0s 849699us or 22.18% slower The following commands are used: LANG=C ./test-wildmatch perf /tmp/filelist.txt 'Documentation/*' 2000 LANG=C ./test-wildmatch perf /tmp/filelist.txt 'drivers/*' 2000 LANG=C ./test-wildmatch perf /tmp/filelist.txt 'Documentation/*' 2000 pathname LANG=C ./test-wildmatch perf /tmp/filelist.txt 'drivers/*' 2000 pathname LANG=C ./test-wildmatch perf /tmp/filelist.txt '[Dd]ocu[Mn]entation/*' 2000 LANG=C ./test-wildmatch perf /tmp/filelist.txt '[Dd]o?u[Mn]en?ati?n/*' 2000 LANG=C ./test-wildmatch perf /tmp/filelist.txt '[Dd]o?u[Mn]en?ati?n/*' 2000 pathname LANG=C ./test-wildmatch perf /tmp/filelist.txt '[A-Za-z][A-Za-z]??*' 2000 LANG=C ./test-wildmatch perf /tmp/filelist.txt '[A-Za-z][A-Za-z]??' 
2000 LANG=C ./test-wildmatch perf /tmp/filelist.txt '[A-Za-z][A-Za-z]??*' 2000 pathname LANG=C ./test-wildmatch perf /tmp/filelist.txt '[A-Za-z][A-Za-z]??' 2000 pathname LANG=C ./test-wildmatch perf /tmp/filelist.txt '*/*/*' 2000 LANG=C ./test-wildmatch perf /tmp/filelist.txt '*/*/*' 2000 pathname LANG=C ./test-wildmatch perf /tmp/filelist.txt '[a-z]*/[a-z]*/[a-z]*' 2000 pathname LANG=C ./test-wildmatch perf /tmp/filelist.txt '[a-z]*/[a-z]*/[a-z]*' 2000 Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- test-wildmatch.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/test-wildmatch.c b/test-wildmatch.c index a5f4833770..ac86800d50 100644 --- a/test-wildmatch.c +++ b/test-wildmatch.c @@ -1,9 +1,82 @@ #include "cache.h" #include "wildmatch.h" +static int perf(int ac, char **av) +{ + struct timeval tv1, tv2; + struct stat st; + int fd, i, n, flags1 = 0, flags2 = 0; + char *buffer, *p; + uint32_t usec1, usec2; + const char *lang; + const char *file = av[0]; + const char *pattern = av[1]; + + lang = getenv("LANG"); + if (lang && strcmp(lang, "C")) + die("Please test it on C locale."); + + if ((fd = open(file, O_RDONLY)) == -1 || fstat(fd, &st)) + die_errno("file open"); + + buffer = xmalloc(st.st_size + 2); + if (read(fd, buffer, st.st_size) != st.st_size) + die_errno("read"); + + buffer[st.st_size] = '\0'; + buffer[st.st_size + 1] = '\0'; + for (i = 0; i < st.st_size; i++) + if (buffer[i] == '\n') + buffer[i] = '\0'; + + n = atoi(av[2]); + if (av[3] && !strcmp(av[3], "pathname")) { + flags1 = WM_PATHNAME; + flags2 = FNM_PATHNAME; + } + + gettimeofday(&tv1, NULL); + for (i = 0; i < n; i++) { + for (p = buffer; *p; p += strlen(p) + 1) + wildmatch(pattern, p, flags1, NULL); + } + gettimeofday(&tv2, NULL); + + usec1 = (uint32_t)tv2.tv_sec * 1000000 + tv2.tv_usec; + usec1 -= (uint32_t)tv1.tv_sec * 1000000 + tv1.tv_usec; + printf("wildmatch %ds %dus\n", + (int)(usec1 / 1000000), + (int)(usec1 % 1000000)); 
+ + gettimeofday(&tv1, NULL); + for (i = 0; i < n; i++) { + for (p = buffer; *p; p += strlen(p) + 1) + fnmatch(pattern, p, flags2); + } + gettimeofday(&tv2, NULL); + + usec2 = (uint32_t)tv2.tv_sec * 1000000 + tv2.tv_usec; + usec2 -= (uint32_t)tv1.tv_sec * 1000000 + tv1.tv_usec; + if (usec2 > usec1) + printf("fnmatch %ds %dus or %.2f%% slower\n", + (int)((usec2 - usec1) / 1000000), + (int)((usec2 - usec1) % 1000000), + (float)(usec2 - usec1) / usec1 * 100); + else + printf("fnmatch %ds %dus or %.2f%% faster\n", + (int)((usec1 - usec2) / 1000000), + (int)((usec1 - usec2) % 1000000), + (float)(usec1 - usec2) / usec1 * 100); + return 0; +} + int main(int argc, char **argv) { int i; + + if (!strcmp(argv[1], "perf")) + return perf(argc - 2, argv + 2); + for (i = 2; i < argc; i++) { if (argv[i][0] == '/') die("Forward slash is not allowed at the beginning of the\n" From 46983441ae17b34abee2954b87efeeefbe0768b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Tue, 1 Jan 2013 09:44:09 +0700 Subject: [PATCH 5/7] wildmatch: make a special case for "*/" with FNM_PATHNAME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally we need recursion for "*". In this case we know that it matches everything until "/" so we can skip the recursion. 
glibc, '*/*/*' on linux-2.6.git file list 2000 times before: wildmatch 8s 74513us fnmatch 1s 97042us or 13.59% faster after: wildmatch 3s 521862us fnmatch 3s 488616us or 99.06% slower Same test with compat/fnmatch: wildmatch 8s 110763us fnmatch 2s 980845us or 36.75% faster wildmatch 3s 522156us fnmatch 1s 544487us or 43.85% slower Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- t/t3070-wildmatch.sh | 8 ++++++++ wildmatch.c | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh index 5c9601a0c8..97f1daff8e 100755 --- a/t/t3070-wildmatch.sh +++ b/t/t3070-wildmatch.sh @@ -203,6 +203,10 @@ match 1 1 'XXX/adobe/courier/bold/o/normal//12/120/75/75/m/70/iso8859/1' 'XXX/*/ match 0 0 'XXX/adobe/courier/bold/o/normal//12/120/75/75/X/70/iso8859/1' 'XXX/*/*/*/*/*/*/12/*/*/*/m/*/*/*' match 1 0 'abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt' '**/*a*b*g*n*t' match 0 0 'abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txtz' '**/*a*b*g*n*t' +match 0 x foo '*/*/*' +match 0 x foo/bar '*/*/*' +match 1 x foo/bba/arr '*/*/*' +match 0 x foo/bb/aa/rr '*/*/*' pathmatch 1 foo foo pathmatch 0 foo fo @@ -218,5 +222,9 @@ pathmatch 0 foo/bba/arr 'foo/*z' pathmatch 0 foo/bba/arr 'foo/**z' pathmatch 1 foo/bar 'foo?bar' pathmatch 1 foo/bar 'foo[/]bar' +pathmatch 0 foo '*/*/*' +pathmatch 0 foo/bar '*/*/*' +pathmatch 1 foo/bba/arr '*/*/*' +pathmatch 1 foo/bb/aa/rr '*/*/*' test_done diff --git a/wildmatch.c b/wildmatch.c index 536470b794..bb425220b0 100644 --- a/wildmatch.c +++ b/wildmatch.c @@ -117,6 +117,18 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags) return WM_NOMATCH; } return WM_MATCH; + } else if (!match_slash && *p == '/') { + /* + * _one_ asterisk followed by a slash + * with WM_PATHNAME matches the next + * directory + */ + const char *slash = strchr((char*)text, '/'); + if (!slash) + return WM_NOMATCH; + text = (const uchar*)slash; + /* the slash is consumed by the top-level for loop */ + 
break; } while (1) { if (t_ch == '\0') From 6f1a31f0aae6dbd5414b81fb6c0f4ff087146fc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Tue, 1 Jan 2013 09:44:10 +0700 Subject: [PATCH 6/7] wildmatch: advance faster in asterisk + literal patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally when we match "*X" on "abcX", we call dowild("X", "abcX"), dowild("X", "bcX"), dowild("X", "cX") and dowild("X", "X"). Only the last call may have a chance of matching. By skipping the text before "X", we can eliminate the first three useless calls. compat, '*/*/*' on linux-2.6.git file list 2000 times, before: wildmatch 7s 985049us fnmatch 2s 735541us or 34.26% faster and after: wildmatch 4s 492549us fnmatch 0s 888263us or 19.77% slower Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- t/t3070-wildmatch.sh | 8 ++++++++ wildmatch.c | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh index 97f1daff8e..4c37057ddf 100755 --- a/t/t3070-wildmatch.sh +++ b/t/t3070-wildmatch.sh @@ -207,6 +207,11 @@ match 0 x foo '*/*/*' match 0 x foo/bar '*/*/*' match 1 x foo/bba/arr '*/*/*' match 0 x foo/bb/aa/rr '*/*/*' +match 1 x foo/bb/aa/rr '**/**/**' +match 1 x abcXdefXghi '*X*i' +match 0 x ab/cXd/efXg/hi '*X*i' +match 1 x ab/cXd/efXg/hi '*/*X*/*/*i' +match 1 x ab/cXd/efXg/hi '**/*X*/**/*i' pathmatch 1 foo foo pathmatch 0 foo fo @@ -226,5 +231,8 @@ pathmatch 0 foo '*/*/*' pathmatch 0 foo/bar '*/*/*' pathmatch 1 foo/bba/arr '*/*/*' pathmatch 1 foo/bb/aa/rr '*/*/*' +pathmatch 1 abcXdefXghi '*X*i' +pathmatch 1 ab/cXd/efXg/hi '*/*X*/*/*i' +pathmatch 1 ab/cXd/efXg/hi '*Xg*i' test_done diff --git a/wildmatch.c b/wildmatch.c index bb425220b0..7192bdc1b8 100644 --- a/wildmatch.c +++ b/wildmatch.c @@ -133,6 +133,29 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags) while (1) { if (t_ch == '\0') break; + /* + * Try
to advance faster when an asterisk is + * followed by a literal. We know in this case + * that the string before the literal + * must belong to "*". + * If match_slash is false, do not look past + * the first slash as it cannot belong to '*'. + */ + if (!is_glob_special(*p)) { + p_ch = *p; + if ((flags & WM_CASEFOLD) && ISUPPER(p_ch)) + p_ch = tolower(p_ch); + while ((t_ch = *text) != '\0' && + (match_slash || t_ch != '/')) { + if ((flags & WM_CASEFOLD) && ISUPPER(t_ch)) + t_ch = tolower(t_ch); + if (t_ch == p_ch) + break; + text++; + } + if (t_ch != p_ch) + return WM_NOMATCH; + } if ((matched = dowild(p, text, flags)) != WM_NOMATCH) { if (!match_slash || matched != WM_ABORT_TO_STARSTAR) return matched; From cebcab189aa7727af5027f0215e798bbe66f2815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Tue, 1 Jan 2013 09:44:11 +0700 Subject: [PATCH 7/7] Makefile: add USE_WILDMATCH to use wildmatch as fnmatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is similar to NO_FNMATCH but it uses wildmatch instead of compat/fnmatch. This is an intermediate step to let wildmatch be used as fnmatch replacement for wider audience before it replaces fnmatch completely and compat/fnmatch is removed. fnmatch in test-wildmatch is not impacted by this and is the only place that NO_FNMATCH or NO_FNMATCH_CASEFOLD remain active when USE_WILDMATCH is set. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Makefile | 6 ++++++ git-compat-util.h | 13 +++++++++++++ test-wildmatch.c | 3 +++ 3 files changed, 22 insertions(+) diff --git a/Makefile b/Makefile index bc868d1548..24e277481f 100644 --- a/Makefile +++ b/Makefile @@ -99,6 +99,9 @@ all:: # Define NO_FNMATCH_CASEFOLD if your fnmatch function doesn't have the # FNM_CASEFOLD GNU extension.
# +# Define USE_WILDMATCH if you want to use Git's wildmatch +# implementation as fnmatch +# # Define NO_GECOS_IN_PWENT if you don't have pw_gecos in struct passwd # in the C library. # @@ -1625,6 +1628,9 @@ ifdef NO_FNMATCH_CASEFOLD COMPAT_OBJS += compat/fnmatch/fnmatch.o endif endif +ifdef USE_WILDMATCH + COMPAT_CFLAGS += -DUSE_WILDMATCH +endif ifdef NO_SETENV COMPAT_CFLAGS += -DNO_SETENV COMPAT_OBJS += compat/setenv.o diff --git a/git-compat-util.h b/git-compat-util.h index 02f48f653b..b2c7638ae5 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -106,7 +106,9 @@ #include #include #include +#ifndef USE_WILDMATCH #include +#endif #include #include #include @@ -238,6 +240,17 @@ extern char *gitbasename(char *); #include "compat/bswap.h" +#ifdef USE_WILDMATCH +#include "wildmatch.h" +#define FNM_PATHNAME WM_PATHNAME +#define FNM_CASEFOLD WM_CASEFOLD +#define FNM_NOMATCH WM_NOMATCH +static inline int fnmatch(const char *pattern, const char *string, int flags) +{ + return wildmatch(pattern, string, flags, NULL); +} +#endif + /* General helper functions */ extern void vreportf(const char *prefix, const char *err, va_list params); extern void vwritef(int fd, const char *prefix, const char *err, va_list params); diff --git a/test-wildmatch.c b/test-wildmatch.c index ac86800d50..a3e2643fbc 100644 --- a/test-wildmatch.c +++ b/test-wildmatch.c @@ -1,3 +1,6 @@ +#ifdef USE_WILDMATCH +#undef USE_WILDMATCH /* We need real fnmatch implementation here */ +#endif #include "cache.h" #include "wildmatch.h"