git-grep: Learn PCRE

This patch teaches git-grep the --perl-regexp/-P options (naming
borrowed from GNU grep) in order to allow specifying PCRE regexes on the
command line.

PCRE regexes have a number of features which make them handier to use than
POSIX regexes, such as consistent escaping rules, extended character
classes, ungreedy matching, etc.

git isn't built with PCRE support automatically. The USE_LIBPCRE environment
variable must be set (e.g. `make USE_LIBPCRE=YesPlease`).

Signed-off-by: Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Michał Kiedrowicz 2011-05-09 23:52:05 +02:00 committed by Junio C Hamano
parent a30c148aa7
commit 63e7e9d8b6
6 changed files with 107 additions and 1 deletions

View File

@ -12,6 +12,7 @@ SYNOPSIS
'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
[-v | --invert-match] [-h|-H] [--full-name]
[-E | --extended-regexp] [-G | --basic-regexp]
[-P | --perl-regexp]
[-F | --fixed-strings] [-n | --line-number]
[-l | --files-with-matches] [-L | --files-without-match]
[(-O | --open-files-in-pager) [<pager>]]
@ -97,6 +98,11 @@ OPTIONS
Use POSIX extended/basic regexp for patterns. Default
is to use basic regexp.
-P::
--perl-regexp::
Use Perl-compatible regexp for patterns. Requires libpcre to be
compiled in.
-F::
--fixed-strings::
Use fixed strings for patterns (don't interpret pattern

View File

@ -24,6 +24,12 @@ all::
# Define NO_OPENSSL environment variable if you do not have OpenSSL.
# This also implies BLK_SHA1.
#
# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
# able to use Perl-compatible regular expressions.
#
# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
#
# Define NO_CURL if you do not have libcurl installed. git-http-pull and
# git-http-push are not built, and you cannot use http:// and https://
# transports.
@ -1248,6 +1254,15 @@ ifdef NO_LIBGEN_H
COMPAT_OBJS += compat/basename.o
endif
# Optional libpcre support: when USE_LIBPCRE is defined, pass -DUSE_LIBPCRE
# to the compiler and link against libpcre.
ifdef USE_LIBPCRE
BASIC_CFLAGS += -DUSE_LIBPCRE
ifdef LIBPCREDIR
# libpcre lives under a custom prefix: add its include directory and its
# library directory to the search paths.
# NOTE(review): CC_LD_DYNPATH is presumably the run-time library path flag
# defined elsewhere in this Makefile -- confirm.
BASIC_CFLAGS += -I$(LIBPCREDIR)/include
EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
endif
EXTLIBS += -lpcre
endif
ifdef NO_CURL
BASIC_CFLAGS += -DNO_CURL
REMOTE_CURL_PRIMARY =

View File

@ -781,6 +781,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
REG_EXTENDED),
OPT_BOOLEAN('F', "fixed-strings", &opt.fixed,
"interpret patterns as fixed strings"),
OPT_BOOLEAN('P', "perl-regexp", &opt.pcre,
"use Perl-compatible regular expressions"),
OPT_GROUP(""),
OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"),
OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1),

View File

@ -1487,6 +1487,7 @@ _git_grep ()
--text --ignore-case --word-regexp --invert-match
--full-name --line-number
--extended-regexp --basic-regexp --fixed-strings
--perl-regexp
--files-with-matches --name-only
--files-without-match
--max-depth

73
grep.c
View File

@ -74,6 +74,69 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p,
die("%s'%s': %s", where, p->pattern, error);
}
#ifdef USE_LIBPCRE
/*
 * Compile p->pattern as a Perl-compatible regular expression.
 *
 * The compiled pattern is stored in p->pcre_regexp and the result of
 * studying it in p->pcre_extra_info.  Matching is made caseless when
 * opt->ignore_case is set.  A compilation failure is reported through
 * compile_regexp_failed(); a study failure is reported through die().
 */
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
	const char *errmsg;
	int erroffset;
	int pcre_flags = opt->ignore_case ? PCRE_CASELESS : 0;

	p->pcre_regexp = pcre_compile(p->pattern, pcre_flags, &errmsg,
				      &erroffset, NULL);
	if (p->pcre_regexp == NULL)
		compile_regexp_failed(p, errmsg);

	/*
	 * A NULL study result is only fatal when an error message was
	 * also set; NULL without a message is accepted as-is.
	 */
	p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &errmsg);
	if (p->pcre_extra_info == NULL && errmsg != NULL)
		die("%s", errmsg);
}
/*
 * Match the compiled PCRE pattern held in p against the text [line, eol).
 *
 * On a match, the start and end offsets of the overall match (relative
 * to line) are stored in match->rm_so and match->rm_eo and 0 is
 * returned.  When nothing matches, the non-zero PCRE_ERROR_NOMATCH code
 * is returned.  Any other pcre_exec() error is fatal.
 *
 * eflags carries regexec()-style flags; only REG_NOTBOL is translated
 * to its PCRE counterpart (PCRE_NOTBOL).
 */
static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
		regmatch_t *match, int eflags)
{
	int ovector[30], ret, flags = 0;

	if (eflags & REG_NOTBOL)
		flags |= PCRE_NOTBOL;

	ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line,
			0, flags, ovector, ARRAY_SIZE(ovector));
	if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
		die("pcre_exec failed with error code %d", ret);

	/*
	 * pcre_exec() returns 0 when the pattern matched but ovector was
	 * too small to hold every captured substring.  The offsets of the
	 * overall match in ovector[0] and ovector[1] are still filled in
	 * in that case, so it must be treated as a match too; otherwise
	 * the caller would see a hit with an unset match range.
	 */
	if (ret >= 0) {
		ret = 0;
		match->rm_so = ovector[0];
		match->rm_eo = ovector[1];
	}
	return ret;
}
/*
 * Release the PCRE resources attached to p: the study data in
 * p->pcre_extra_info and the compiled pattern in p->pcre_regexp.
 * The pointers themselves are left untouched.
 */
static void free_pcre_regexp(struct grep_pat *p)
{
	pcre_free(p->pcre_extra_info);
	pcre_free(p->pcre_regexp);
}
#else /* !USE_LIBPCRE */
/*
 * Stub used when git is built without USE_LIBPCRE: a request to compile
 * a Perl-compatible pattern cannot be honoured, so abort via die() with
 * an explanatory message.  Neither p nor opt is examined.
 */
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}
/*
 * Stub used when git is built without USE_LIBPCRE.  Always returns 1,
 * which the caller (hit = !pcrematch(...)) interprets as "no match".
 * No argument is examined and match is left unmodified.
 * NOTE(review): presumably never reached in such builds, since the stub
 * compile_pcre_regexp() dies before p->pcre_regexp can be set -- confirm.
 */
static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
regmatch_t *match, int eflags)
{
return 1;
}
/*
 * Stub used when git is built without USE_LIBPCRE: deliberately does
 * nothing; it exists so callers need no #ifdef of their own.
 */
static void free_pcre_regexp(struct grep_pat *p)
{
}
#endif /* !USE_LIBPCRE */
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
int err;
@ -85,6 +148,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
if (p->fixed)
return;
if (opt->pcre) {
compile_pcre_regexp(p, opt);
return;
}
err = regcomp(&p->regexp, p->pattern, opt->regflags);
if (err) {
char errbuf[1024];
@ -327,6 +395,9 @@ void free_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
if (p->pcre_regexp)
free_pcre_regexp(p);
else
regfree(&p->regexp);
break;
default:
@ -426,6 +497,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
if (p->fixed)
hit = !fixmatch(p, line, eol, match);
else if (p->pcre_regexp)
hit = !pcrematch(p, line, eol, match, eflags);
else
hit = !regmatch(&p->regexp, line, eol, match, eflags);

9
grep.h
View File

@ -1,6 +1,12 @@
#ifndef GREP_H
#define GREP_H
#include "color.h"
/*
 * With USE_LIBPCRE, pull in the real libpcre declarations.  Without it,
 * define placeholder types so that declarations using "pcre *" and
 * "pcre_extra *" (such as the members of struct grep_pat) still compile.
 */
#ifdef USE_LIBPCRE
#include <pcre.h>
#else
typedef int pcre;
typedef int pcre_extra;
#endif
enum grep_pat_token {
GREP_PATTERN,
@ -33,6 +39,8 @@ struct grep_pat {
size_t patternlen;
enum grep_header_field field;
regex_t regexp;
pcre *pcre_regexp;
pcre_extra *pcre_extra_info;
unsigned fixed:1;
unsigned ignore_case:1;
unsigned word_regexp:1;
@ -83,6 +91,7 @@ struct grep_opt {
#define GREP_BINARY_TEXT 2
int binary;
int extended;
int pcre;
int relative;
int pathname;
int null_following_name;