git-commit-vandalism/grep.c
Junio C Hamano c91b0b7c72 Merge branch 'rs/pcre2-utf'
"git grep --perl-regexp" failed to match UTF-8 characters with
wildcard when the pattern consists only of ASCII letters, which has
been corrected.

* rs/pcre2-utf:
  grep/pcre2: factor out literal variable
  grep/pcre2: use PCRE2_UTF even with ASCII patterns
2022-01-05 14:01:31 -08:00

2024 lines
50 KiB
C

#include "cache.h"
#include "config.h"
#include "grep.h"
#include "object-store.h"
#include "userdiff.h"
#include "xdiff-interface.h"
#include "diff.h"
#include "diffcore.h"
#include "commit.h"
#include "quote.h"
#include "help.h"
static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs,
struct index_state *istate);
static void std_output(struct grep_opt *opt, const void *buf, size_t size)
{
fwrite(buf, size, 1, stdout);
}
static struct grep_opt grep_defaults = {
.relative = 1,
.pathname = 1,
.max_depth = -1,
.pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED,
.colors = {
[GREP_COLOR_CONTEXT] = "",
[GREP_COLOR_FILENAME] = "",
[GREP_COLOR_FUNCTION] = "",
[GREP_COLOR_LINENO] = "",
[GREP_COLOR_COLUMNNO] = "",
[GREP_COLOR_MATCH_CONTEXT] = GIT_COLOR_BOLD_RED,
[GREP_COLOR_MATCH_SELECTED] = GIT_COLOR_BOLD_RED,
[GREP_COLOR_SELECTED] = "",
[GREP_COLOR_SEP] = GIT_COLOR_CYAN,
},
.only_matching = 0,
.color = -1,
.output = std_output,
};
static const char *color_grep_slots[] = {
[GREP_COLOR_CONTEXT] = "context",
[GREP_COLOR_FILENAME] = "filename",
[GREP_COLOR_FUNCTION] = "function",
[GREP_COLOR_LINENO] = "lineNumber",
[GREP_COLOR_COLUMNNO] = "column",
[GREP_COLOR_MATCH_CONTEXT] = "matchContext",
[GREP_COLOR_MATCH_SELECTED] = "matchSelected",
[GREP_COLOR_SELECTED] = "selected",
[GREP_COLOR_SEP] = "separator",
};
static int parse_pattern_type_arg(const char *opt, const char *arg)
{
if (!strcmp(arg, "default"))
return GREP_PATTERN_TYPE_UNSPECIFIED;
else if (!strcmp(arg, "basic"))
return GREP_PATTERN_TYPE_BRE;
else if (!strcmp(arg, "extended"))
return GREP_PATTERN_TYPE_ERE;
else if (!strcmp(arg, "fixed"))
return GREP_PATTERN_TYPE_FIXED;
else if (!strcmp(arg, "perl"))
return GREP_PATTERN_TYPE_PCRE;
die("bad %s argument: %s", opt, arg);
}
define_list_config_array_extra(color_grep_slots, {"match"});
/*
* Read the configuration file once and store it in
* the grep_defaults template.
*/
int grep_config(const char *var, const char *value, void *cb)
{
struct grep_opt *opt = &grep_defaults;
const char *slot;
if (userdiff_config(var, value) < 0)
return -1;
/*
* The instance of grep_opt that we set up here is copied by
* grep_init() to be used by each individual invocation.
* When populating a new field of this structure here, be
* sure to think about ownership -- e.g., you might need to
* override the shallow copy in grep_init() with a deep copy.
*/
if (!strcmp(var, "grep.extendedregexp")) {
opt->extended_regexp_option = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "grep.patterntype")) {
opt->pattern_type_option = parse_pattern_type_arg(var, value);
return 0;
}
if (!strcmp(var, "grep.linenumber")) {
opt->linenum = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "grep.column")) {
opt->columnnum = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "grep.fullname")) {
opt->relative = !git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "color.grep"))
opt->color = git_config_colorbool(var, value);
if (!strcmp(var, "color.grep.match")) {
if (grep_config("color.grep.matchcontext", value, cb) < 0)
return -1;
if (grep_config("color.grep.matchselected", value, cb) < 0)
return -1;
} else if (skip_prefix(var, "color.grep.", &slot)) {
int i = LOOKUP_CONFIG(color_grep_slots, slot);
char *color;
if (i < 0)
return -1;
color = opt->colors[i];
if (!value)
return config_error_nonbool(var);
return color_parse(value, color);
}
return 0;
}
/*
* Initialize one instance of grep_opt and copy the
* default values from the template we read the configuration
* information in an earlier call to git_config(grep_config).
*/
void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix)
{
*opt = grep_defaults;
opt->repo = repo;
opt->prefix = prefix;
opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
opt->pattern_tail = &opt->pattern_list;
opt->header_tail = &opt->header_list;
}
static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
{
/*
* When committing to the pattern type by setting the relevant
* fields in grep_opt it's generally not necessary to zero out
* the fields we're not choosing, since they won't have been
* set by anything. The extended_regexp_option field is the
* only exception to this.
*
* This is because in the process of parsing grep.patternType
* & grep.extendedRegexp we set opt->pattern_type_option and
* opt->extended_regexp_option, respectively. We then
* internally use opt->extended_regexp_option to see if we're
* compiling an ERE. It must be unset if that's not actually
* the case.
*/
if (pattern_type != GREP_PATTERN_TYPE_ERE &&
opt->extended_regexp_option)
opt->extended_regexp_option = 0;
switch (pattern_type) {
case GREP_PATTERN_TYPE_UNSPECIFIED:
/* fall through */
case GREP_PATTERN_TYPE_BRE:
break;
case GREP_PATTERN_TYPE_ERE:
opt->extended_regexp_option = 1;
break;
case GREP_PATTERN_TYPE_FIXED:
opt->fixed = 1;
break;
case GREP_PATTERN_TYPE_PCRE:
opt->pcre2 = 1;
break;
}
}
void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt)
{
if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED)
grep_set_pattern_type_option(pattern_type, opt);
else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
grep_set_pattern_type_option(opt->pattern_type_option, opt);
else if (opt->extended_regexp_option)
/*
* This branch *must* happen after setting from the
* opt->pattern_type_option above, we don't want
* grep.extendedRegexp to override grep.patternType!
*/
grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
}
static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
const char *origin, int no,
enum grep_pat_token t,
enum grep_header_field field)
{
struct grep_pat *p = xcalloc(1, sizeof(*p));
p->pattern = xmemdupz(pat, patlen);
p->patternlen = patlen;
p->origin = origin;
p->no = no;
p->token = t;
p->field = field;
return p;
}
static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
{
**tail = p;
*tail = &p->next;
p->next = NULL;
switch (p->token) {
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
for (;;) {
struct grep_pat *new_pat;
size_t len = 0;
char *cp = p->pattern + p->patternlen, *nl = NULL;
while (++len <= p->patternlen) {
if (*(--cp) == '\n') {
nl = cp;
break;
}
}
if (!nl)
break;
new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
p->no, p->token, p->field);
new_pat->next = p->next;
if (!p->next)
*tail = &new_pat->next;
p->next = new_pat;
*nl = '\0';
p->patternlen -= len;
}
break;
default:
break;
}
}
void append_header_grep_pattern(struct grep_opt *opt,
enum grep_header_field field, const char *pat)
{
struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
GREP_PATTERN_HEAD, field);
if (field == GREP_HEADER_REFLOG)
opt->use_reflog_filter = 1;
do_append_grep_pat(&opt->header_tail, p);
}
void append_grep_pattern(struct grep_opt *opt, const char *pat,
const char *origin, int no, enum grep_pat_token t)
{
append_grep_pat(opt, pat, strlen(pat), origin, no, t);
}
void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
const char *origin, int no, enum grep_pat_token t)
{
struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
do_append_grep_pat(&opt->pattern_tail, p);
}
struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
{
struct grep_pat *pat;
struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
*ret = *opt;
ret->pattern_list = NULL;
ret->pattern_tail = &ret->pattern_list;
for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
{
if(pat->token == GREP_PATTERN_HEAD)
append_header_grep_pattern(ret, pat->field,
pat->pattern);
else
append_grep_pat(ret, pat->pattern, pat->patternlen,
pat->origin, pat->no, pat->token);
}
return ret;
}
static NORETURN void compile_regexp_failed(const struct grep_pat *p,
const char *error)
{
char where[1024];
if (p->no)
xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
else if (p->origin)
xsnprintf(where, sizeof(where), "%s, ", p->origin);
else
where[0] = 0;
die("%s'%s': %s", where, p->pattern, error);
}
static int is_fixed(const char *s, size_t len)
{
size_t i;
for (i = 0; i < len; i++) {
if (is_regex_special(s[i]))
return 0;
}
return 1;
}
#ifdef USE_LIBPCRE2
#define GREP_PCRE2_DEBUG_MALLOC 0
static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
{
void *pointer = malloc(size);
#if GREP_PCRE2_DEBUG_MALLOC
static int count = 1;
fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size);
#endif
return pointer;
}
static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
{
#if GREP_PCRE2_DEBUG_MALLOC
static int count = 1;
if (pointer)
fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++);
#endif
free(pointer);
}
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
{
int error;
PCRE2_UCHAR errbuf[256];
PCRE2_SIZE erroffset;
int options = PCRE2_MULTILINE;
int jitret;
int patinforet;
size_t jitsizearg;
int literal = !opt->ignore_case && (p->fixed || p->is_fixed);
/*
* Call pcre2_general_context_create() before calling any
* other pcre2_*(). It sets up our malloc()/free() functions
* with which everything else is allocated.
*/
p->pcre2_general_context = pcre2_general_context_create(
pcre2_malloc, pcre2_free, NULL);
if (!p->pcre2_general_context)
die("Couldn't allocate PCRE2 general context");
if (opt->ignore_case) {
if (!opt->ignore_locale && has_non_ascii(p->pattern)) {
p->pcre2_tables = pcre2_maketables(p->pcre2_general_context);
p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context);
pcre2_set_character_tables(p->pcre2_compile_context,
p->pcre2_tables);
}
options |= PCRE2_CASELESS;
}
if (!opt->ignore_locale && is_utf8_locale() && !literal)
options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF);
#ifdef GIT_PCRE2_VERSION_10_36_OR_HIGHER
/* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS))
options |= PCRE2_NO_START_OPTIMIZE;
#endif
p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
p->patternlen, options, &error, &erroffset,
p->pcre2_compile_context);
if (p->pcre2_pattern) {
p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context);
if (!p->pcre2_match_data)
die("Couldn't allocate PCRE2 match data");
} else {
pcre2_get_error_message(error, errbuf, sizeof(errbuf));
compile_regexp_failed(p, (const char *)&errbuf);
}
pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
if (p->pcre2_jit_on) {
jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
if (jitret)
die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
/*
* The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
* tells us whether the library itself supports JIT,
* but to see whether we're going to be actually using
* JIT we need to extract PCRE2_INFO_JITSIZE from the
* pattern *after* we do pcre2_jit_compile() above.
*
* This is because if the pattern contains the
* (*NO_JIT) verb (see pcre2syntax(3))
* pcre2_jit_compile() will exit early with 0. If we
* then proceed to call pcre2_jit_match() further down
* the line instead of pcre2_match() we'll either
* segfault (pre PCRE 10.31) or run into a fatal error
* (post PCRE2 10.31)
*/
patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
if (patinforet)
BUG("pcre2_pattern_info() failed: %d", patinforet);
if (jitsizearg == 0) {
p->pcre2_jit_on = 0;
return;
}
}
}
static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
regmatch_t *match, int eflags)
{
int ret, flags = 0;
PCRE2_SIZE *ovector;
PCRE2_UCHAR errbuf[256];
if (eflags & REG_NOTBOL)
flags |= PCRE2_NOTBOL;
if (p->pcre2_jit_on)
ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
eol - line, 0, flags, p->pcre2_match_data,
NULL);
else
ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
eol - line, 0, flags, p->pcre2_match_data,
NULL);
if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
die("%s failed with error code %d: %s",
(p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
errbuf);
}
if (ret > 0) {
ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
ret = 0;
match->rm_so = (int)ovector[0];
match->rm_eo = (int)ovector[1];
}
return ret;
}
static void free_pcre2_pattern(struct grep_pat *p)
{
pcre2_compile_context_free(p->pcre2_compile_context);
pcre2_code_free(p->pcre2_pattern);
pcre2_match_data_free(p->pcre2_match_data);
#ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER
pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables);
#else
free((void *)p->pcre2_tables);
#endif
pcre2_general_context_free(p->pcre2_general_context);
}
#else /* !USE_LIBPCRE2 */
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
{
die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}
static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
regmatch_t *match, int eflags)
{
return 1;
}
static void free_pcre2_pattern(struct grep_pat *p)
{
}
static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
{
struct strbuf sb = STRBUF_INIT;
int err;
int regflags = 0;
basic_regex_quote_buf(&sb, p->pattern);
if (opt->ignore_case)
regflags |= REG_ICASE;
err = regcomp(&p->regexp, sb.buf, regflags);
strbuf_release(&sb);
if (err) {
char errbuf[1024];
regerror(err, &p->regexp, errbuf, sizeof(errbuf));
compile_regexp_failed(p, errbuf);
}
}
#endif /* !USE_LIBPCRE2 */
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
int err;
int regflags = REG_NEWLINE;
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
p->fixed = opt->fixed;
if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2)
die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
p->is_fixed = is_fixed(p->pattern, p->patternlen);
#ifdef USE_LIBPCRE2
if (!p->fixed && !p->is_fixed) {
const char *no_jit = "(*NO_JIT)";
const int no_jit_len = strlen(no_jit);
if (starts_with(p->pattern, no_jit) &&
is_fixed(p->pattern + no_jit_len,
p->patternlen - no_jit_len))
p->is_fixed = 1;
}
#endif
if (p->fixed || p->is_fixed) {
#ifdef USE_LIBPCRE2
if (p->is_fixed) {
compile_pcre2_pattern(p, opt);
} else {
/*
* E.g. t7811-grep-open.sh relies on the
* pattern being restored.
*/
char *old_pattern = p->pattern;
size_t old_patternlen = p->patternlen;
struct strbuf sb = STRBUF_INIT;
/*
* There is the PCRE2_LITERAL flag, but it's
* only in PCRE v2 10.30 and later. Needing to
* ifdef our way around that and dealing with
* it + PCRE2_MULTILINE being an error is more
* complex than just quoting this ourselves.
*/
strbuf_add(&sb, "\\Q", 2);
strbuf_add(&sb, p->pattern, p->patternlen);
strbuf_add(&sb, "\\E", 2);
p->pattern = sb.buf;
p->patternlen = sb.len;
compile_pcre2_pattern(p, opt);
p->pattern = old_pattern;
p->patternlen = old_patternlen;
strbuf_release(&sb);
}
#else /* !USE_LIBPCRE2 */
compile_fixed_regexp(p, opt);
#endif /* !USE_LIBPCRE2 */
return;
}
if (opt->pcre2) {
compile_pcre2_pattern(p, opt);
return;
}
if (p->ignore_case)
regflags |= REG_ICASE;
if (opt->extended_regexp_option)
regflags |= REG_EXTENDED;
err = regcomp(&p->regexp, p->pattern, regflags);
if (err) {
char errbuf[1024];
regerror(err, &p->regexp, errbuf, 1024);
compile_regexp_failed(p, errbuf);
}
}
static struct grep_expr *compile_pattern_or(struct grep_pat **);
static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
{
struct grep_pat *p;
struct grep_expr *x;
p = *list;
if (!p)
return NULL;
switch (p->token) {
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
CALLOC_ARRAY(x, 1);
x->node = GREP_NODE_ATOM;
x->u.atom = p;
*list = p->next;
return x;
case GREP_OPEN_PAREN:
*list = p->next;
x = compile_pattern_or(list);
if (!*list || (*list)->token != GREP_CLOSE_PAREN)
die("unmatched parenthesis");
*list = (*list)->next;
return x;
default:
return NULL;
}
}
static struct grep_expr *compile_pattern_not(struct grep_pat **list)
{
struct grep_pat *p;
struct grep_expr *x;
p = *list;
if (!p)
return NULL;
switch (p->token) {
case GREP_NOT:
if (!p->next)
die("--not not followed by pattern expression");
*list = p->next;
CALLOC_ARRAY(x, 1);
x->node = GREP_NODE_NOT;
x->u.unary = compile_pattern_not(list);
if (!x->u.unary)
die("--not followed by non pattern expression");
return x;
default:
return compile_pattern_atom(list);
}
}
static struct grep_expr *compile_pattern_and(struct grep_pat **list)
{
struct grep_pat *p;
struct grep_expr *x, *y, *z;
x = compile_pattern_not(list);
p = *list;
if (p && p->token == GREP_AND) {
if (!x)
die("--and not preceded by pattern expression");
if (!p->next)
die("--and not followed by pattern expression");
*list = p->next;
y = compile_pattern_and(list);
if (!y)
die("--and not followed by pattern expression");
CALLOC_ARRAY(z, 1);
z->node = GREP_NODE_AND;
z->u.binary.left = x;
z->u.binary.right = y;
return z;
}
return x;
}
static struct grep_expr *compile_pattern_or(struct grep_pat **list)
{
struct grep_pat *p;
struct grep_expr *x, *y, *z;
x = compile_pattern_and(list);
p = *list;
if (x && p && p->token != GREP_CLOSE_PAREN) {
y = compile_pattern_or(list);
if (!y)
die("not a pattern expression %s", p->pattern);
CALLOC_ARRAY(z, 1);
z->node = GREP_NODE_OR;
z->u.binary.left = x;
z->u.binary.right = y;
return z;
}
return x;
}
static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
{
return compile_pattern_or(list);
}
static struct grep_expr *grep_not_expr(struct grep_expr *expr)
{
struct grep_expr *z = xcalloc(1, sizeof(*z));
z->node = GREP_NODE_NOT;
z->u.unary = expr;
return z;
}
static struct grep_expr *grep_true_expr(void)
{
struct grep_expr *z = xcalloc(1, sizeof(*z));
z->node = GREP_NODE_TRUE;
return z;
}
static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
{
struct grep_expr *z = xcalloc(1, sizeof(*z));
z->node = GREP_NODE_OR;
z->u.binary.left = left;
z->u.binary.right = right;
return z;
}
static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
{
struct grep_pat *p;
struct grep_expr *header_expr;
struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
enum grep_header_field fld;
if (!opt->header_list)
return NULL;
for (p = opt->header_list; p; p = p->next) {
if (p->token != GREP_PATTERN_HEAD)
BUG("a non-header pattern in grep header list.");
if (p->field < GREP_HEADER_FIELD_MIN ||
GREP_HEADER_FIELD_MAX <= p->field)
BUG("unknown header field %d", p->field);
compile_regexp(p, opt);
}
for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
header_group[fld] = NULL;
for (p = opt->header_list; p; p = p->next) {
struct grep_expr *h;
struct grep_pat *pp = p;
h = compile_pattern_atom(&pp);
if (!h || pp != p->next)
BUG("malformed header expr");
if (!header_group[p->field]) {
header_group[p->field] = h;
continue;
}
header_group[p->field] = grep_or_expr(h, header_group[p->field]);
}
header_expr = NULL;
for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
if (!header_group[fld])
continue;
if (!header_expr)
header_expr = grep_true_expr();
header_expr = grep_or_expr(header_group[fld], header_expr);
}
return header_expr;
}
static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
{
struct grep_expr *z = x;
while (x) {
assert(x->node == GREP_NODE_OR);
if (x->u.binary.right &&
x->u.binary.right->node == GREP_NODE_TRUE) {
x->u.binary.right = y;
break;
}
x = x->u.binary.right;
}
return z;
}
void compile_grep_patterns(struct grep_opt *opt)
{
struct grep_pat *p;
struct grep_expr *header_expr = prep_header_patterns(opt);
for (p = opt->pattern_list; p; p = p->next) {
switch (p->token) {
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
compile_regexp(p, opt);
break;
default:
opt->extended = 1;
break;
}
}
if (opt->all_match || opt->no_body_match || header_expr)
opt->extended = 1;
else if (!opt->extended)
return;
p = opt->pattern_list;
if (p)
opt->pattern_expression = compile_pattern_expr(&p);
if (p)
die("incomplete pattern expression: %s", p->pattern);
if (opt->no_body_match && opt->pattern_expression)
opt->pattern_expression = grep_not_expr(opt->pattern_expression);
if (!header_expr)
return;
if (!opt->pattern_expression)
opt->pattern_expression = header_expr;
else if (opt->all_match)
opt->pattern_expression = grep_splice_or(header_expr,
opt->pattern_expression);
else
opt->pattern_expression = grep_or_expr(opt->pattern_expression,
header_expr);
opt->all_match = 1;
}
static void free_pattern_expr(struct grep_expr *x)
{
switch (x->node) {
case GREP_NODE_TRUE:
case GREP_NODE_ATOM:
break;
case GREP_NODE_NOT:
free_pattern_expr(x->u.unary);
break;
case GREP_NODE_AND:
case GREP_NODE_OR:
free_pattern_expr(x->u.binary.left);
free_pattern_expr(x->u.binary.right);
break;
}
free(x);
}
void free_grep_patterns(struct grep_opt *opt)
{
struct grep_pat *p, *n;
for (p = opt->pattern_list; p; p = n) {
n = p->next;
switch (p->token) {
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
if (p->pcre2_pattern)
free_pcre2_pattern(p);
else
regfree(&p->regexp);
free(p->pattern);
break;
default:
break;
}
free(p);
}
if (!opt->extended)
return;
free_pattern_expr(opt->pattern_expression);
}
static const char *end_of_line(const char *cp, unsigned long *left)
{
unsigned long l = *left;
while (l && *cp != '\n') {
l--;
cp++;
}
*left = l;
return cp;
}
static int word_char(char ch)
{
return isalnum(ch) || ch == '_';
}
static void output_color(struct grep_opt *opt, const void *data, size_t size,
const char *color)
{
if (want_color(opt->color) && color && color[0]) {
opt->output(opt, color, strlen(color));
opt->output(opt, data, size);
opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
} else
opt->output(opt, data, size);
}
static void output_sep(struct grep_opt *opt, char sign)
{
if (opt->null_following_name)
opt->output(opt, "\0", 1);
else
output_color(opt, &sign, 1, opt->colors[GREP_COLOR_SEP]);
}
static void show_name(struct grep_opt *opt, const char *name)
{
output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
}
static int patmatch(struct grep_pat *p,
const char *line, const char *eol,
regmatch_t *match, int eflags)
{
int hit;
if (p->pcre2_pattern)
hit = !pcre2match(p, line, eol, match, eflags);
else
hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
eflags);
return hit;
}
static void strip_timestamp(const char *bol, const char **eol_p)
{
const char *eol = *eol_p;
while (bol < --eol) {
if (*eol != '>')
continue;
*eol_p = ++eol;
break;
}
}
static struct {
const char *field;
size_t len;
} header_field[] = {
{ "author ", 7 },
{ "committer ", 10 },
{ "reflog ", 7 },
};
static int headerless_match_one_pattern(struct grep_pat *p,
const char *bol, const char *eol,
enum grep_context ctx,
regmatch_t *pmatch, int eflags)
{
int hit = 0;
const char *start = bol;
if ((p->token != GREP_PATTERN) &&
((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
return 0;
again:
hit = patmatch(p, bol, eol, pmatch, eflags);
if (hit && p->word_regexp) {
if ((pmatch[0].rm_so < 0) ||
(eol - bol) < pmatch[0].rm_so ||
(pmatch[0].rm_eo < 0) ||
(eol - bol) < pmatch[0].rm_eo)
die("regexp returned nonsense");
/* Match beginning must be either beginning of the
* line, or at word boundary (i.e. the last char must
* not be a word char). Similarly, match end must be
* either end of the line, or at word boundary
* (i.e. the next char must not be a word char).
*/
if ( ((pmatch[0].rm_so == 0) ||
!word_char(bol[pmatch[0].rm_so-1])) &&
((pmatch[0].rm_eo == (eol-bol)) ||
!word_char(bol[pmatch[0].rm_eo])) )
;
else
hit = 0;
/* Words consist of at least one character. */
if (pmatch->rm_so == pmatch->rm_eo)
hit = 0;
if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
/* There could be more than one match on the
* line, and the first match might not be
* strict word match. But later ones could be!
* Forward to the next possible start, i.e. the
* next position following a non-word char.
*/
bol = pmatch[0].rm_so + bol + 1;
while (word_char(bol[-1]) && bol < eol)
bol++;
eflags |= REG_NOTBOL;
if (bol < eol)
goto again;
}
}
if (hit) {
pmatch[0].rm_so += bol - start;
pmatch[0].rm_eo += bol - start;
}
return hit;
}
static int match_one_pattern(struct grep_pat *p,
const char *bol, const char *eol,
enum grep_context ctx, regmatch_t *pmatch,
int eflags)
{
const char *field;
size_t len;
if (p->token == GREP_PATTERN_HEAD) {
assert(p->field < ARRAY_SIZE(header_field));
field = header_field[p->field].field;
len = header_field[p->field].len;
if (strncmp(bol, field, len))
return 0;
bol += len;
switch (p->field) {
case GREP_HEADER_AUTHOR:
case GREP_HEADER_COMMITTER:
strip_timestamp(bol, &eol);
break;
default:
break;
}
}
return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags);
}
static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x,
const char *bol, const char *eol,
enum grep_context ctx, ssize_t *col,
ssize_t *icol, int collect_hits)
{
int h = 0;
if (!x)
die("Not a valid grep expression");
switch (x->node) {
case GREP_NODE_TRUE:
h = 1;
break;
case GREP_NODE_ATOM:
{
regmatch_t tmp;
h = match_one_pattern(x->u.atom, bol, eol, ctx,
&tmp, 0);
if (h && (*col < 0 || tmp.rm_so < *col))
*col = tmp.rm_so;
}
if (x->u.atom->token == GREP_PATTERN_BODY)
opt->body_hit |= h;
break;
case GREP_NODE_NOT:
/*
* Upon visiting a GREP_NODE_NOT, col and icol become swapped.
*/
h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
0);
break;
case GREP_NODE_AND:
h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
icol, 0);
if (h || opt->columnnum) {
/*
* Don't short-circuit AND when given --column, since a
* NOT earlier in the tree may turn this into an OR. In
* this case, see the below comment.
*/
h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
ctx, col, icol, 0);
}
break;
case GREP_NODE_OR:
if (!(collect_hits || opt->columnnum)) {
/*
* Don't short-circuit OR when given --column (or
* collecting hits) to ensure we don't skip a later
* child that would produce an earlier match.
*/
return (match_expr_eval(opt, x->u.binary.left, bol, eol,
ctx, col, icol, 0) ||
match_expr_eval(opt, x->u.binary.right, bol,
eol, ctx, col, icol, 0));
}
h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
icol, 0);
if (collect_hits)
x->u.binary.left->hit |= h;
h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
icol, collect_hits);
break;
default:
die("Unexpected node type (internal error) %d", x->node);
}
if (collect_hits)
x->hit |= h;
return h;
}
static int match_expr(struct grep_opt *opt,
const char *bol, const char *eol,
enum grep_context ctx, ssize_t *col,
ssize_t *icol, int collect_hits)
{
struct grep_expr *x = opt->pattern_expression;
return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
}
static int match_line(struct grep_opt *opt,
const char *bol, const char *eol,
ssize_t *col, ssize_t *icol,
enum grep_context ctx, int collect_hits)
{
struct grep_pat *p;
int hit = 0;
if (opt->extended)
return match_expr(opt, bol, eol, ctx, col, icol,
collect_hits);
/* we do not call with collect_hits without being extended */
for (p = opt->pattern_list; p; p = p->next) {
regmatch_t tmp;
if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
hit |= 1;
if (!opt->columnnum) {
/*
* Without --column, any single match on a line
* is enough to know that it needs to be
* printed. With --column, scan _all_ patterns
* to find the earliest.
*/
break;
}
if (*col < 0 || tmp.rm_so < *col)
*col = tmp.rm_so;
}
}
return hit;
}
static int match_next_pattern(struct grep_pat *p,
const char *bol, const char *eol,
enum grep_context ctx,
regmatch_t *pmatch, int eflags)
{
regmatch_t match;
if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags))
return 0;
if (match.rm_so < 0 || match.rm_eo < 0)
return 0;
if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
if (match.rm_so > pmatch->rm_so)
return 1;
if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
return 1;
}
pmatch->rm_so = match.rm_so;
pmatch->rm_eo = match.rm_eo;
return 1;
}
int grep_next_match(struct grep_opt *opt,
const char *bol, const char *eol,
enum grep_context ctx, regmatch_t *pmatch,
enum grep_header_field field, int eflags)
{
struct grep_pat *p;
int hit = 0;
pmatch->rm_so = pmatch->rm_eo = -1;
if (bol < eol) {
for (p = ((ctx == GREP_CONTEXT_HEAD)
? opt->header_list : opt->pattern_list);
p; p = p->next) {
switch (p->token) {
case GREP_PATTERN_HEAD:
if ((field != GREP_HEADER_FIELD_MAX) &&
(p->field != field))
continue;
/* fall thru */
case GREP_PATTERN: /* atom */
case GREP_PATTERN_BODY:
hit |= match_next_pattern(p, bol, eol, ctx,
pmatch, eflags);
break;
default:
break;
}
}
}
return hit;
}
static void show_line_header(struct grep_opt *opt, const char *name,
unsigned lno, ssize_t cno, char sign)
{
if (opt->heading && opt->last_shown == 0) {
output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
opt->output(opt, "\n", 1);
}
opt->last_shown = lno;
if (!opt->heading && opt->pathname) {
output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
output_sep(opt, sign);
}
if (opt->linenum) {
char buf[32];
xsnprintf(buf, sizeof(buf), "%d", lno);
output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
output_sep(opt, sign);
}
/*
* Treat 'cno' as the 1-indexed offset from the start of a non-context
* line to its first match. Otherwise, 'cno' is 0 indicating that we are
* being called with a context line.
*/
if (opt->columnnum && cno) {
char buf[32];
xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]);
output_sep(opt, sign);
}
}
static void show_line(struct grep_opt *opt,
const char *bol, const char *eol,
const char *name, unsigned lno, ssize_t cno, char sign)
{
int rest = eol - bol;
const char *match_color = NULL;
const char *line_color = NULL;
if (opt->file_break && opt->last_shown == 0) {
if (opt->show_hunk_mark)
opt->output(opt, "\n", 1);
} else if (opt->pre_context || opt->post_context || opt->funcbody) {
if (opt->last_shown == 0) {
if (opt->show_hunk_mark) {
output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
opt->output(opt, "\n", 1);
}
} else if (lno > opt->last_shown + 1) {
output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
opt->output(opt, "\n", 1);
}
}
if (!opt->only_matching) {
/*
* In case the line we're being called with contains more than
* one match, leave printing each header to the loop below.
*/
show_line_header(opt, name, lno, cno, sign);
}
if (opt->color || opt->only_matching) {
regmatch_t match;
enum grep_context ctx = GREP_CONTEXT_BODY;
int eflags = 0;
if (opt->color) {
if (sign == ':')
match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
else
match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
if (sign == ':')
line_color = opt->colors[GREP_COLOR_SELECTED];
else if (sign == '-')
line_color = opt->colors[GREP_COLOR_CONTEXT];
else if (sign == '=')
line_color = opt->colors[GREP_COLOR_FUNCTION];
}
while (grep_next_match(opt, bol, eol, ctx, &match,
GREP_HEADER_FIELD_MAX, eflags)) {
if (match.rm_so == match.rm_eo)
break;
if (opt->only_matching)
show_line_header(opt, name, lno, cno, sign);
else
output_color(opt, bol, match.rm_so, line_color);
output_color(opt, bol + match.rm_so,
match.rm_eo - match.rm_so, match_color);
if (opt->only_matching)
opt->output(opt, "\n", 1);
bol += match.rm_eo;
cno += match.rm_eo;
rest -= match.rm_eo;
eflags = REG_NOTBOL;
}
}
if (!opt->only_matching) {
output_color(opt, bol, rest, line_color);
opt->output(opt, "\n", 1);
}
}
int grep_use_locks;
/*
* This lock protects access to the gitattributes machinery, which is
* not thread-safe.
*/
pthread_mutex_t grep_attr_mutex;
static inline void grep_attr_lock(void)
{
if (grep_use_locks)
pthread_mutex_lock(&grep_attr_mutex);
}
static inline void grep_attr_unlock(void)
{
if (grep_use_locks)
pthread_mutex_unlock(&grep_attr_mutex);
}
static int match_funcname(struct grep_opt *opt, struct grep_source *gs,
const char *bol, const char *eol)
{
xdemitconf_t *xecfg = opt->priv;
if (xecfg && !xecfg->find_func) {
grep_source_load_driver(gs, opt->repo->index);
if (gs->driver->funcname.pattern) {
const struct userdiff_funcname *pe = &gs->driver->funcname;
xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
} else {
xecfg = opt->priv = NULL;
}
}
if (xecfg) {
char buf[1];
return xecfg->find_func(bol, eol - bol, buf, 1,
xecfg->find_func_priv) >= 0;
}
if (bol == eol)
return 0;
if (isalpha(*bol) || *bol == '_' || *bol == '$')
return 1;
return 0;
}
static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
const char *bol, unsigned lno)
{
while (bol > gs->buf) {
const char *eol = --bol;
while (bol > gs->buf && bol[-1] != '\n')
bol--;
lno--;
if (lno <= opt->last_shown)
break;
if (match_funcname(opt, gs, bol, eol)) {
show_line(opt, bol, eol, gs->name, lno, 0, '=');
break;
}
}
}
static int is_empty_line(const char *bol, const char *eol);
static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
const char *bol, const char *end, unsigned lno)
{
unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
int funcname_needed = !!opt->funcname, comment_needed = 0;
if (opt->pre_context < lno)
from = lno - opt->pre_context;
if (from <= opt->last_shown)
from = opt->last_shown + 1;
orig_from = from;
if (opt->funcbody) {
if (match_funcname(opt, gs, bol, end))
comment_needed = 1;
else
funcname_needed = 1;
from = opt->last_shown + 1;
}
/* Rewind. */
while (bol > gs->buf && cur > from) {
const char *next_bol = bol;
const char *eol = --bol;
while (bol > gs->buf && bol[-1] != '\n')
bol--;
cur--;
if (comment_needed && (is_empty_line(bol, eol) ||
match_funcname(opt, gs, bol, eol))) {
comment_needed = 0;
from = orig_from;
if (cur < from) {
cur++;
bol = next_bol;
break;
}
}
if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
funcname_lno = cur;
funcname_needed = 0;
if (opt->funcbody)
comment_needed = 1;
else
from = orig_from;
}
}
/* We need to look even further back to find a function signature. */
if (opt->funcname && funcname_needed)
show_funcname_line(opt, gs, bol, cur);
/* Back forward. */
while (cur < lno) {
const char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
while (*eol != '\n')
eol++;
show_line(opt, bol, eol, gs->name, cur, 0, sign);
bol = eol + 1;
cur++;
}
}
static int should_lookahead(struct grep_opt *opt)
{
struct grep_pat *p;
if (opt->extended)
return 0; /* punt for too complex stuff */
if (opt->invert)
return 0;
for (p = opt->pattern_list; p; p = p->next) {
if (p->token != GREP_PATTERN)
return 0; /* punt for "header only" and stuff */
}
return 1;
}
static int look_ahead(struct grep_opt *opt,
unsigned long *left_p,
unsigned *lno_p,
const char **bol_p)
{
unsigned lno = *lno_p;
const char *bol = *bol_p;
struct grep_pat *p;
const char *sp, *last_bol;
regoff_t earliest = -1;
for (p = opt->pattern_list; p; p = p->next) {
int hit;
regmatch_t m;
hit = patmatch(p, bol, bol + *left_p, &m, 0);
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
continue;
if (earliest < 0 || m.rm_so < earliest)
earliest = m.rm_so;
}
if (earliest < 0) {
*bol_p = bol + *left_p;
*left_p = 0;
return 1;
}
for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
; /* find the beginning of the line */
last_bol = sp;
for (sp = bol; sp < last_bol; sp++) {
if (*sp == '\n')
lno++;
}
*left_p -= last_bol - bol;
*bol_p = last_bol;
*lno_p = lno;
return 0;
}
static int fill_textconv_grep(struct repository *r,
struct userdiff_driver *driver,
struct grep_source *gs)
{
struct diff_filespec *df;
char *buf;
size_t size;
if (!driver || !driver->textconv)
return grep_source_load(gs);
/*
* The textconv interface is intimately tied to diff_filespecs, so we
* have to pretend to be one. If we could unify the grep_source
* and diff_filespec structs, this mess could just go away.
*/
df = alloc_filespec(gs->path);
switch (gs->type) {
case GREP_SOURCE_OID:
fill_filespec(df, gs->identifier, 1, 0100644);
break;
case GREP_SOURCE_FILE:
fill_filespec(df, null_oid(), 0, 0100644);
break;
default:
BUG("attempt to textconv something without a path?");
}
/*
* fill_textconv is not remotely thread-safe; it modifies the global
* diff tempfile structure, writes to the_repo's odb and might
* internally call thread-unsafe functions such as the
* prepare_packed_git() lazy-initializator. Because of the last two, we
* must ensure mutual exclusion between this call and the object reading
* API, thus we use obj_read_lock() here.
*
* TODO: allowing text conversion to run in parallel with object
* reading operations might increase performance in the multithreaded
* non-worktreee git-grep with --textconv.
*/
obj_read_lock();
size = fill_textconv(r, driver, df, &buf);
obj_read_unlock();
free_filespec(df);
/*
* The normal fill_textconv usage by the diff machinery would just keep
* the textconv'd buf separate from the diff_filespec. But much of the
* grep code passes around a grep_source and assumes that its "buf"
* pointer is the beginning of the thing we are searching. So let's
* install our textconv'd version into the grep_source, taking care not
* to leak any existing buffer.
*/
grep_source_clear_data(gs);
gs->buf = buf;
gs->size = size;
return 0;
}
static int is_empty_line(const char *bol, const char *eol)
{
while (bol < eol && isspace(*bol))
bol++;
return bol == eol;
}
static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
{
const char *bol;
const char *peek_bol = NULL;
unsigned long left;
unsigned lno = 1;
unsigned last_hit = 0;
int binary_match_only = 0;
unsigned count = 0;
int try_lookahead = 0;
int show_function = 0;
struct userdiff_driver *textconv = NULL;
enum grep_context ctx = GREP_CONTEXT_HEAD;
xdemitconf_t xecfg;
if (!opt->status_only && gs->name == NULL)
BUG("grep call which could print a name requires "
"grep_source.name be non-NULL");
if (!opt->output)
opt->output = std_output;
if (opt->pre_context || opt->post_context || opt->file_break ||
opt->funcbody) {
/* Show hunk marks, except for the first file. */
if (opt->last_shown)
opt->show_hunk_mark = 1;
/*
* If we're using threads then we can't easily identify
* the first file. Always put hunk marks in that case
* and skip the very first one later in work_done().
*/
if (opt->output != std_output)
opt->show_hunk_mark = 1;
}
opt->last_shown = 0;
if (opt->allow_textconv) {
grep_source_load_driver(gs, opt->repo->index);
/*
* We might set up the shared textconv cache data here, which
* is not thread-safe. Also, get_oid_with_context() and
* parse_object() might be internally called. As they are not
* currently thread-safe and might be racy with object reading,
* obj_read_lock() must be called.
*/
grep_attr_lock();
obj_read_lock();
textconv = userdiff_get_textconv(opt->repo, gs->driver);
obj_read_unlock();
grep_attr_unlock();
}
/*
* We know the result of a textconv is text, so we only have to care
* about binary handling if we are not using it.
*/
if (!textconv) {
switch (opt->binary) {
case GREP_BINARY_DEFAULT:
if (grep_source_is_binary(gs, opt->repo->index))
binary_match_only = 1;
break;
case GREP_BINARY_NOMATCH:
if (grep_source_is_binary(gs, opt->repo->index))
return 0; /* Assume unmatch */
break;
case GREP_BINARY_TEXT:
break;
default:
BUG("unknown binary handling mode");
}
}
memset(&xecfg, 0, sizeof(xecfg));
opt->priv = &xecfg;
try_lookahead = should_lookahead(opt);
if (fill_textconv_grep(opt->repo, textconv, gs) < 0)
return 0;
bol = gs->buf;
left = gs->size;
while (left) {
const char *eol;
int hit;
ssize_t cno;
ssize_t col = -1, icol = -1;
/*
* look_ahead() skips quickly to the line that possibly
* has the next hit; don't call it if we need to do
* something more than just skipping the current line
* in response to an unmatch for the current line. E.g.
* inside a post-context window, we will show the current
* line as a context around the previous hit when it
* doesn't hit.
*/
if (try_lookahead
&& !(last_hit
&& (show_function ||
lno <= last_hit + opt->post_context))
&& look_ahead(opt, &left, &lno, &bol))
break;
eol = end_of_line(bol, &left);
if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
ctx = GREP_CONTEXT_BODY;
hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
if (collect_hits)
goto next_line;
/* "grep -v -e foo -e bla" should list lines
* that do not have either, so inversion should
* be done outside.
*/
if (opt->invert)
hit = !hit;
if (opt->unmatch_name_only) {
if (hit)
return 0;
goto next_line;
}
if (hit) {
count++;
if (opt->status_only)
return 1;
if (opt->name_only) {
show_name(opt, gs->name);
return 1;
}
if (opt->count)
goto next_line;
if (binary_match_only) {
opt->output(opt, "Binary file ", 12);
output_color(opt, gs->name, strlen(gs->name),
opt->colors[GREP_COLOR_FILENAME]);
opt->output(opt, " matches\n", 9);
return 1;
}
/* Hit at this line. If we haven't shown the
* pre-context lines, we would need to show them.
*/
if (opt->pre_context || opt->funcbody)
show_pre_context(opt, gs, bol, eol, lno);
else if (opt->funcname)
show_funcname_line(opt, gs, bol, lno);
cno = opt->invert ? icol : col;
if (cno < 0) {
/*
* A negative cno indicates that there was no
* match on the line. We are thus inverted and
* being asked to show all lines that _don't_
* match a given expression. Therefore, set cno
* to 0 to suggest the whole line matches.
*/
cno = 0;
}
show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
last_hit = lno;
if (opt->funcbody)
show_function = 1;
goto next_line;
}
if (show_function && (!peek_bol || peek_bol < bol)) {
unsigned long peek_left = left;
const char *peek_eol = eol;
/*
* Trailing empty lines are not interesting.
* Peek past them to see if they belong to the
* body of the current function.
*/
peek_bol = bol;
while (is_empty_line(peek_bol, peek_eol)) {
peek_bol = peek_eol + 1;
peek_eol = end_of_line(peek_bol, &peek_left);
}
if (match_funcname(opt, gs, peek_bol, peek_eol))
show_function = 0;
}
if (show_function ||
(last_hit && lno <= last_hit + opt->post_context)) {
/* If the last hit is within the post context,
* we need to show this line.
*/
show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
}
next_line:
bol = eol + 1;
if (!left)
break;
left--;
lno++;
}
if (collect_hits)
return 0;
if (opt->status_only)
return opt->unmatch_name_only;
if (opt->unmatch_name_only) {
/* We did not see any hit, so we want to show this */
show_name(opt, gs->name);
return 1;
}
xdiff_clear_find_func(&xecfg);
opt->priv = NULL;
/* NEEDSWORK:
* The real "grep -c foo *.c" gives many "bar.c:0" lines,
* which feels mostly useless but sometimes useful. Maybe
* make it another option? For now suppress them.
*/
if (opt->count && count) {
char buf[32];
if (opt->pathname) {
output_color(opt, gs->name, strlen(gs->name),
opt->colors[GREP_COLOR_FILENAME]);
output_sep(opt, ':');
}
xsnprintf(buf, sizeof(buf), "%u\n", count);
opt->output(opt, buf, strlen(buf));
return 1;
}
return !!last_hit;
}
static void clr_hit_marker(struct grep_expr *x)
{
/* All-hit markers are meaningful only at the very top level
* OR node.
*/
while (1) {
x->hit = 0;
if (x->node != GREP_NODE_OR)
return;
x->u.binary.left->hit = 0;
x = x->u.binary.right;
}
}
static int chk_hit_marker(struct grep_expr *x)
{
/* Top level nodes have hit markers. See if they all are hits */
while (1) {
if (x->node != GREP_NODE_OR)
return x->hit;
if (!x->u.binary.left->hit)
return 0;
x = x->u.binary.right;
}
}
int grep_source(struct grep_opt *opt, struct grep_source *gs)
{
/*
* we do not have to do the two-pass grep when we do not check
* buffer-wide "all-match".
*/
if (!opt->all_match && !opt->no_body_match)
return grep_source_1(opt, gs, 0);
/* Otherwise the toplevel "or" terms hit a bit differently.
* We first clear hit markers from them.
*/
clr_hit_marker(opt->pattern_expression);
opt->body_hit = 0;
grep_source_1(opt, gs, 1);
if (opt->all_match && !chk_hit_marker(opt->pattern_expression))
return 0;
if (opt->no_body_match && opt->body_hit)
return 0;
return grep_source_1(opt, gs, 0);
}
static void grep_source_init_buf(struct grep_source *gs,
const char *buf,
unsigned long size)
{
gs->type = GREP_SOURCE_BUF;
gs->name = NULL;
gs->path = NULL;
gs->buf = buf;
gs->size = size;
gs->driver = NULL;
gs->identifier = NULL;
}
int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size)
{
struct grep_source gs;
int r;
grep_source_init_buf(&gs, buf, size);
r = grep_source(opt, &gs);
grep_source_clear(&gs);
return r;
}
void grep_source_init_file(struct grep_source *gs, const char *name,
const char *path)
{
gs->type = GREP_SOURCE_FILE;
gs->name = xstrdup_or_null(name);
gs->path = xstrdup_or_null(path);
gs->buf = NULL;
gs->size = 0;
gs->driver = NULL;
gs->identifier = xstrdup(path);
}
void grep_source_init_oid(struct grep_source *gs, const char *name,
const char *path, const struct object_id *oid,
struct repository *repo)
{
gs->type = GREP_SOURCE_OID;
gs->name = xstrdup_or_null(name);
gs->path = xstrdup_or_null(path);
gs->buf = NULL;
gs->size = 0;
gs->driver = NULL;
gs->identifier = oiddup(oid);
gs->repo = repo;
}
void grep_source_clear(struct grep_source *gs)
{
FREE_AND_NULL(gs->name);
FREE_AND_NULL(gs->path);
FREE_AND_NULL(gs->identifier);
grep_source_clear_data(gs);
}
void grep_source_clear_data(struct grep_source *gs)
{
switch (gs->type) {
case GREP_SOURCE_FILE:
case GREP_SOURCE_OID:
/* these types own the buffer */
free((char *)gs->buf);
gs->buf = NULL;
gs->size = 0;
break;
case GREP_SOURCE_BUF:
/* leave user-provided buf intact */
break;
}
}
static int grep_source_load_oid(struct grep_source *gs)
{
enum object_type type;
gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type,
&gs->size);
if (!gs->buf)
return error(_("'%s': unable to read %s"),
gs->name,
oid_to_hex(gs->identifier));
return 0;
}
static int grep_source_load_file(struct grep_source *gs)
{
const char *filename = gs->identifier;
struct stat st;
char *data;
size_t size;
int i;
if (lstat(filename, &st) < 0) {
err_ret:
if (errno != ENOENT)
error_errno(_("failed to stat '%s'"), filename);
return -1;
}
if (!S_ISREG(st.st_mode))
return -1;
size = xsize_t(st.st_size);
i = open(filename, O_RDONLY);
if (i < 0)
goto err_ret;
data = xmallocz(size);
if (st.st_size != read_in_full(i, data, size)) {
error_errno(_("'%s': short read"), filename);
close(i);
free(data);
return -1;
}
close(i);
gs->buf = data;
gs->size = size;
return 0;
}
static int grep_source_load(struct grep_source *gs)
{
if (gs->buf)
return 0;
switch (gs->type) {
case GREP_SOURCE_FILE:
return grep_source_load_file(gs);
case GREP_SOURCE_OID:
return grep_source_load_oid(gs);
case GREP_SOURCE_BUF:
return gs->buf ? 0 : -1;
}
BUG("invalid grep_source type to load");
}
void grep_source_load_driver(struct grep_source *gs,
struct index_state *istate)
{
if (gs->driver)
return;
grep_attr_lock();
if (gs->path)
gs->driver = userdiff_find_by_path(istate, gs->path);
if (!gs->driver)
gs->driver = userdiff_find_by_name("default");
grep_attr_unlock();
}
static int grep_source_is_binary(struct grep_source *gs,
struct index_state *istate)
{
grep_source_load_driver(gs, istate);
if (gs->driver->binary != -1)
return gs->driver->binary;
if (!grep_source_load(gs))
return buffer_is_binary(gs->buf, gs->size);
return 0;
}