git-commit-vandalism/ident.c

498 lines
12 KiB
C
Raw Normal View History

/*
* ident.c
*
* create git identifier lines of the form "name <email> date"
*
* Copyright (C) 2005 Linus Torvalds
*/
#include "cache.h"
/*
 * Default identity pieces.  Each buffer starts out empty and is filled in
 * by the matching ident_default_*() accessor the first time that accessor
 * finds it empty.
 */
static struct strbuf git_default_name = STRBUF_INIT;
static struct strbuf git_default_email = STRBUF_INIT;
static struct strbuf git_default_date = STRBUF_INIT;
/*
 * Set to 1 when the corresponding default could only be produced from a
 * placeholder (no passwd entry, or no usable host/domain name).
 */
static int default_email_is_bogus;
static int default_name_is_bogus;
/* Bit values or'ed into the *_ident_explicitly_given masks below. */
#define IDENT_NAME_GIVEN 01
#define IDENT_MAIL_GIVEN 02
#define IDENT_ALL_GIVEN (IDENT_NAME_GIVEN|IDENT_MAIL_GIVEN)
static int committer_ident_explicitly_given;
static int author_ident_explicitly_given;
/*
 * get_gecos() yields the GECOS string of a passwd entry.  When the platform
 * has no pw_gecos member (NO_GECOS_IN_PWENT) it yields "&", which
 * copy_gecos() expands to the capitalized login name.
 */
#ifdef NO_GECOS_IN_PWENT
#define get_gecos(ignored) "&"
#else
#define get_gecos(struct_passwd) ((struct_passwd)->pw_gecos)
#endif
/*
 * Look up the passwd entry of the calling user.
 *
 * Never returns NULL: when getpwuid() has no entry for the current uid, a
 * static placeholder entry (login name "unknown") is returned instead and,
 * if is_bogus is non-NULL, *is_bogus is set to 1.  On a successful lookup
 * *is_bogus is left untouched.
 */
static struct passwd *xgetpwuid_self(int *is_bogus)
{
	static struct passwd fallback;
	struct passwd *entry;

	errno = 0;
	entry = getpwuid(getuid());
	if (entry)
		return entry;

	/* No passwd entry for this uid; hand out the placeholder identity. */
	fallback.pw_name = "unknown";
#ifndef NO_GECOS_IN_PWENT
	fallback.pw_gecos = "Unknown";
#endif
	if (is_bogus)
		*is_bogus = 1;
	return &fallback;
}
/*
 * Append the human-readable name stored in the GECOS field of w to name.
 *
 * Traditionally the GECOS field also held office phone numbers etc.,
 * separated with commas, so only the part before the first ',' is used.
 * A '&' in that part stands for the login name with its first letter
 * capitalized.
 */
static void copy_gecos(const struct passwd *w, struct strbuf *name)
{
	const char *src;

	for (src = get_gecos(w); *src && *src != ','; src++) {
		if (*src != '&') {
			strbuf_addch(name, *src);
			continue;
		}
		/*
		 * An empty login name has nothing to capitalize; skipping it
		 * avoids appending a NUL byte and reading past the terminator
		 * through pw_name + 1.
		 */
		if (!*w->pw_name)
			continue;
		/* Sorry, Mr. McDonald... */
		strbuf_addch(name, toupper((unsigned char)*w->pw_name));
		strbuf_addstr(name, w->pw_name + 1);
	}
}
static int add_mailname_host(struct strbuf *buf)
ident: check /etc/mailname if email is unknown Before falling back to gethostname(), check /etc/mailname if GIT_AUTHOR_EMAIL is not set in the environment or through config files. Only fall back if /etc/mailname cannot be opened or read. The /etc/mailname convention comes from Debian policy section 11.6 ("mail transport, delivery and user agents"), though maybe it could be useful sometimes on other machines, too. The lack of this support was noticed by various people in different ways: - Ian observed that git was choosing the address 'ian@anarres.relativity.greenend.org.uk' rather than 'ian@davenant.greenend.org.uk' as it should have done. - Jonathan noticed that operations like "git commit" were needlessly slow when using a resolver that was slow to handle reverse DNS lookups. Alas, after this patch, if /etc/mailname is set up and the [user] name and email configuration aren't, the committer email will not provide a charming reminder of which machine commits were made on any more. But I think it's worth it. Mechanics: the functionality of reading mailname goes in its own function, so people who care about other distros can easily add an implementation to a similar location without making copy_email() too long and losing clarity. While at it, we split out the fallback default logic that does gethostname(), too (rearranging it a little and adding a check for errors from gethostname while at it). Based on a patch by Gerrit Pape <pape@smarden.org>. Requested-by: Ian Jackson <ijackson@chiark.greenend.org.uk> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Improved-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-10-03 08:16:33 +02:00
{
FILE *mailname;
struct strbuf mailnamebuf = STRBUF_INIT;
ident: check /etc/mailname if email is unknown Before falling back to gethostname(), check /etc/mailname if GIT_AUTHOR_EMAIL is not set in the environment or through config files. Only fall back if /etc/mailname cannot be opened or read. The /etc/mailname convention comes from Debian policy section 11.6 ("mail transport, delivery and user agents"), though maybe it could be useful sometimes on other machines, too. The lack of this support was noticed by various people in different ways: - Ian observed that git was choosing the address 'ian@anarres.relativity.greenend.org.uk' rather than 'ian@davenant.greenend.org.uk' as it should have done. - Jonathan noticed that operations like "git commit" were needlessly slow when using a resolver that was slow to handle reverse DNS lookups. Alas, after this patch, if /etc/mailname is set up and the [user] name and email configuration aren't, the committer email will not provide a charming reminder of which machine commits were made on any more. But I think it's worth it. Mechanics: the functionality of reading mailname goes in its own function, so people who care about other distros can easily add an implementation to a similar location without making copy_email() too long and losing clarity. While at it, we split out the fallback default logic that does gethostname(), too (rearranging it a little and adding a check for errors from gethostname while at it). Based on a patch by Gerrit Pape <pape@smarden.org>. Requested-by: Ian Jackson <ijackson@chiark.greenend.org.uk> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Improved-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-10-03 08:16:33 +02:00
mailname = fopen("/etc/mailname", "r");
if (!mailname) {
if (errno != ENOENT)
warning("cannot open /etc/mailname: %s",
strerror(errno));
return -1;
}
if (strbuf_getline(&mailnamebuf, mailname, '\n') == EOF) {
ident: check /etc/mailname if email is unknown Before falling back to gethostname(), check /etc/mailname if GIT_AUTHOR_EMAIL is not set in the environment or through config files. Only fall back if /etc/mailname cannot be opened or read. The /etc/mailname convention comes from Debian policy section 11.6 ("mail transport, delivery and user agents"), though maybe it could be useful sometimes on other machines, too. The lack of this support was noticed by various people in different ways: - Ian observed that git was choosing the address 'ian@anarres.relativity.greenend.org.uk' rather than 'ian@davenant.greenend.org.uk' as it should have done. - Jonathan noticed that operations like "git commit" were needlessly slow when using a resolver that was slow to handle reverse DNS lookups. Alas, after this patch, if /etc/mailname is set up and the [user] name and email configuration aren't, the committer email will not provide a charming reminder of which machine commits were made on any more. But I think it's worth it. Mechanics: the functionality of reading mailname goes in its own function, so people who care about other distros can easily add an implementation to a similar location without making copy_email() too long and losing clarity. While at it, we split out the fallback default logic that does gethostname(), too (rearranging it a little and adding a check for errors from gethostname while at it). Based on a patch by Gerrit Pape <pape@smarden.org>. Requested-by: Ian Jackson <ijackson@chiark.greenend.org.uk> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Improved-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-10-03 08:16:33 +02:00
if (ferror(mailname))
warning("cannot read /etc/mailname: %s",
strerror(errno));
strbuf_release(&mailnamebuf);
ident: check /etc/mailname if email is unknown Before falling back to gethostname(), check /etc/mailname if GIT_AUTHOR_EMAIL is not set in the environment or through config files. Only fall back if /etc/mailname cannot be opened or read. The /etc/mailname convention comes from Debian policy section 11.6 ("mail transport, delivery and user agents"), though maybe it could be useful sometimes on other machines, too. The lack of this support was noticed by various people in different ways: - Ian observed that git was choosing the address 'ian@anarres.relativity.greenend.org.uk' rather than 'ian@davenant.greenend.org.uk' as it should have done. - Jonathan noticed that operations like "git commit" were needlessly slow when using a resolver that was slow to handle reverse DNS lookups. Alas, after this patch, if /etc/mailname is set up and the [user] name and email configuration aren't, the committer email will not provide a charming reminder of which machine commits were made on any more. But I think it's worth it. Mechanics: the functionality of reading mailname goes in its own function, so people who care about other distros can easily add an implementation to a similar location without making copy_email() too long and losing clarity. While at it, we split out the fallback default logic that does gethostname(), too (rearranging it a little and adding a check for errors from gethostname while at it). Based on a patch by Gerrit Pape <pape@smarden.org>. Requested-by: Ian Jackson <ijackson@chiark.greenend.org.uk> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Improved-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-10-03 08:16:33 +02:00
fclose(mailname);
return -1;
}
/* success! */
strbuf_addbuf(buf, &mailnamebuf);
strbuf_release(&mailnamebuf);
ident: check /etc/mailname if email is unknown Before falling back to gethostname(), check /etc/mailname if GIT_AUTHOR_EMAIL is not set in the environment or through config files. Only fall back if /etc/mailname cannot be opened or read. The /etc/mailname convention comes from Debian policy section 11.6 ("mail transport, delivery and user agents"), though maybe it could be useful sometimes on other machines, too. The lack of this support was noticed by various people in different ways: - Ian observed that git was choosing the address 'ian@anarres.relativity.greenend.org.uk' rather than 'ian@davenant.greenend.org.uk' as it should have done. - Jonathan noticed that operations like "git commit" were needlessly slow when using a resolver that was slow to handle reverse DNS lookups. Alas, after this patch, if /etc/mailname is set up and the [user] name and email configuration aren't, the committer email will not provide a charming reminder of which machine commits were made on any more. But I think it's worth it. Mechanics: the functionality of reading mailname goes in its own function, so people who care about other distros can easily add an implementation to a similar location without making copy_email() too long and losing clarity. While at it, we split out the fallback default logic that does gethostname(), too (rearranging it a little and adding a check for errors from gethostname while at it). Based on a patch by Gerrit Pape <pape@smarden.org>. Requested-by: Ian Jackson <ijackson@chiark.greenend.org.uk> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Improved-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-10-03 08:16:33 +02:00
fclose(mailname);
return 0;
}
/*
 * Resolve host to its canonical name and append it to out.
 *
 * Only a name that contains a '.' is accepted.  Returns 0 when such a name
 * was appended, -1 otherwise (out is then left untouched).
 */
static int canonical_name(const char *host, struct strbuf *out)
{
	int status = -1;
#ifndef NO_IPV6
	struct addrinfo hints, *ai;
	memset (&hints, '\0', sizeof (hints));
	hints.ai_flags = AI_CANONNAME;
	if (!getaddrinfo(host, NULL, &hints, &ai)) {
		/*
		 * A successful lookup does not guarantee a canonical name on
		 * every platform; check for NULL before handing it to strchr().
		 */
		if (ai && ai->ai_canonname && strchr(ai->ai_canonname, '.')) {
			strbuf_addstr(out, ai->ai_canonname);
			status = 0;
		}
		freeaddrinfo(ai);
	}
#else
	struct hostent *he = gethostbyname(host);
	if (he && strchr(he->h_name, '.')) {
		strbuf_addstr(out, he->h_name);
		status = 0;
	}
#endif /* NO_IPV6 */
	return status;
}
/*
 * Append this machine's host name to out, as the fallback host part of a
 * made-up email address.
 *
 * - If gethostname() fails, a warning is printed and "(none)" is appended.
 * - If the host name already contains a '.', it is appended as is.
 * - Otherwise canonical_name() is asked for a qualified name; if that
 *   fails, "<hostname>.(none)" is appended.
 *
 * *is_bogus is set to 1 whenever a "(none)" placeholder had to be used and
 * is left untouched otherwise.
 */
static void add_domainname(struct strbuf *out, int *is_bogus)
{
	char buf[1024];

	if (gethostname(buf, sizeof(buf))) {
		warning("cannot get host name: %s", strerror(errno));
		strbuf_addstr(out, "(none)");
		*is_bogus = 1;
		return;
	}
	/*
	 * POSIX leaves it unspecified whether a truncated name is
	 * NUL-terminated, so terminate explicitly before treating buf as a
	 * string.
	 */
	buf[sizeof(buf) - 1] = '\0';
	if (strchr(buf, '.'))
		strbuf_addstr(out, buf);
	else if (canonical_name(buf, out) < 0) {
		strbuf_addf(out, "%s.(none)", buf);
		*is_bogus = 1;
	}
}
/*
 * Make up a fake email address for the passwd entry pw and append it to
 * email:
 *
 *   name + '@' + hostname [+ '.' + domainname]
 *
 * The host part comes from /etc/mailname when that can be read, and from
 * add_domainname() otherwise; only the latter may set *is_bogus.
 */
static void copy_email(const struct passwd *pw, struct strbuf *email,
		       int *is_bogus)
{
	strbuf_addstr(email, pw->pw_name);
	strbuf_addch(email, '@');

	if (!add_mailname_host(email))
		return;	/* read from "/etc/mailname" (Debian) */
	add_domainname(email, is_bogus);
}
/*
 * Return the default user name.
 *
 * While git_default_name is empty it is filled from the GECOS information
 * of the calling user's passwd entry and trimmed; default_name_is_bogus is
 * set if that entry had to be faked.  The returned pointer refers to the
 * shared git_default_name buffer.
 */
const char *ident_default_name(void)
{
	struct passwd *pw;

	if (git_default_name.len)
		return git_default_name.buf;

	pw = xgetpwuid_self(&default_name_is_bogus);
	copy_gecos(pw, &git_default_name);
	strbuf_trim(&git_default_name);
	return git_default_name.buf;
}
/*
 * Return the default email address.
 *
 * While git_default_email is empty it is filled in, then trimmed:
 *  - from a non-empty $EMAIL, in which case the mail part of both the
 *    committer and the author ident is marked as explicitly given;
 *  - otherwise from an address made up by copy_email(), which may set
 *    default_email_is_bogus.
 * The returned pointer refers to the shared git_default_email buffer.
 */
const char *ident_default_email(void)
{
	const char *from_env;

	if (git_default_email.len)
		return git_default_email.buf;

	from_env = getenv("EMAIL");
	if (!from_env || !from_env[0]) {
		struct passwd *pw = xgetpwuid_self(&default_email_is_bogus);

		copy_email(pw, &git_default_email, &default_email_is_bogus);
	} else {
		strbuf_addstr(&git_default_email, from_env);
		committer_ident_explicitly_given |= IDENT_MAIL_GIVEN;
		author_ident_explicitly_given |= IDENT_MAIL_GIVEN;
	}
	strbuf_trim(&git_default_email);
	return git_default_email.buf;
}
/*
 * Return the default date string, asking datestamp() to fill
 * git_default_date first if that buffer is still empty.
 */
static const char *ident_default_date(void)
{
	if (git_default_date.len)
		return git_default_date.buf;

	datestamp(&git_default_date);
	return git_default_date.buf;
}
/*
 * Tell whether c is "crud": a space or control character (anything up to
 * and including 32) or one of the punctuation characters  . , : ; < > " \ '
 * Returns 1 for crud, 0 otherwise.
 */
static int crud(unsigned char c)
{
	if (c <= 32)
		return 1;

	switch (c) {
	case '.':
	case ',':
	case ':':
	case ';':
	case '<':
	case '>':
	case '"':
	case '\\':
	case '\'':
		return 1;
	default:
		return 0;
	}
}
/*
 * Append src to sb with leading and trailing crud (see crud()) stripped,
 * and with the ident-line delimiters '\n', '<' and '>' dropped from what
 * remains in the middle.
 */
static void strbuf_addstr_without_crud(struct strbuf *sb, const char *src)
{
	const char *end;
	size_t len;

	/* Skip crud at the front... */
	while (*src && crud((unsigned char)*src))
		src++;

	/* ...and find where the crud at the back starts. */
	len = strlen(src);
	while (len && crud((unsigned char)src[len - 1]))
		len--;
	end = src + len;

	/*
	 * Characters are only ever dropped, never added, so 'len' bytes is
	 * the most that can be appended.
	 */
	strbuf_grow(sb, len);
	for (; src < end; src++) {
		unsigned char ch = *src;

		if (ch == '\n' || ch == '<' || ch == '>')
			continue;
		sb->buf[sb->len++] = ch;
	}
	sb->buf[sb->len] = '\0';
}
/* Count the leading decimal digits of [p, end) without reading at or past end. */
static size_t ident_digit_span(const char *p, const char *end)
{
	size_t n = 0;

	while (p + n < end && '0' <= p[n] && p[n] <= '9')
		n++;
	return n;
}

/*
 * Reverse of fmt_ident(); given an ident line, split the fields
 * to allow the caller to parse it.
 *
 * Only the first len bytes of line are examined.  On success 0 is returned
 * and the name and mail fields of split are set; the date/tz fields of the
 * result can still be NULL if the input line only has the name/email part
 * (e.g. reading from a reflog entry).  Returns -1 if no '<' or no closing
 * '>' is found.
 */
int split_ident_line(struct ident_split *split, const char *line, int len)
{
	const char *end = line + len;
	const char *cp;
	size_t span;
	int status = -1;

	memset(split, 0, sizeof(*split));

	split->name_begin = line;
	/* Check the bound before dereferencing so line[len] is never read. */
	for (cp = line; cp < end && *cp; cp++)
		if (*cp == '<') {
			split->mail_begin = cp + 1;
			break;
		}
	if (!split->mail_begin)
		return status;

	/*
	 * Walk back from the '<' over trailing whitespace of the name; cp
	 * points one past the character being tested so that it never
	 * moves in front of line.
	 */
	for (cp = split->mail_begin - 1; line < cp; cp--)
		if (!isspace(cp[-1])) {
			split->name_end = cp;
			break;
		}
	if (!split->name_end) {
		/* no human readable name */
		split->name_end = split->name_begin;
	}

	for (cp = split->mail_begin; cp < end; cp++)
		if (*cp == '>') {
			split->mail_end = cp;
			break;
		}
	if (!split->mail_end)
		return status;

	/*
	 * Look from the end-of-line to find the trailing ">" of the mail
	 * address, even though we should already know it as split->mail_end.
	 * This can help in cases of broken idents with an extra ">" somewhere
	 * in the email address.  Note that we are assuming the timestamp will
	 * never have a ">" in it.
	 *
	 * Note that we will always find some ">" before going off the front of
	 * the string, because we will always hit the split->mail_end closing
	 * bracket.
	 */
	for (cp = end - 1; *cp != '>'; cp--)
		;

	for (cp = cp + 1; cp < end && isspace(*cp); cp++)
		;
	if (end <= cp)
		goto person_only;
	split->date_begin = cp;
	span = ident_digit_span(cp, end);
	if (!span)
		goto person_only;
	split->date_end = split->date_begin + span;
	for (cp = split->date_end; cp < end && isspace(*cp); cp++)
		;
	if (end <= cp || (*cp != '+' && *cp != '-'))
		goto person_only;
	split->tz_begin = cp;
	span = ident_digit_span(cp + 1, end);
	if (!span)
		goto person_only;
	split->tz_end = split->tz_begin + 1 + span;
	return 0;

person_only:
	split->date_begin = NULL;
	split->date_end = NULL;
	split->tz_begin = NULL;
	split->tz_end = NULL;
	return 0;
}
/*
 * Advice on how to configure user.email and user.name; fmt_ident() writes
 * it to stderr before dying on an empty name that came from
 * git_default_name.
 */
static const char *env_hint =
"\n"
"*** Please tell me who you are.\n"
"\n"
"Run\n"
"\n"
" git config --global user.email \"you@example.com\"\n"
" git config --global user.name \"Your Name\"\n"
"\n"
"to set your account\'s default identity.\n"
"Omit --global to set the identity only in this repository.\n"
"\n";
Re-fix "builtin-commit: fix --signoff" An earlier fix to the said commit was incomplete; it mixed up the meaning of the flag parameter passed to the internal fmt_ident() function, so this corrects it. git_author_info() and git_committer_info() can be told to issue a warning when no usable user information is found, and optionally can be told to error out. Operations that actually use the information to record a new commit or a tag will still error out, but the caller to leave reflog record will just silently use bogus user information. Not warning on misconfigured user information while writing a reflog entry is somewhat debatable, but it is probably nicer to the users to silently let it pass, because the only information you are losing is who checked out the branch. * git_author_info() and git_committer_info() used to take 1 (positive int) to error out with a warning on misconfiguration; this is now signalled with a symbolic constant IDENT_ERROR_ON_NO_NAME. * These functions used to take -1 (negative int) to warn but continue; this is now signalled with a symbolic constant IDENT_WARN_ON_NO_NAME. * fmt_ident() function implements the above error reporting behaviour common to git_author_info() and git_committer_info(). A symbolic constant IDENT_NO_DATE can be or'ed in to the flag parameter to make it return only the "Name <email@address.xz>". * fmt_name() is a thin wrapper around fmt_ident() that always passes IDENT_ERROR_ON_NO_NAME and IDENT_NO_DATE. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-12-09 02:32:08 +01:00
const char *fmt_ident(const char *name, const char *email,
const char *date_str, int flag)
{
static struct strbuf ident = STRBUF_INIT;
int strict = (flag & IDENT_STRICT);
int want_date = !(flag & IDENT_NO_DATE);
int want_name = !(flag & IDENT_NO_NAME);
if (want_name && !name)
name = ident_default_name();
if (!email)
email = ident_default_email();
if (want_name && !*name) {
Allow non-developer to clone, checkout and fetch more easily. The code that uses committer_info() in reflog can barf and die whenever it is asked to update a ref. And I do not think calling ignore_missing_committer_name() upfront like recent receive-pack did in the aplication is a reasonable workaround. What the patch does. - git_committer_info() takes one parameter. It used to be "if this is true, then die() if the name is not available due to bad GECOS, otherwise issue a warning once but leave the name empty". The reason was because we wanted to prevent bad commits from being made by git-commit-tree (and its callers). The value 0 is only used by "git var -l". Now it takes -1, 0 or 1. When set to -1, it does not complain but uses the pw->pw_name when name is not available. Existing 0 and 1 values mean the same thing as they used to mean before. 0 means issue warnings and leave it empty, 1 means barf and die. - ignore_missing_committer_name() and its existing caller (receive-pack, to set the reflog) have been removed. - git-format-patch, to come up with the phoney message ID when asked to thread, now passes -1 to git_committer_info(). This codepath uses only the e-mail part, ignoring the name. It used to barf and die. The other call in the same program when asked to add signed-off-by line based on committer identity still passes 1 to make sure it barfs instead of adding a bogus s-o-b line. - log_ref_write in refs.c, to come up with the name to record who initiated the ref update in the reflog, passes -1. It used to barf and die. The last change means that git-update-ref, git-branch, and commit walker backends can now be used in a repository with reflog by somebody who does not have the user identity required to make a commit. They all used to barf and die. I've run tests and all of them seem to pass, and also tried "git clone" as a user whose GECOS is empty -- git clone works again now (it was broken when reflog was enabled by default). 
But this definitely needs extra sets of eyeballs. Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-01-26 04:05:01 +01:00
struct passwd *pw;
if (strict) {
if (name == git_default_name.buf)
fputs(env_hint, stderr);
die("empty ident name (for <%s>) not allowed", email);
}
pw = xgetpwuid_self(NULL);
ident: don't write fallback username into git_default_name The fmt_ident function gets a flag that tells us whether to die if the name field is blank. If it is blank and we don't die, then we fall back to the username from the passwd file. The current code writes the value into git_default_name. However, that's not necessarily correct, as the empty value might have come from git_default_name, or it might have been passed in. This leads to two potential problems: 1. If we are overriding an empty name in the passed-in value, then we may be overwriting a perfectly good name (from gitconfig or gecos) in the git_default_name buffer. Later calls to fmt_ident will end up using the fallback name, even though a better name was available. 2. If we override an empty gecos name, we end up with the fallback name in git_default_name. A later call that uses IDENT_ERROR_ON_NO_NAME will see the fallback name and think that it is a good name, instead of producing an error. In other words, a blank gecos name would cause an error with this code: git_committer_info(IDENT_ERROR_ON_NO_NAME); but not this: git_committer_info(0); git_committer_info(IDENT_ERROR_ON_NO_NAME); because in the latter case, the first call has polluted the name buffer. Instead, let's make the fallback a per-invocation variable. We can just use the pw->pw_name string directly, since it only needs to persist through the rest of the function (and we don't do any other getpwent calls). Note that while this solves (1) for future invocations of fmt_ident, the current invocation might use the fallback when it could in theory load a better value from git_default_name. However, by not passing IDENT_ERROR_ON_NO_NAME, the caller is indicating that it does not care too much about the name, anyway, so we don't bother; this is primarily about protecting future callers who do care. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-05-22 01:10:14 +02:00
name = pw->pw_name;
}
if (want_name && strict &&
name == git_default_name.buf && default_name_is_bogus) {
fputs(env_hint, stderr);
die("unable to auto-detect name (got '%s')", name);
}
if (strict && email == git_default_email.buf && default_email_is_bogus) {
fputs(env_hint, stderr);
die("unable to auto-detect email address (got '%s')", email);
}
strbuf_reset(&ident);
if (want_name) {
strbuf_addstr_without_crud(&ident, name);
strbuf_addstr(&ident, " <");
}
strbuf_addstr_without_crud(&ident, email);
if (want_name)
strbuf_addch(&ident, '>');
if (want_date) {
strbuf_addch(&ident, ' ');
if (date_str && date_str[0]) {
if (parse_date(date_str, &ident) < 0)
die("invalid date format: %s", date_str);
}
else
strbuf_addstr(&ident, ident_default_date());
}
return ident.buf;
}
/*
 * Convenience wrapper around fmt_ident() for callers that have a name and
 * an e-mail address but no date.
 *
 * Forwards "name" and "email" with a NULL date string and the flags
 * IDENT_STRICT | IDENT_NO_DATE, and returns whatever fmt_ident() returns.
 *
 * NOTE(review): presumably IDENT_STRICT makes fmt_ident() die() on an
 * empty or undetectable identity and IDENT_NO_DATE suppresses the date
 * part; the flag decoding is not visible in this part of the file, so
 * confirm against fmt_ident().
 */
const char *fmt_name(const char *name, const char *email)
{
return fmt_ident(name, email, NULL, IDENT_STRICT | IDENT_NO_DATE);
}
Re-fix "builtin-commit: fix --signoff" An earlier fix to the said commit was incomplete; it mixed up the meaning of the flag parameter passed to the internal fmt_ident() function, so this corrects it. git_author_info() and git_committer_info() can be told to issue a warning when no usable user information is found, and optionally can be told to error out. Operations that actually use the information to record a new commit or a tag will still error out, but the caller to leave reflog record will just silently use bogus user information. Not warning on misconfigured user information while writing a reflog entry is somewhat debatable, but it is probably nicer to the users to silently let it pass, because the only information you are losing is who checked out the branch. * git_author_info() and git_committer_info() used to take 1 (positive int) to error out with a warning on misconfiguration; this is now signalled with a symbolic constant IDENT_ERROR_ON_NO_NAME. * These functions used to take -1 (negative int) to warn but continue; this is now signalled with a symbolic constant IDENT_WARN_ON_NO_NAME. * fmt_ident() function implements the above error reporting behaviour common to git_author_info() and git_committer_info(). A symbolic constant IDENT_NO_DATE can be or'ed in to the flag parameter to make it return only the "Name <email@address.xz>". * fmt_name() is a thin wrapper around fmt_ident() that always passes IDENT_ERROR_ON_NO_NAME and IDENT_NO_DATE. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-12-09 02:32:08 +01:00
/*
 * Produce the author ident string from the GIT_AUTHOR_NAME,
 * GIT_AUTHOR_EMAIL and GIT_AUTHOR_DATE environment variables.
 *
 * Side effect: for each of the name and e-mail variables that is set,
 * the matching IDENT_*_GIVEN bit is recorded in
 * author_ident_explicitly_given.
 *
 * "flag" is handed to fmt_ident() untouched; the return value is the
 * string fmt_ident() yields.
 */
const char *git_author_info(int flag)
{
	const char *env_name = getenv("GIT_AUTHOR_NAME");
	const char *env_email = getenv("GIT_AUTHOR_EMAIL");

	if (env_name)
		author_ident_explicitly_given |= IDENT_NAME_GIVEN;
	if (env_email)
		author_ident_explicitly_given |= IDENT_MAIL_GIVEN;

	return fmt_ident(env_name, env_email, getenv("GIT_AUTHOR_DATE"), flag);
}
Re-fix "builtin-commit: fix --signoff" An earlier fix to the said commit was incomplete; it mixed up the meaning of the flag parameter passed to the internal fmt_ident() function, so this corrects it. git_author_info() and git_committer_info() can be told to issue a warning when no usable user information is found, and optionally can be told to error out. Operations that actually use the information to record a new commit or a tag will still error out, but the caller to leave reflog record will just silently use bogus user information. Not warning on misconfigured user information while writing a reflog entry is somewhat debatable, but it is probably nicer to the users to silently let it pass, because the only information you are losing is who checked out the branch. * git_author_info() and git_committer_info() used to take 1 (positive int) to error out with a warning on misconfiguration; this is now signalled with a symbolic constant IDENT_ERROR_ON_NO_NAME. * These functions used to take -1 (negative int) to warn but continue; this is now signalled with a symbolic constant IDENT_WARN_ON_NO_NAME. * fmt_ident() function implements the above error reporting behaviour common to git_author_info() and git_committer_info(). A symbolic constant IDENT_NO_DATE can be or'ed in to the flag parameter to make it return only the "Name <email@address.xz>". * fmt_name() is a thin wrapper around fmt_ident() that always passes IDENT_ERROR_ON_NO_NAME and IDENT_NO_DATE. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-12-09 02:32:08 +01:00
/*
 * Produce the committer ident string from the GIT_COMMITTER_NAME,
 * GIT_COMMITTER_EMAIL and GIT_COMMITTER_DATE environment variables.
 *
 * Side effect: for each of the name and e-mail variables that is set,
 * the matching IDENT_*_GIVEN bit is recorded in
 * committer_ident_explicitly_given.
 *
 * "flag" is handed to fmt_ident() untouched; the return value is the
 * string fmt_ident() yields.
 */
const char *git_committer_info(int flag)
{
	const char *env_name = getenv("GIT_COMMITTER_NAME");
	const char *env_email = getenv("GIT_COMMITTER_EMAIL");

	if (env_name)
		committer_ident_explicitly_given |= IDENT_NAME_GIVEN;
	if (env_email)
		committer_ident_explicitly_given |= IDENT_MAIL_GIVEN;

	return fmt_ident(env_name, env_email, getenv("GIT_COMMITTER_DATE"), flag);
}
/*
 * Decide whether a set of IDENT_*_GIVEN bits counts as a sufficiently
 * specified identity.
 *
 * When WINDOWS is defined, the bits must equal IDENT_ALL_GIVEN (both name
 * and e-mail); otherwise having IDENT_MAIL_GIVEN set is enough.
 *
 * Returns non-zero when sufficient, zero otherwise.  The non-zero value is
 * not normalized to 1.
 */
static int ident_is_sufficient(int user_ident_explicitly_given)
{
#ifdef WINDOWS
	return (user_ident_explicitly_given == IDENT_ALL_GIVEN);
#else
	return (user_ident_explicitly_given & IDENT_MAIL_GIVEN);
#endif
}
/*
 * Report whether the committer identity has been given explicitly enough,
 * by applying ident_is_sufficient() to the bits accumulated in
 * committer_ident_explicitly_given.
 *
 * Returns non-zero when sufficient, zero otherwise.
 */
int committer_ident_sufficiently_given(void)
{
return ident_is_sufficient(committer_ident_explicitly_given);
}
/*
 * Report whether the author identity has been given explicitly enough,
 * by applying ident_is_sufficient() to the bits accumulated in
 * author_ident_explicitly_given.
 *
 * Returns non-zero when sufficient, zero otherwise.
 */
int author_ident_sufficiently_given(void)
{
return ident_is_sufficient(author_ident_explicitly_given);
}
/*
 * Config callback for the identity-related keys.
 *
 * "user.name" replaces the contents of git_default_name and "user.email"
 * replaces the contents of git_default_email.  In both cases the matching
 * IDENT_*_GIVEN bit is set for the committer and for the author.
 *
 * Returns config_error_nonbool(var) when one of these keys comes without
 * a value, and 0 in every other case, including keys this function does
 * not handle.  "data" is not used.
 */
int git_ident_config(const char *var, const char *value, void *data)
{
	struct strbuf *target;
	int given_bit;

	if (strcmp(var, "user.name") == 0) {
		target = &git_default_name;
		given_bit = IDENT_NAME_GIVEN;
	} else if (strcmp(var, "user.email") == 0) {
		target = &git_default_email;
		given_bit = IDENT_MAIL_GIVEN;
	} else {
		/* Not an identity key: nothing to do. */
		return 0;
	}

	if (value == NULL)
		return config_error_nonbool(var);

	strbuf_reset(target);
	strbuf_addstr(target, value);
	committer_ident_explicitly_given |= given_bit;
	author_ident_explicitly_given |= given_bit;
	return 0;
}
/*
 * Compare the byte ranges [a_begin, a_end) and [b_begin, b_end).
 *
 * The common prefix (as long as the shorter range) is compared with
 * memcmp(); if it differs, that result is returned.  Otherwise the
 * result is the length of "a" minus the length of "b", so a range that
 * is a strict prefix of the other orders first.
 *
 * Returns a negative, zero or positive value.
 */
static int buf_cmp(const char *a_begin, const char *a_end,
		   const char *b_begin, const char *b_end)
{
	int len_a = a_end - a_begin;
	int len_b = b_end - b_begin;
	int common = len_b;
	int diff;

	if (len_a < len_b)
		common = len_a;

	diff = memcmp(a_begin, b_begin, common);
	return diff ? diff : len_a - len_b;
}
/*
 * Order two split idents: first by their mail ranges, and only when the
 * mail ranges compare equal, by their name ranges.  Both comparisons are
 * done with buf_cmp().
 *
 * Returns a negative, zero or positive value.
 */
int ident_cmp(const struct ident_split *a,
	      const struct ident_split *b)
{
	int mail_order = buf_cmp(a->mail_begin, a->mail_end,
				 b->mail_begin, b->mail_end);

	if (mail_order != 0)
		return mail_order;

	return buf_cmp(a->name_begin, a->name_end,
		       b->name_begin, b->name_end);
}