git-commit-vandalism/tools/mailinfo.c
Junio C Hamano 1d8fa411e0 [PATCH] mailinfo: handle folded header.
Some people split their long E-mail address over two lines
using the RFC2822 header "folding".  We can lose authorship
information this way, so make a minimum effort to deal with it,
instead of special casing only the "Subject:" field.

We could teach mailsplit to unfold the folded header, but
teaching mailinfo about folding would make more sense; a single
message can be fed to mailinfo without going through mailsplit.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-07-23 11:13:11 -07:00

269 lines
4.6 KiB
C

/*
* Another stupid program, this one parsing the headers of an
* email to figure out authorship and subject
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Output streams: commit message text and patch text, opened in main() */
static FILE *cmitmsg, *patchfile;
/* Shared input buffer holding the header or body line being processed */
static char line[1000];
/* Values collected from "Date:", "From:" and "Subject:" lines */
static char date[1000];
static char name[1000];
static char email[1000];
static char subject[1000];
/*
 * Pick the string to use as the author name.
 *
 * "name" is accepted when it is 3 to 60 characters long and contains
 * none of '@', '<' or '>'.  Otherwise "email" is returned as a fallback.
 * The result is always one of the two pointers passed in.
 */
static char *sanity_check(char *name, char *email)
{
	int length = strlen(name);
	int plausible_length = (length >= 3 && length <= 60);
	/* strcspn() stops at the first address-like character, if any */
	int has_address_chars = name[strcspn(name, "@<>")] != '\0';

	return (plausible_length && !has_address_chars) ? name : email;
}
/*
 * Extract an e-mail address and author name from the text of a
 * "From:" line (the part after the field name).
 *
 * The address is the run of characters around the first '@', bounded
 * by whitespace, '<' or '>'.  It is copied into the global "email" and
 * blanked out in "line"; what remains of the line, trimmed of leading
 * and trailing non-alphanumeric characters, becomes the global "name"
 * (or the address itself when sanity_check() rejects it).
 *
 * Note that "line" is modified in place.
 *
 * Returns 1 when an address was taken, 0 when the line has no '@' or
 * is rejected because an address is already known and this line holds
 * more than one '@'.
 */
static int handle_from(char *line)
{
	char *at = strchr(line, '@');
	char *dst;

	if (!at)
		return 0;

	/*
	 * If we already have one email, don't take any confusing lines
	 */
	if (*email && strchr(at+1, '@'))
		return 0;

	/*
	 * Walk back from the '@' to the first character of the address.
	 * The byte is read as unsigned char: passing a negative plain
	 * char to isspace() is undefined behaviour.
	 */
	while (at > line) {
		unsigned char c = at[-1];
		if (isspace(c) || c == '<')
			break;
		at--;
	}

	/* Move the address into "email", leaving blanks behind in "line" */
	dst = email;
	for (;;) {
		unsigned char c = *at;
		if (!c || c == '>' || isspace(c))
			break;
		*at++ = ' ';
		*dst++ = c;
	}
	*dst++ = 0;

	/* Chop trailing non-alphanumeric characters off the line */
	at = line + strlen(line);
	while (at > line) {
		unsigned char c = *--at;
		if (isalnum(c))
			break;
		*at = 0;
	}

	/* Skip leading non-alphanumeric characters */
	at = line;
	for (;;) {
		unsigned char c = *at;
		if (!c)
			break;
		if (isalnum(c))
			break;
		at++;
	}

	at = sanity_check(at, email);
	strcpy(name, at);
	return 1;
}
/* Remember the text of a "Date:" line in the global "date" buffer. */
static void handle_date(char *line)
{
	/* Copy the string together with its terminating NUL */
	memcpy(date, line, strlen(line) + 1);
}
/* Remember the text of a "Subject:" line in the global "subject" buffer. */
static void handle_subject(char *line)
{
	/* Copy the string together with its terminating NUL */
	memcpy(subject, line, strlen(line) + 1);
}
/*
 * Dispatch one header line to the matching handler.
 *
 * Only "From:", "Date:" and "Subject:" followed by a whitespace
 * character are recognized; the handler receives the text after that
 * character.  Any other line is ignored.  "len" is accepted for the
 * caller's convenience but is not needed here.
 */
static void check_line(char *line, int len)
{
	(void)len;

	/*
	 * strncmp() stops at the terminator of a short line, and the
	 * unsigned char cast keeps isspace() defined for bytes >= 0x80.
	 */
	if (!strncmp(line, "From:", 5) && isspace((unsigned char)line[5]))
		handle_from(line+6);
	else if (!strncmp(line, "Date:", 5) && isspace((unsigned char)line[5]))
		handle_date(line+6);
	else if (!strncmp(line, "Subject:", 8) && isspace((unsigned char)line[8]))
		handle_subject(line+9);
}
/*
 * Skip leading noise in a subject line and return a pointer to the
 * first character worth keeping (always inside the string passed in).
 *
 * Skipped, repeatedly and in any order:
 *   - "Re:" / "re:" prefixes,
 *   - spaces, tabs and colons,
 *   - a leading "[...]" tag, as long as the tag is not more than twice
 *     as long as the text from its closing bracket onwards,
 *   - a lone '[' that has no closing bracket.
 */
static char *cleanup_subject(char *subject)
{
	for (;;) {
		char *bracket_end;
		size_t tag_len, rest_len;

		switch (*subject) {
		case 'r': case 'R':
			/*
			 * strncmp() stops at the terminator, so a subject
			 * ending in "r" is never read past its end.
			 */
			if (!strncmp(subject + 1, "e:", 2)) {
				subject += 3;
				continue;
			}
			break;
		case ' ': case '\t': case ':':
			subject++;
			continue;
		case '[':
			bracket_end = strchr(subject, ']');
			if (!bracket_end) {
				subject++;
				continue;
			}
			/* rest_len counts from the ']' itself to the end */
			rest_len = strlen(bracket_end);
			tag_len = (size_t)(bracket_end - subject);
			if (tag_len <= rest_len * 2) {
				subject = bracket_end + 1;
				continue;
			}
			break;
		default:
			break;
		}
		return subject;
	}
}
/*
 * Collapse every run of whitespace characters in "buf" into a single
 * space, in place.  Leading and trailing runs are collapsed as well,
 * not removed.
 *
 * Done in one pass with separate read and write positions, instead of
 * shifting the rest of the string once per removed character.
 */
static void cleanup_space(char *buf)
{
	const char *src = buf;
	char *dst = buf;

	while (*src) {
		unsigned char c = (unsigned char)*src++;

		if (isspace(c)) {
			*dst++ = ' ';
			/* Drop the remainder of this whitespace run */
			while (isspace((unsigned char)*src))
				src++;
		} else {
			*dst++ = (char)c;
		}
	}
	*dst = '\0';
}
/*
 * Emit the collected header information and split the remaining input.
 *
 * Prints the Author/Email/Subject/Date summary to stdout, then copies
 * the line currently held in "line" and everything left on stdin to the
 * commit message file.  From the first line starting with "diff -",
 * "---" or "Index: " onwards, output goes to the patch file instead.
 *
 * Exits with status 1 when no patch start was seen, or when writing or
 * closing either output file fails.
 */
static void handle_rest(void)
{
	char *sub = cleanup_subject(subject);
	FILE *out = cmitmsg;
	int write_failed;

	cleanup_space(name);
	cleanup_space(date);
	cleanup_space(email);
	cleanup_space(sub);
	printf("Author: %s\nEmail: %s\nSubject: %s\nDate: %s\n\n", name, email, sub, date);

	do {
		/* Once a patch marker is seen, the rest is all patch */
		if (!strncmp(line, "diff -", 6) ||
		    !strncmp(line, "---", 3) ||
		    !strncmp(line, "Index: ", 7))
			out = patchfile;
		fputs(line, out);
	} while (fgets(line, sizeof(line), stdin) != NULL);

	if (out == cmitmsg) {
		fprintf(stderr, "No patch found\n");
		exit(1);
	}

	/*
	 * A failed write may only surface when buffers are flushed, so
	 * check both the error indicators and the fclose() results.
	 */
	write_failed = ferror(cmitmsg) || ferror(patchfile);
	if (fclose(cmitmsg) != 0)
		write_failed = 1;
	if (fclose(patchfile) != 0)
		write_failed = 1;
	if (write_failed) {
		fprintf(stderr, "Error writing output files\n");
		exit(1);
	}
}
/*
 * Strip trailing whitespace (including the newline) from "line" in
 * place and return the length of what remains.
 */
static int eatspace(char *line)
{
	int len = strlen(line);

	/*
	 * Cast to unsigned char: isspace() is undefined for negative
	 * values, which plain char yields for bytes >= 0x80 on
	 * platforms where char is signed.
	 */
	while (len > 0 && isspace((unsigned char)line[len - 1]))
		line[--len] = '\0';
	return len;
}
/*
 * Process the start of the message body.
 *
 * Blank lines are skipped.  The first "From:" line that yields an
 * address and the first "Date:" line override what the headers gave.
 * The first line that is neither is handed, with its newline restored,
 * to handle_rest(), which consumes the remainder of the input.
 */
static void handle_body(void)
{
	int has_from = 0;
	int has_date = 0;

	/* First lines of body can have From: and Date: */
	while (fgets(line, sizeof(line), stdin) != NULL) {
		int len = eatspace(line);
		if (!len)
			continue;
		if (!memcmp("From:", line, 5) && isspace((unsigned char)line[5])) {
			if (!has_from && handle_from(line+6)) {
				has_from = 1;
				continue;
			}
		}
		if (!memcmp("Date:", line, 5) && isspace((unsigned char)line[5])) {
			if (!has_date) {
				handle_date(line+6);
				has_date = 1;
				continue;
			}
		}
		/*
		 * Put back the newline that eatspace() removed, and
		 * re-terminate: if the line had no trailing whitespace,
		 * line[len] is the only NUL and must not simply be
		 * overwritten.  A line that fills the whole buffer was
		 * cut short by fgets() and has no room for a newline; it
		 * is passed on as is and its continuation is read next.
		 */
		if (len + 1 < (int)sizeof(line)) {
			line[len] = '\n';
			line[len + 1] = '\0';
		}
		handle_rest();
		break;
	}
}
/*
 * Read one logical header line from "in" into "line" (capacity "sz").
 *
 * Physical lines are read with trailing whitespace stripped.  When the
 * next character in the stream is a space or tab, the following
 * physical line is a folded continuation (RFC 2822) and is appended
 * directly after the text read so far.
 *
 * Returns the number of characters stored; 0 means end of input or an
 * empty line was hit before anything was read.
 */
static int read_one_header_line(char *line, int sz, FILE *in)
{
	int used = 0;

	while (used < sz) {
		char *chunk = line + used;
		int got, next;

		if (!fgets(chunk, sz - used, in))
			break;
		got = eatspace(chunk);
		if (!got)
			break;
		used += got;

		/* Look at the next character without consuming it */
		next = fgetc(in);
		ungetc(next, in);
		if (next != ' ' && next != '\t')
			break;
		/* Yuck, 2822 header "folding": keep reading */
	}
	return used;
}
/* Print the command-line synopsis to stderr and exit with status 1. */
static void usage(void)
{
	fputs("mailinfo msg-file patch-file < email\n", stderr);
	exit(1);
}
/*
 * Entry point: mailinfo msg-file patch-file < email
 *
 * Opens both output files for writing, feeds every header line read
 * from stdin to check_line(), and hands over to handle_body() once
 * read_one_header_line() returns 0.
 */
int main(int argc, char **argv)
{
	int len;

	if (argc != 3)
		usage();

	if ((cmitmsg = fopen(argv[1], "w")) == NULL) {
		perror(argv[1]);
		exit(1);
	}
	if ((patchfile = fopen(argv[2], "w")) == NULL) {
		perror(argv[2]);
		exit(1);
	}

	/* Header section: runs until the first zero-length read */
	while ((len = read_one_header_line(line, sizeof(line), stdin)) != 0)
		check_line(line, len);

	handle_body();
	return 0;
}