Merge branch 'cn/bom-in-gitignore'
Teach the codepaths that read .gitignore and .gitattributes files that these files, when encoded in UTF-8, may begin with a UTF-8 BOM; this brings them in line with what we already do for configuration files.

* cn/bom-in-gitignore:
  - attr: skip UTF8 BOM at the beginning of the input file
  - config: use utf8_bom[] from utf.[ch] in git_parse_source()
  - utf8-bom: introduce skip_utf8_bom() helper
  - add_excludes_from_file: clarify the bom skipping logic
  - dir: allow a BOM at the beginning of exclude files
This commit is contained in:
commit
2e1dfd62dc
9
attr.c
9
attr.c
@ -12,6 +12,7 @@
|
|||||||
#include "exec_cmd.h"
|
#include "exec_cmd.h"
|
||||||
#include "attr.h"
|
#include "attr.h"
|
||||||
#include "dir.h"
|
#include "dir.h"
|
||||||
|
#include "utf8.h"
|
||||||
|
|
||||||
const char git_attr__true[] = "(builtin)true";
|
const char git_attr__true[] = "(builtin)true";
|
||||||
const char git_attr__false[] = "\0(builtin)false";
|
const char git_attr__false[] = "\0(builtin)false";
|
||||||
@ -379,8 +380,12 @@ static struct attr_stack *read_attr_from_file(const char *path, int macro_ok)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
res = xcalloc(1, sizeof(*res));
|
res = xcalloc(1, sizeof(*res));
|
||||||
while (fgets(buf, sizeof(buf), fp))
|
while (fgets(buf, sizeof(buf), fp)) {
|
||||||
handle_attr_line(res, buf, path, ++lineno, macro_ok);
|
char *bufp = buf;
|
||||||
|
if (!lineno)
|
||||||
|
skip_utf8_bom(&bufp, strlen(bufp));
|
||||||
|
handle_attr_line(res, bufp, path, ++lineno, macro_ok);
|
||||||
|
}
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
6
config.c
6
config.c
@ -12,6 +12,7 @@
|
|||||||
#include "quote.h"
|
#include "quote.h"
|
||||||
#include "hashmap.h"
|
#include "hashmap.h"
|
||||||
#include "string-list.h"
|
#include "string-list.h"
|
||||||
|
#include "utf8.h"
|
||||||
|
|
||||||
struct config_source {
|
struct config_source {
|
||||||
struct config_source *prev;
|
struct config_source *prev;
|
||||||
@ -417,8 +418,7 @@ static int git_parse_source(config_fn_t fn, void *data)
|
|||||||
struct strbuf *var = &cf->var;
|
struct strbuf *var = &cf->var;
|
||||||
|
|
||||||
/* U+FEFF Byte Order Mark in UTF8 */
|
/* U+FEFF Byte Order Mark in UTF8 */
|
||||||
static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
|
const char *bomptr = utf8_bom;
|
||||||
const unsigned char *bomptr = utf8_bom;
|
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
int c = get_next_char();
|
int c = get_next_char();
|
||||||
@ -426,7 +426,7 @@ static int git_parse_source(config_fn_t fn, void *data)
|
|||||||
/* We are at the file beginning; skip UTF8-encoded BOM
|
/* We are at the file beginning; skip UTF8-encoded BOM
|
||||||
* if present. Sane editors won't put this in on their
|
* if present. Sane editors won't put this in on their
|
||||||
* own, but e.g. Windows Notepad will do it happily. */
|
* own, but e.g. Windows Notepad will do it happily. */
|
||||||
if ((unsigned char) c == *bomptr) {
|
if (c == (*bomptr & 0377)) {
|
||||||
bomptr++;
|
bomptr++;
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
|
6
dir.c
6
dir.c
@ -12,6 +12,7 @@
|
|||||||
#include "refs.h"
|
#include "refs.h"
|
||||||
#include "wildmatch.h"
|
#include "wildmatch.h"
|
||||||
#include "pathspec.h"
|
#include "pathspec.h"
|
||||||
|
#include "utf8.h"
|
||||||
|
|
||||||
struct path_simplify {
|
struct path_simplify {
|
||||||
int len;
|
int len;
|
||||||
@ -617,7 +618,12 @@ int add_excludes_from_file_to_list(const char *fname,
|
|||||||
}
|
}
|
||||||
|
|
||||||
el->filebuf = buf;
|
el->filebuf = buf;
|
||||||
|
|
||||||
|
if (skip_utf8_bom(&buf, size))
|
||||||
|
size -= buf - el->filebuf;
|
||||||
|
|
||||||
entry = buf;
|
entry = buf;
|
||||||
|
|
||||||
for (i = 0; i < size; i++) {
|
for (i = 0; i < size; i++) {
|
||||||
if (buf[i] == '\n') {
|
if (buf[i] == '\n') {
|
||||||
if (entry != buf + i && entry[0] != '#') {
|
if (entry != buf + i && entry[0] != '#') {
|
||||||
|
@ -20,6 +20,15 @@ test_expect_success 'status untracked directory with --ignored' '
|
|||||||
test_cmp expected actual
|
test_cmp expected actual
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'same with gitignore starting with BOM' '
|
||||||
|
printf "\357\273\277ignored\n" >.gitignore &&
|
||||||
|
mkdir -p untracked &&
|
||||||
|
: >untracked/ignored &&
|
||||||
|
: >untracked/uncommitted &&
|
||||||
|
git status --porcelain --ignored >actual &&
|
||||||
|
test_cmp expected actual
|
||||||
|
'
|
||||||
|
|
||||||
cat >expected <<\EOF
|
cat >expected <<\EOF
|
||||||
?? .gitignore
|
?? .gitignore
|
||||||
?? actual
|
?? actual
|
||||||
|
11
utf8.c
11
utf8.c
@ -633,3 +633,14 @@ int is_hfs_dotgit(const char *path)
|
|||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* U+FEFF BYTE ORDER MARK as encoded in UTF-8 (bytes EF BB BF). */
const char utf8_bom[] = "\357\273\277";

/*
 * Step over a UTF-8 byte order mark at the start of a buffer.
 *
 * text: address of the pointer to the first byte of the buffer.  It is
 *       advanced past the BOM when one is found and left untouched
 *       otherwise.
 * len:  number of bytes available at *text.  It is passed by value, so a
 *       caller that tracks the remaining length must adjust it itself.
 *
 * Returns 1 if a BOM was present and skipped, 0 otherwise.
 */
int skip_utf8_bom(char **text, size_t len)
{
	/* sizeof counts the literal's terminating NUL; drop it. */
	const size_t bom_len = sizeof(utf8_bom) - 1;

	/* Check the length first so memcmp() never reads past the buffer. */
	if (len < bom_len || memcmp(*text, utf8_bom, bom_len))
		return 0;

	*text += bom_len;
	return 1;
}
|
||||||
|
3
utf8.h
3
utf8.h
@ -13,6 +13,9 @@ int same_encoding(const char *, const char *);
|
|||||||
__attribute__((format (printf, 2, 3)))
|
__attribute__((format (printf, 2, 3)))
|
||||||
int utf8_fprintf(FILE *, const char *, ...);
|
int utf8_fprintf(FILE *, const char *, ...);
|
||||||
|
|
||||||
|
extern const char utf8_bom[];
|
||||||
|
extern int skip_utf8_bom(char **, size_t);
|
||||||
|
|
||||||
void strbuf_add_wrapped_text(struct strbuf *buf,
|
void strbuf_add_wrapped_text(struct strbuf *buf,
|
||||||
const char *text, int indent, int indent2, int width);
|
const char *text, int indent, int indent2, int width);
|
||||||
void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,
|
void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,
|
||||||
|
Loading…
Reference in New Issue
Block a user