Merge branch 'tb/ls-files-eol'

"git ls-files" learned a new "--eol" option to help diagnose
end-of-line problems.

* tb/ls-files-eol:
  ls-files: add eol diagnostics
This commit is contained in:
Junio C Hamano 2016-02-03 14:15:59 -08:00
commit 05f1539b7f
5 changed files with 237 additions and 49 deletions

View File

@ -12,6 +12,7 @@ SYNOPSIS
'git ls-files' [-z] [-t] [-v] 'git ls-files' [-z] [-t] [-v]
(--[cached|deleted|others|ignored|stage|unmerged|killed|modified])* (--[cached|deleted|others|ignored|stage|unmerged|killed|modified])*
(-[c|d|o|i|s|u|k|m])* (-[c|d|o|i|s|u|k|m])*
[--eol]
[-x <pattern>|--exclude=<pattern>] [-x <pattern>|--exclude=<pattern>]
[-X <file>|--exclude-from=<file>] [-X <file>|--exclude-from=<file>]
[--exclude-per-directory=<file>] [--exclude-per-directory=<file>]
@ -147,6 +148,24 @@ a space) at the start of each line:
possible for manual inspection; the exact format may change at possible for manual inspection; the exact format may change at
any time. any time.
--eol::
Show <eolinfo> and <eolattr> of files.
<eolinfo> is the file content identification used by Git when
the "text" attribute is "auto" (or not set and core.autocrlf is not false).
<eolinfo> is either "-text", "none", "lf", "crlf", "mixed" or "".
+
"" means the file is not a regular file, it is not in the index or
not accessible in the working tree.
+
<eolattr> is the attribute that is used when checking out or committing,
it is either "", "-text", "text", "text=auto", "text eol=lf", "text eol=crlf".
Note: Currently Git does not support "text=auto eol=lf" or "text=auto eol=crlf";
this may change in the future.
+
Both the <eolinfo> in the index ("i/<eolinfo>")
and in the working tree ("w/<eolinfo>") are shown for regular files,
followed by the ("attr/<eolattr>").
\--:: \--::
Do not interpret any more arguments as options. Do not interpret any more arguments as options.
@ -161,6 +180,9 @@ which case it outputs:
[<tag> ]<mode> <object> <stage> <file> [<tag> ]<mode> <object> <stage> <file>
'git ls-files --eol' will show
i/<eolinfo><SPACES>w/<eolinfo><SPACES>attr/<eolattr><SPACE*><TAB><file>
'git ls-files --unmerged' and 'git ls-files --stage' can be used to examine 'git ls-files --unmerged' and 'git ls-files --stage' can be used to examine
detailed information on unmerged paths. detailed information on unmerged paths.

View File

@ -27,6 +27,7 @@ static int show_killed;
static int show_valid_bit; static int show_valid_bit;
static int line_terminator = '\n'; static int line_terminator = '\n';
static int debug_mode; static int debug_mode;
static int show_eol;
static const char *prefix; static const char *prefix;
static int max_prefix_len; static int max_prefix_len;
@ -47,6 +48,23 @@ static const char *tag_modified = "";
static const char *tag_skip_worktree = ""; static const char *tag_skip_worktree = "";
static const char *tag_resolve_undo = ""; static const char *tag_resolve_undo = "";
/*
 * Print the end-of-line diagnostics column for one path when --eol was
 * given (show_eol is set); otherwise print nothing.
 *
 * ce   - index entry for the path, or NULL when there is none; the
 *        index column is left empty unless ce is a regular file.
 * path - path to stat in the working tree and to look up attributes for.
 *
 * The output is "i/<eolinfo> w/<eolinfo> attr/<eolattr>" followed by a
 * TAB; the caller is expected to print the file name afterwards.
 */
static void write_eolinfo(const struct cache_entry *ce, const char *path)
{
	struct stat wt_stat;
	const char *index_info = "";
	const char *worktree_info = "";
	const char *attr_info;

	if (!show_eol)
		return;

	attr_info = get_convert_attr_ascii(path);

	/* Only regular files in the index have meaningful content stats. */
	if (ce && S_ISREG(ce->ce_mode))
		index_info = get_cached_convert_stats_ascii(ce->name);

	/* lstat() returns 0 on success; missing or non-regular files stay "". */
	if (!lstat(path, &wt_stat) && S_ISREG(wt_stat.st_mode))
		worktree_info = get_wt_convert_stats_ascii(path);

	printf("i/%-5s w/%-5s attr/%-17s\t", index_info, worktree_info, attr_info);
}
static void write_name(const char *name) static void write_name(const char *name)
{ {
/* /*
@ -68,6 +86,7 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent)
return; return;
fputs(tag, stdout); fputs(tag, stdout);
write_eolinfo(NULL, ent->name);
write_name(ent->name); write_name(ent->name);
} }
@ -170,6 +189,7 @@ static void show_ce_entry(const char *tag, const struct cache_entry *ce)
find_unique_abbrev(ce->sha1,abbrev), find_unique_abbrev(ce->sha1,abbrev),
ce_stage(ce)); ce_stage(ce));
} }
write_eolinfo(ce, ce->name);
write_name(ce->name); write_name(ce->name);
if (debug_mode) { if (debug_mode) {
const struct stat_data *sd = &ce->ce_stat_data; const struct stat_data *sd = &ce->ce_stat_data;
@ -433,6 +453,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
OPT_BIT(0, "directory", &dir.flags, OPT_BIT(0, "directory", &dir.flags,
N_("show 'other' directories' names only"), N_("show 'other' directories' names only"),
DIR_SHOW_OTHER_DIRECTORIES), DIR_SHOW_OTHER_DIRECTORIES),
OPT_BOOL(0, "eol", &show_eol, N_("show line endings of files")),
OPT_NEGBIT(0, "empty-directory", &dir.flags, OPT_NEGBIT(0, "empty-directory", &dir.flags,
N_("don't show empty directories"), N_("don't show empty directories"),
DIR_HIDE_EMPTY_DIRECTORIES), DIR_HIDE_EMPTY_DIRECTORIES),

119
convert.c
View File

@ -13,6 +13,11 @@
* translation when the "text" attribute or "auto_crlf" option is set. * translation when the "text" attribute or "auto_crlf" option is set.
*/ */
/* Stat bits: When BIN is set, the txt bits are unset */
#define CONVERT_STAT_BITS_TXT_LF 0x1
#define CONVERT_STAT_BITS_TXT_CRLF 0x2
#define CONVERT_STAT_BITS_BIN 0x4
enum crlf_action { enum crlf_action {
CRLF_GUESS = -1, CRLF_GUESS = -1,
CRLF_BINARY = 0, CRLF_BINARY = 0,
@ -75,26 +80,75 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
/* /*
* The same heuristics as diff.c::mmfile_is_binary() * The same heuristics as diff.c::mmfile_is_binary()
* We treat files with bare CR as binary
*/ */
static int is_binary(unsigned long size, struct text_stat *stats) static int convert_is_binary(unsigned long size, const struct text_stat *stats)
{ {
if (stats->cr != stats->crlf)
return 1;
if (stats->nul) if (stats->nul)
return 1; return 1;
if ((stats->printable >> 7) < stats->nonprintable) if ((stats->printable >> 7) < stats->nonprintable)
return 1; return 1;
/*
* Other heuristics? Average line length might be relevant,
* as might LF vs CR vs CRLF counts..
*
* NOTE! It might be normal to have a low ratio of CRLF to LF
* (somebody starts with a LF-only file and edits it with an editor
* that adds CRLF only to lines that are added..). But do we
* want to support CR-only? Probably not.
*/
return 0; return 0;
} }
/*
 * Classify a buffer's line endings as a set of CONVERT_STAT_BITS_* flags.
 *
 * Returns 0 for a NULL or empty buffer and for text without any LF,
 * CONVERT_STAT_BITS_BIN when convert_is_binary() says the content is
 * binary, and otherwise TXT_LF, TXT_CRLF, or both (mixed endings).
 */
static unsigned int gather_convert_stats(const char *data, unsigned long size)
{
	struct text_stat stats;

	if (!data || !size)
		return 0;

	gather_stats(data, size, &stats);

	if (convert_is_binary(size, &stats))
		return CONVERT_STAT_BITS_BIN;

	/* No LF at all: nothing to report, whatever the CRLF count says. */
	if (!stats.lf)
		return 0;

	if (!stats.crlf)
		return CONVERT_STAT_BITS_TXT_LF;

	/*
	 * Equal counts are reported as pure CRLF; presumably stats.lf
	 * counts every LF, including those that belong to a CRLF pair --
	 * confirm against gather_stats().
	 */
	if (stats.crlf == stats.lf)
		return CONVERT_STAT_BITS_TXT_CRLF;

	return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF;
}
/*
 * Translate the CONVERT_STAT_BITS_* classification of a buffer into the
 * <eolinfo> string: "-text", "lf", "crlf", "mixed" or "none".
 *
 * The returned pointer refers to a string literal; do not free it.
 */
static const char *gather_convert_stats_ascii(const char *data, unsigned long size)
{
	const unsigned int bits = gather_convert_stats(data, size);
	const unsigned int both = CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF;

	/* The binary bit wins regardless of any other bit. */
	if (bits & CONVERT_STAT_BITS_BIN)
		return "-text";
	if (bits == CONVERT_STAT_BITS_TXT_LF)
		return "lf";
	if (bits == CONVERT_STAT_BITS_TXT_CRLF)
		return "crlf";
	if (bits == both)
		return "mixed";
	return "none";
}
/*
 * Return the <eolinfo> string ("-text", "lf", "crlf", "mixed" or "none")
 * for the blob that the index records for `path`.
 *
 * The returned pointer refers to a string literal; do not free it.
 */
const char *get_cached_convert_stats_ascii(const char *path)
{
	const char *ret;
	/*
	 * Start from 0 so that a defined value is passed on even if
	 * read_blob_data_from_cache() fails without storing a size
	 * (NOTE(review): its failure behavior is not visible here -- confirm).
	 */
	unsigned long sz = 0;
	void *data = read_blob_data_from_cache(path, &sz);

	/* A NULL buffer is handled by the callee; free(NULL) is a no-op. */
	ret = gather_convert_stats_ascii(data, sz);
	free(data);
	return ret;
}
const char *get_wt_convert_stats_ascii(const char *path)
{
const char *ret = "";
struct strbuf sb = STRBUF_INIT;
if (strbuf_read_file(&sb, path, 0) >= 0)
ret = gather_convert_stats_ascii(sb.buf, sb.len);
strbuf_release(&sb);
return ret;
}
static enum eol output_eol(enum crlf_action crlf_action) static enum eol output_eol(enum crlf_action crlf_action)
{ {
switch (crlf_action) { switch (crlf_action) {
@ -187,18 +241,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
gather_stats(src, len, &stats); gather_stats(src, len, &stats);
if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) { if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
/* if (convert_is_binary(len, &stats))
* We're currently not going to even try to convert stuff
* that has bare CR characters. Does anybody do that crazy
* stuff?
*/
if (stats.cr != stats.crlf)
return 0;
/*
* And add some heuristics for binary vs text, of course...
*/
if (is_binary(len, &stats))
return 0; return 0;
if (crlf_action == CRLF_GUESS) { if (crlf_action == CRLF_GUESS) {
@ -277,11 +320,7 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
return 0; return 0;
} }
/* If we have any bare CR characters, we're not going to touch it */ if (convert_is_binary(len, &stats))
if (stats.cr != stats.crlf)
return 0;
if (is_binary(len, &stats))
return 0; return 0;
} }
@ -777,6 +816,30 @@ int would_convert_to_git_filter_fd(const char *path)
return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean); return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
} }
/*
 * Return the <eolattr> string describing the text/eol attributes that
 * apply to `path`: "", "-text", "text", "text=auto", "text eol=lf" or
 * "text eol=crlf".
 *
 * The returned pointer refers to a string literal; do not free it.
 */
const char *get_convert_attr_ascii(const char *path)
{
	struct conv_attrs ca;
	enum crlf_action crlf_action;

	convert_attrs(&ca, path);
	crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
	switch (crlf_action) {
	case CRLF_GUESS:
		return "";
	case CRLF_BINARY:
		return "-text";
	case CRLF_TEXT:
		return "text";
	case CRLF_INPUT:
		return "text eol=lf";
	case CRLF_CRLF:
		/*
		 * Must be "text eol=crlf": the ls-files documentation lists
		 * exactly that value and states that "text=auto eol=crlf"
		 * is not a supported combination.
		 */
		return "text eol=crlf";
	case CRLF_AUTO:
		return "text=auto";
	}
	/* Not reached for the enum values handled above. */
	return "";
}
int convert_to_git(const char *path, const char *src, size_t len, int convert_to_git(const char *path, const char *src, size_t len,
struct strbuf *dst, enum safe_crlf checksafe) struct strbuf *dst, enum safe_crlf checksafe)
{ {

View File

@ -32,6 +32,9 @@ enum eol {
}; };
extern enum eol core_eol; extern enum eol core_eol;
extern const char *get_cached_convert_stats_ascii(const char *path);
extern const char *get_wt_convert_stats_ascii(const char *path);
extern const char *get_convert_attr_ascii(const char *path);
/* returns 1 if *dst was used */ /* returns 1 if *dst was used */
extern int convert_to_git(const char *path, const char *src, size_t len, extern int convert_to_git(const char *path, const char *src, size_t len,

View File

@ -56,21 +56,16 @@ create_gitattributes () {
} }
create_NNO_files () { create_NNO_files () {
lfname=$1
crlfname=$2
lfmixcrlf=$3
lfmixcr=$4
crlfnul=$5
for crlf in false true input for crlf in false true input
do do
for attr in "" auto text -text lf crlf for attr in "" auto text -text lf crlf
do do
pfx=NNO_${crlf}_attr_${attr} && pfx=NNO_${crlf}_attr_${attr} &&
cp $lfname ${pfx}_LF.txt && cp CRLF_mix_LF ${pfx}_LF.txt &&
cp $crlfname ${pfx}_CRLF.txt && cp CRLF_mix_LF ${pfx}_CRLF.txt &&
cp $lfmixcrlf ${pfx}_CRLF_mix_LF.txt && cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
cp $lfmixcr ${pfx}_LF_mix_CR.txt && cp CRLF_mix_LF ${pfx}_LF_mix_CR.txt &&
cp $crlfnul ${pfx}_CRLF_nul.txt cp CRLF_mix_LF ${pfx}_CRLF_nul.txt
done done
done done
} }
@ -96,7 +91,7 @@ commit_check_warn () {
crlfnul=$7 crlfnul=$7
pfx=crlf_${crlf}_attr_${attr} pfx=crlf_${crlf}_attr_${attr}
create_gitattributes "$attr" && create_gitattributes "$attr" &&
for f in LF CRLF repoMIX LF_mix_CR CRLF_mix_LF LF_nul CRLF_nul for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul CRLF_nul
do do
fname=${pfx}_$f.txt && fname=${pfx}_$f.txt &&
cp $f $fname && cp $f $fname &&
@ -149,6 +144,27 @@ commit_chk_wrnNNO () {
' '
} }
# Print the eolinfo string ("lf", "crlf", "mixed" or "-text") that the
# tests expect for the fixture file named $1; an unknown name prints
# "error_invalid <name>" so that the comparison in the caller fails.
stats_ascii () {
	case "$1" in
	LF)          echo lf ;;
	CRLF)        echo crlf ;;
	CRLF_mix_LF) echo mixed ;;
	LF_mix_CR|CRLF_nul|LF_nul|CRLF_mix_CR)
		echo "-text" ;;
	*)
		echo error_invalid $1 ;;
	esac
}
check_files_in_repo () { check_files_in_repo () {
crlf=$1 crlf=$1
attr=$2 attr=$2
@ -203,35 +219,83 @@ checkout_files () {
create_gitattributes $attr && create_gitattributes $attr &&
git config core.autocrlf $crlf && git config core.autocrlf $crlf &&
pfx=eol_${eol}_crlf_${crlf}_attr_${attr}_ && pfx=eol_${eol}_crlf_${crlf}_attr_${attr}_ &&
src=crlf_false_attr__ &&
for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul
do do
rm $src$f.txt && rm crlf_false_attr__$f.txt &&
if test -z "$eol"; then if test -z "$eol"; then
git checkout $src$f.txt git checkout crlf_false_attr__$f.txt
else else
git -c core.eol=$eol checkout $src$f.txt git -c core.eol=$eol checkout crlf_false_attr__$f.txt
fi fi
done done
test_expect_success "ls-files --eol $lfname ${pfx}LF.txt" '
test_when_finished "rm expect actual" &&
sort <<-EOF >expect &&
i/crlf w/$(stats_ascii $crlfname) crlf_false_attr__CRLF.txt
i/mixed w/$(stats_ascii $lfmixcrlf) crlf_false_attr__CRLF_mix_LF.txt
i/lf w/$(stats_ascii $lfname) crlf_false_attr__LF.txt
i/-text w/$(stats_ascii $lfmixcr) crlf_false_attr__LF_mix_CR.txt
i/-text w/$(stats_ascii $crlfnul) crlf_false_attr__CRLF_nul.txt
i/-text w/$(stats_ascii $crlfnul) crlf_false_attr__LF_nul.txt
EOF
git ls-files --eol crlf_false_attr__* |
sed -e "s!attr/[^ ]*!!g" -e "s/ / /g" -e "s/ */ /g" |
sort >actual &&
test_cmp expect actual
'
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF" " test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF" "
compare_ws_file $pfx $lfname ${src}LF.txt compare_ws_file $pfx $lfname crlf_false_attr__LF.txt
" "
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF" " test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF" "
compare_ws_file $pfx $crlfname ${src}CRLF.txt compare_ws_file $pfx $crlfname crlf_false_attr__CRLF.txt
" "
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF_mix_LF" " test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF_mix_LF" "
compare_ws_file $pfx $lfmixcrlf ${src}CRLF_mix_LF.txt compare_ws_file $pfx $lfmixcrlf crlf_false_attr__CRLF_mix_LF.txt
" "
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_mix_CR" " test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_mix_CR" "
compare_ws_file $pfx $lfmixcr ${src}LF_mix_CR.txt compare_ws_file $pfx $lfmixcr crlf_false_attr__LF_mix_CR.txt
" "
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_nul" " test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_nul" "
compare_ws_file $pfx $crlfnul ${src}LF_nul.txt compare_ws_file $pfx $crlfnul crlf_false_attr__LF_nul.txt
" "
} }
####### # Test control characters
# NUL SOH CR EOF==^Z
test_expect_success 'ls-files --eol -o Text/Binary' '
test_when_finished "rm expect actual TeBi_*" &&
STRT=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA &&
STR=$STRT$STRT$STRT$STRT &&
printf "${STR}BBB\001" >TeBi_127_S &&
printf "${STR}BBBB\001">TeBi_128_S &&
printf "${STR}BBB\032" >TeBi_127_E &&
printf "\032${STR}BBB" >TeBi_E_127 &&
printf "${STR}BBBB\000">TeBi_128_N &&
printf "${STR}BBB\012">TeBi_128_L &&
printf "${STR}BBB\015">TeBi_127_C &&
printf "${STR}BB\015\012" >TeBi_126_CL &&
printf "${STR}BB\015\012\015" >TeBi_126_CLC &&
sort <<-\EOF >expect &&
i/ w/-text TeBi_127_S
i/ w/none TeBi_128_S
i/ w/none TeBi_127_E
i/ w/-text TeBi_E_127
i/ w/-text TeBi_128_N
i/ w/lf TeBi_128_L
i/ w/-text TeBi_127_C
i/ w/crlf TeBi_126_CL
i/ w/-text TeBi_126_CLC
EOF
git ls-files --eol -o |
sed -n -e "/TeBi_/{s!attr/[ ]*!!g
s! ! !g
s! *! !g
p
}" | sort >actual &&
test_cmp expect actual
'
test_expect_success 'setup master' ' test_expect_success 'setup master' '
echo >.gitattributes && echo >.gitattributes &&
git checkout -b master && git checkout -b master &&
@ -480,4 +544,19 @@ checkout_files native true "lf" LF CRLF CRLF_mix_LF LF_mix_CR
checkout_files native false "crlf" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul checkout_files native false "crlf" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul
checkout_files native true "crlf" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul checkout_files native true "crlf" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul
# Should be the last test case: remove some files from the worktree
test_expect_success 'ls-files --eol -d -z' '
rm crlf_false_attr__CRLF.txt crlf_false_attr__CRLF_mix_LF.txt crlf_false_attr__LF.txt .gitattributes &&
cat >expect <<-\EOF &&
i/crlf w/ crlf_false_attr__CRLF.txt
i/lf w/ .gitattributes
i/lf w/ crlf_false_attr__LF.txt
i/mixed w/ crlf_false_attr__CRLF_mix_LF.txt
EOF
git ls-files --eol -d |
sed -e "s!attr/[^ ]*!!g" -e "s/ / /g" -e "s/ */ /g" |
sort >actual &&
test_cmp expect actual
'
test_done test_done