Merge branch 'tb/ls-files-eol'

"git ls-files" learned a new "--eol" option to help diagnose
end-of-line problems.

* tb/ls-files-eol:
  ls-files: add eol diagnostics
This commit is contained in:
Junio C Hamano 2016-02-03 14:15:59 -08:00
commit 05f1539b7f
5 changed files with 237 additions and 49 deletions

View File

@ -12,6 +12,7 @@ SYNOPSIS
'git ls-files' [-z] [-t] [-v]
(--[cached|deleted|others|ignored|stage|unmerged|killed|modified])*
(-[c|d|o|i|s|u|k|m])*
[--eol]
[-x <pattern>|--exclude=<pattern>]
[-X <file>|--exclude-from=<file>]
[--exclude-per-directory=<file>]
@ -147,6 +148,24 @@ a space) at the start of each line:
possible for manual inspection; the exact format may change at
any time.
--eol::
Show <eolinfo> and <eolattr> of files.
<eolinfo> is the file content identification used by Git when
the "text" attribute is "auto" (or not set and core.autocrlf is not false).
<eolinfo> is either "-text", "none", "lf", "crlf", "mixed" or "".
+
"" means the file is not a regular file, it is not in the index, or
it is not accessible in the working tree.
+
<eolattr> is the attribute that is used when checking out or committing,
it is either "", "-text", "text", "text=auto", "text eol=lf", "text eol=crlf".
Note: Currently Git does not support "text=auto eol=lf" or "text=auto eol=crlf";
this may change in the future.
+
Both the <eolinfo> in the index ("i/<eolinfo>")
and in the working tree ("w/<eolinfo>") are shown for regular files,
followed by the <eolattr> ("attr/<eolattr>").
\--::
Do not interpret any more arguments as options.
@ -161,6 +180,9 @@ which case it outputs:
[<tag> ]<mode> <object> <stage> <file>
'git ls-files --eol' will show
i/<eolinfo><SPACES>w/<eolinfo><SPACES>attr/<eolattr><SPACE*><TAB><file>
'git ls-files --unmerged' and 'git ls-files --stage' can be used to examine
detailed information on unmerged paths.

View File

@ -27,6 +27,7 @@ static int show_killed;
static int show_valid_bit;
static int line_terminator = '\n';
static int debug_mode;
static int show_eol;
static const char *prefix;
static int max_prefix_len;
@ -47,6 +48,23 @@ static const char *tag_modified = "";
static const char *tag_skip_worktree = "";
static const char *tag_resolve_undo = "";
/*
 * Emit the end-of-line diagnostics prefix for one path.
 *
 * Does nothing unless show_eol is set.  Otherwise prints, without a
 * trailing newline, "i/<eolinfo> w/<eolinfo> attr/<eolattr>" followed
 * by a TAB:
 *   - the index column is filled only when "ce" is non-NULL and names
 *     a regular file;
 *   - the working tree column is filled only when lstat() succeeds on
 *     "path" and reports a regular file;
 *   - the attribute column always comes from get_convert_attr_ascii().
 * Columns that are not filled are printed as empty strings.
 */
static void write_eolinfo(const struct cache_entry *ce, const char *path)
{
	const char *index_eol = "";
	const char *worktree_eol = "";
	const char *attr_eol;
	struct stat st;

	if (!show_eol)
		return;

	attr_eol = get_convert_attr_ascii(path);
	if (ce && S_ISREG(ce->ce_mode))
		index_eol = get_cached_convert_stats_ascii(ce->name);
	if (lstat(path, &st) == 0 && S_ISREG(st.st_mode))
		worktree_eol = get_wt_convert_stats_ascii(path);

	printf("i/%-5s w/%-5s attr/%-17s\t", index_eol, worktree_eol, attr_eol);
}
static void write_name(const char *name)
{
/*
@ -68,6 +86,7 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent)
return;
fputs(tag, stdout);
write_eolinfo(NULL, ent->name);
write_name(ent->name);
}
@ -170,6 +189,7 @@ static void show_ce_entry(const char *tag, const struct cache_entry *ce)
find_unique_abbrev(ce->sha1,abbrev),
ce_stage(ce));
}
write_eolinfo(ce, ce->name);
write_name(ce->name);
if (debug_mode) {
const struct stat_data *sd = &ce->ce_stat_data;
@ -433,6 +453,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
OPT_BIT(0, "directory", &dir.flags,
N_("show 'other' directories' names only"),
DIR_SHOW_OTHER_DIRECTORIES),
OPT_BOOL(0, "eol", &show_eol, N_("show line endings of files")),
OPT_NEGBIT(0, "empty-directory", &dir.flags,
N_("don't show empty directories"),
DIR_HIDE_EMPTY_DIRECTORIES),

119
convert.c
View File

@ -13,6 +13,11 @@
* translation when the "text" attribute or "auto_crlf" option is set.
*/
/*
 * Stat bits: When BIN is set, the txt bits are unset
 *
 * These are the classification bits produced by gather_convert_stats():
 * TXT_LF and TXT_CRLF may be combined (reported as "mixed"), BIN is
 * returned on its own (reported as "-text"), and a value of 0 is
 * reported as "none".
 */
#define CONVERT_STAT_BITS_TXT_LF 0x1
#define CONVERT_STAT_BITS_TXT_CRLF 0x2
#define CONVERT_STAT_BITS_BIN 0x4
enum crlf_action {
CRLF_GUESS = -1,
CRLF_BINARY = 0,
@ -75,26 +80,75 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
/*
* The same heuristics as diff.c::mmfile_is_binary()
* We treat files with bare CR as binary
*/
static int is_binary(unsigned long size, struct text_stat *stats)
static int convert_is_binary(unsigned long size, const struct text_stat *stats)
{
	/*
	 * Content is classified as binary (return 1) when any of the
	 * following holds, otherwise it is text (return 0):
	 *   - the CR count differs from the CRLF count, i.e. there is
	 *     at least one bare CR;
	 *   - a NUL byte was seen;
	 *   - non-printable characters outnumber printable / 128.
	 *
	 * A low ratio of CRLF to LF is deliberately not treated as
	 * binary: a LF-only file edited with a tool that adds CRLF only
	 * to new lines is normal.  CR-only files are not supported.
	 *
	 * "size" is not consulted by these heuristics.
	 */
	int has_bare_cr = stats->cr != stats->crlf;
	int has_nul = stats->nul != 0;
	int mostly_nonprintable = (stats->printable >> 7) < stats->nonprintable;

	return has_bare_cr || has_nul || mostly_nonprintable;
}
/*
 * Classify a buffer by its line endings.
 *
 * Returns 0 for a NULL or empty buffer and for content without any LF,
 * CONVERT_STAT_BITS_BIN when convert_is_binary() says the content is
 * binary, and otherwise a combination of CONVERT_STAT_BITS_TXT_LF and
 * CONVERT_STAT_BITS_TXT_CRLF:
 *   - TXT_LF alone when there are LFs but no CRLF;
 *   - TXT_CRLF alone when the CRLF count equals the LF count;
 *   - both bits when CRLFs are present but the counts differ.
 */
static unsigned int gather_convert_stats(const char *data, unsigned long size)
{
	struct text_stat stats;

	if (!data || !size)
		return 0;

	gather_stats(data, size, &stats);
	if (convert_is_binary(size, &stats))
		return CONVERT_STAT_BITS_BIN;

	/* Every text classification below requires at least one LF. */
	if (!stats.lf)
		return 0;
	if (!stats.crlf)
		return CONVERT_STAT_BITS_TXT_LF;
	if (stats.crlf == stats.lf)
		return CONVERT_STAT_BITS_TXT_CRLF;
	return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF;
}
/*
 * Translate the bits from gather_convert_stats() into the <eolinfo>
 * word: "-text" when the BIN bit is set, "lf", "crlf" or "mixed" for
 * the text classifications, and "none" for anything else.
 * The returned pointer is a string literal; callers must not free it.
 */
static const char *gather_convert_stats_ascii(const char *data, unsigned long size)
{
	const unsigned int lf_and_crlf =
		CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF;
	unsigned int bits = gather_convert_stats(data, size);

	if (bits & CONVERT_STAT_BITS_BIN)
		return "-text";
	if (bits == CONVERT_STAT_BITS_TXT_LF)
		return "lf";
	if (bits == CONVERT_STAT_BITS_TXT_CRLF)
		return "crlf";
	if (bits == lf_and_crlf)
		return "mixed";
	return "none";
}
/*
 * Return the <eolinfo> word for the blob that read_blob_data_from_cache()
 * yields for "path".  The blob buffer is released before returning; the
 * result is a string literal and stays valid.
 */
const char *get_cached_convert_stats_ascii(const char *path)
{
	unsigned long blob_size;
	void *blob = read_blob_data_from_cache(path, &blob_size);
	const char *eolinfo = gather_convert_stats_ascii(blob, blob_size);

	free(blob);
	return eolinfo;
}
const char *get_wt_convert_stats_ascii(const char *path)
{
const char *ret = "";
struct strbuf sb = STRBUF_INIT;
if (strbuf_read_file(&sb, path, 0) >= 0)
ret = gather_convert_stats_ascii(sb.buf, sb.len);
strbuf_release(&sb);
return ret;
}
static enum eol output_eol(enum crlf_action crlf_action)
{
switch (crlf_action) {
@ -187,18 +241,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
gather_stats(src, len, &stats);
if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
/*
* We're currently not going to even try to convert stuff
* that has bare CR characters. Does anybody do that crazy
* stuff?
*/
if (stats.cr != stats.crlf)
return 0;
/*
* And add some heuristics for binary vs text, of course...
*/
if (is_binary(len, &stats))
if (convert_is_binary(len, &stats))
return 0;
if (crlf_action == CRLF_GUESS) {
@ -277,11 +320,7 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
return 0;
}
/* If we have any bare CR characters, we're not going to touch it */
if (stats.cr != stats.crlf)
return 0;
if (is_binary(len, &stats))
if (convert_is_binary(len, &stats))
return 0;
}
@ -777,6 +816,30 @@ int would_convert_to_git_filter_fd(const char *path)
return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
}
/*
 * Return the <eolattr> word shown by "git ls-files --eol" for "path".
 *
 * The attributes of "path" are looked up with convert_attrs() and
 * combined by input_crlf_action(); the resulting action is mapped to
 * one of "", "-text", "text", "text=auto", "text eol=lf" or
 * "text eol=crlf".  The result is a string literal and must not be
 * freed.
 */
const char *get_convert_attr_ascii(const char *path)
{
	struct conv_attrs ca;
	enum crlf_action crlf_action;

	convert_attrs(&ca, path);
	crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
	switch (crlf_action) {
	case CRLF_GUESS:
		return "";
	case CRLF_BINARY:
		return "-text";
	case CRLF_TEXT:
		return "text";
	case CRLF_INPUT:
		return "text eol=lf";
	case CRLF_CRLF:
		/*
		 * Must mirror the CRLF_INPUT spelling: the documented
		 * values do not include "text=auto eol=crlf", which Git
		 * does not support.
		 */
		return "text eol=crlf";
	case CRLF_AUTO:
		return "text=auto";
	}
	/* Not reached for the enum values handled above. */
	return "";
}
int convert_to_git(const char *path, const char *src, size_t len,
struct strbuf *dst, enum safe_crlf checksafe)
{

View File

@ -32,6 +32,9 @@ enum eol {
};
extern enum eol core_eol;
extern const char *get_cached_convert_stats_ascii(const char *path);
extern const char *get_wt_convert_stats_ascii(const char *path);
extern const char *get_convert_attr_ascii(const char *path);
/* returns 1 if *dst was used */
extern int convert_to_git(const char *path, const char *src, size_t len,

View File

@ -56,21 +56,16 @@ create_gitattributes () {
}
create_NNO_files () {
lfname=$1
crlfname=$2
lfmixcrlf=$3
lfmixcr=$4
crlfnul=$5
for crlf in false true input
do
for attr in "" auto text -text lf crlf
do
pfx=NNO_${crlf}_attr_${attr} &&
cp $lfname ${pfx}_LF.txt &&
cp $crlfname ${pfx}_CRLF.txt &&
cp $lfmixcrlf ${pfx}_CRLF_mix_LF.txt &&
cp $lfmixcr ${pfx}_LF_mix_CR.txt &&
cp $crlfnul ${pfx}_CRLF_nul.txt
cp CRLF_mix_LF ${pfx}_LF.txt &&
cp CRLF_mix_LF ${pfx}_CRLF.txt &&
cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
cp CRLF_mix_LF ${pfx}_LF_mix_CR.txt &&
cp CRLF_mix_LF ${pfx}_CRLF_nul.txt
done
done
}
@ -96,7 +91,7 @@ commit_check_warn () {
crlfnul=$7
pfx=crlf_${crlf}_attr_${attr}
create_gitattributes "$attr" &&
for f in LF CRLF repoMIX LF_mix_CR CRLF_mix_LF LF_nul CRLF_nul
for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul CRLF_nul
do
fname=${pfx}_$f.txt &&
cp $f $fname &&
@ -149,6 +144,27 @@ commit_chk_wrnNNO () {
'
}
# Print the <eolinfo> word expected for the fixture file named by $1:
# "lf", "crlf", "mixed" or "-text".  Any other name prints
# "error_invalid" followed by the name.
stats_ascii () {
	case "$1" in
	LF) echo lf ;;
	CRLF) echo crlf ;;
	CRLF_mix_LF) echo mixed ;;
	LF_mix_CR|CRLF_nul|LF_nul|CRLF_mix_CR) echo "-text" ;;
	*) echo error_invalid $1 ;;
	esac
}
check_files_in_repo () {
crlf=$1
attr=$2
@ -203,35 +219,83 @@ checkout_files () {
create_gitattributes $attr &&
git config core.autocrlf $crlf &&
pfx=eol_${eol}_crlf_${crlf}_attr_${attr}_ &&
src=crlf_false_attr__ &&
for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul
do
rm $src$f.txt &&
rm crlf_false_attr__$f.txt &&
if test -z "$eol"; then
git checkout $src$f.txt
git checkout crlf_false_attr__$f.txt
else
git -c core.eol=$eol checkout $src$f.txt
git -c core.eol=$eol checkout crlf_false_attr__$f.txt
fi
done
test_expect_success "ls-files --eol $lfname ${pfx}LF.txt" '
test_when_finished "rm expect actual" &&
sort <<-EOF >expect &&
i/crlf w/$(stats_ascii $crlfname) crlf_false_attr__CRLF.txt
i/mixed w/$(stats_ascii $lfmixcrlf) crlf_false_attr__CRLF_mix_LF.txt
i/lf w/$(stats_ascii $lfname) crlf_false_attr__LF.txt
i/-text w/$(stats_ascii $lfmixcr) crlf_false_attr__LF_mix_CR.txt
i/-text w/$(stats_ascii $crlfnul) crlf_false_attr__CRLF_nul.txt
i/-text w/$(stats_ascii $crlfnul) crlf_false_attr__LF_nul.txt
EOF
git ls-files --eol crlf_false_attr__* |
sed -e "s!attr/[^ ]*!!g" -e "s/ / /g" -e "s/ */ /g" |
sort >actual &&
test_cmp expect actual
'
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF" "
compare_ws_file $pfx $lfname ${src}LF.txt
compare_ws_file $pfx $lfname crlf_false_attr__LF.txt
"
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF" "
compare_ws_file $pfx $crlfname ${src}CRLF.txt
compare_ws_file $pfx $crlfname crlf_false_attr__CRLF.txt
"
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF_mix_LF" "
compare_ws_file $pfx $lfmixcrlf ${src}CRLF_mix_LF.txt
compare_ws_file $pfx $lfmixcrlf crlf_false_attr__CRLF_mix_LF.txt
"
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_mix_CR" "
compare_ws_file $pfx $lfmixcr ${src}LF_mix_CR.txt
compare_ws_file $pfx $lfmixcr crlf_false_attr__LF_mix_CR.txt
"
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_nul" "
compare_ws_file $pfx $crlfnul ${src}LF_nul.txt
compare_ws_file $pfx $crlfnul crlf_false_attr__LF_nul.txt
"
}
#######
# Test control characters
# NUL SOH CR EOF==^Z
test_expect_success 'ls-files --eol -o Text/Binary' '
test_when_finished "rm expect actual TeBi_*" &&
STRT=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA &&
STR=$STRT$STRT$STRT$STRT &&
printf "${STR}BBB\001" >TeBi_127_S &&
printf "${STR}BBBB\001">TeBi_128_S &&
printf "${STR}BBB\032" >TeBi_127_E &&
printf "\032${STR}BBB" >TeBi_E_127 &&
printf "${STR}BBBB\000">TeBi_128_N &&
printf "${STR}BBB\012">TeBi_128_L &&
printf "${STR}BBB\015">TeBi_127_C &&
printf "${STR}BB\015\012" >TeBi_126_CL &&
printf "${STR}BB\015\012\015" >TeBi_126_CLC &&
sort <<-\EOF >expect &&
i/ w/-text TeBi_127_S
i/ w/none TeBi_128_S
i/ w/none TeBi_127_E
i/ w/-text TeBi_E_127
i/ w/-text TeBi_128_N
i/ w/lf TeBi_128_L
i/ w/-text TeBi_127_C
i/ w/crlf TeBi_126_CL
i/ w/-text TeBi_126_CLC
EOF
git ls-files --eol -o |
sed -n -e "/TeBi_/{s!attr/[ ]*!!g
s! ! !g
s! *! !g
p
}" | sort >actual &&
test_cmp expect actual
'
test_expect_success 'setup master' '
echo >.gitattributes &&
git checkout -b master &&
@ -480,4 +544,19 @@ checkout_files native true "lf" LF CRLF CRLF_mix_LF LF_mix_CR
checkout_files native false "crlf" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul
checkout_files native true "crlf" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul
# Should be the last test case: remove some files from the worktree
test_expect_success 'ls-files --eol -d -z' '
rm crlf_false_attr__CRLF.txt crlf_false_attr__CRLF_mix_LF.txt crlf_false_attr__LF.txt .gitattributes &&
cat >expect <<-\EOF &&
i/crlf w/ crlf_false_attr__CRLF.txt
i/lf w/ .gitattributes
i/lf w/ crlf_false_attr__LF.txt
i/mixed w/ crlf_false_attr__CRLF_mix_LF.txt
EOF
git ls-files --eol -d |
sed -e "s!attr/[^ ]*!!g" -e "s/ / /g" -e "s/ */ /g" |
sort >actual &&
test_cmp expect actual
'
test_done