Merge branch 'tb/ls-files-eol'
"git ls-files" learned a new "--eol" option to help diagnose end-of-line problems. * tb/ls-files-eol: ls-files: add eol diagnostics
This commit is contained in:
commit
05f1539b7f
@ -12,6 +12,7 @@ SYNOPSIS
|
||||
'git ls-files' [-z] [-t] [-v]
|
||||
(--[cached|deleted|others|ignored|stage|unmerged|killed|modified])*
|
||||
(-[c|d|o|i|s|u|k|m])*
|
||||
[--eol]
|
||||
[-x <pattern>|--exclude=<pattern>]
|
||||
[-X <file>|--exclude-from=<file>]
|
||||
[--exclude-per-directory=<file>]
|
||||
@ -147,6 +148,24 @@ a space) at the start of each line:
|
||||
possible for manual inspection; the exact format may change at
|
||||
any time.
|
||||
|
||||
--eol::
|
||||
Show <eolinfo> and <eolattr> of files.
|
||||
<eolinfo> is the file content identification used by Git when
|
||||
the "text" attribute is "auto" (or not set and core.autocrlf is not false).
|
||||
<eolinfo> is either "-text", "none", "lf", "crlf", "mixed" or "".
|
||||
+
|
||||
"" means the file is not a regular file, it is not in the index or
|
||||
not accessible in the working tree.
|
||||
+
|
||||
<eolattr> is the attribute that is used when checking out or committing,
|
||||
it is one of "", "-text", "text", "text=auto", "text eol=lf" or "text eol=crlf".
|
||||
Note: Currently Git does not support "text=auto eol=lf" or "text=auto eol=crlf",
|
||||
that may change in the future.
|
||||
+
|
||||
Both the <eolinfo> in the index ("i/<eolinfo>")
|
||||
and in the working tree ("w/<eolinfo>") are shown for regular files,
|
||||
followed by the ("attr/<eolattr>").
|
||||
|
||||
\--::
|
||||
Do not interpret any more arguments as options.
|
||||
|
||||
@ -161,6 +180,9 @@ which case it outputs:
|
||||
|
||||
[<tag> ]<mode> <object> <stage> <file>
|
||||
|
||||
'git ls-files --eol' will show
|
||||
i/<eolinfo><SPACES>w/<eolinfo><SPACES>attr/<eolattr><SPACE*><TAB><file>
|
||||
|
||||
'git ls-files --unmerged' and 'git ls-files --stage' can be used to examine
|
||||
detailed information on unmerged paths.
|
||||
|
||||
|
@ -27,6 +27,7 @@ static int show_killed;
|
||||
static int show_valid_bit;
|
||||
static int line_terminator = '\n';
|
||||
static int debug_mode;
|
||||
static int show_eol;
|
||||
|
||||
static const char *prefix;
|
||||
static int max_prefix_len;
|
||||
@ -47,6 +48,23 @@ static const char *tag_modified = "";
|
||||
static const char *tag_skip_worktree = "";
|
||||
static const char *tag_resolve_undo = "";
|
||||
|
||||
static void write_eolinfo(const struct cache_entry *ce, const char *path)
|
||||
{
|
||||
if (!show_eol)
|
||||
return;
|
||||
else {
|
||||
struct stat st;
|
||||
const char *i_txt = "";
|
||||
const char *w_txt = "";
|
||||
const char *a_txt = get_convert_attr_ascii(path);
|
||||
if (ce && S_ISREG(ce->ce_mode))
|
||||
i_txt = get_cached_convert_stats_ascii(ce->name);
|
||||
if (!lstat(path, &st) && S_ISREG(st.st_mode))
|
||||
w_txt = get_wt_convert_stats_ascii(path);
|
||||
printf("i/%-5s w/%-5s attr/%-17s\t", i_txt, w_txt, a_txt);
|
||||
}
|
||||
}
|
||||
|
||||
static void write_name(const char *name)
|
||||
{
|
||||
/*
|
||||
@ -68,6 +86,7 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent)
|
||||
return;
|
||||
|
||||
fputs(tag, stdout);
|
||||
write_eolinfo(NULL, ent->name);
|
||||
write_name(ent->name);
|
||||
}
|
||||
|
||||
@ -170,6 +189,7 @@ static void show_ce_entry(const char *tag, const struct cache_entry *ce)
|
||||
find_unique_abbrev(ce->sha1,abbrev),
|
||||
ce_stage(ce));
|
||||
}
|
||||
write_eolinfo(ce, ce->name);
|
||||
write_name(ce->name);
|
||||
if (debug_mode) {
|
||||
const struct stat_data *sd = &ce->ce_stat_data;
|
||||
@ -433,6 +453,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
|
||||
OPT_BIT(0, "directory", &dir.flags,
|
||||
N_("show 'other' directories' names only"),
|
||||
DIR_SHOW_OTHER_DIRECTORIES),
|
||||
OPT_BOOL(0, "eol", &show_eol, N_("show line endings of files")),
|
||||
OPT_NEGBIT(0, "empty-directory", &dir.flags,
|
||||
N_("don't show empty directories"),
|
||||
DIR_HIDE_EMPTY_DIRECTORIES),
|
||||
|
119
convert.c
119
convert.c
@ -13,6 +13,11 @@
|
||||
* translation when the "text" attribute or "auto_crlf" option is set.
|
||||
*/
|
||||
|
||||
/* Stat bits: When BIN is set, the txt bits are unset */
|
||||
#define CONVERT_STAT_BITS_TXT_LF 0x1
|
||||
#define CONVERT_STAT_BITS_TXT_CRLF 0x2
|
||||
#define CONVERT_STAT_BITS_BIN 0x4
|
||||
|
||||
enum crlf_action {
|
||||
CRLF_GUESS = -1,
|
||||
CRLF_BINARY = 0,
|
||||
@ -75,26 +80,75 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
|
||||
|
||||
/*
|
||||
* The same heuristics as diff.c::mmfile_is_binary()
|
||||
* We treat files with bare CR as binary
|
||||
*/
|
||||
static int is_binary(unsigned long size, struct text_stat *stats)
|
||||
static int convert_is_binary(unsigned long size, const struct text_stat *stats)
|
||||
{
|
||||
|
||||
if (stats->cr != stats->crlf)
|
||||
return 1;
|
||||
if (stats->nul)
|
||||
return 1;
|
||||
if ((stats->printable >> 7) < stats->nonprintable)
|
||||
return 1;
|
||||
/*
|
||||
* Other heuristics? Average line length might be relevant,
|
||||
* as might LF vs CR vs CRLF counts..
|
||||
*
|
||||
* NOTE! It might be normal to have a low ratio of CRLF to LF
|
||||
* (somebody starts with a LF-only file and edits it with an editor
|
||||
* that adds CRLF only to lines that are added..). But do we
|
||||
* want to support CR-only? Probably not.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int gather_convert_stats(const char *data, unsigned long size)
|
||||
{
|
||||
struct text_stat stats;
|
||||
if (!data || !size)
|
||||
return 0;
|
||||
gather_stats(data, size, &stats);
|
||||
if (convert_is_binary(size, &stats))
|
||||
return CONVERT_STAT_BITS_BIN;
|
||||
else if (stats.crlf && stats.crlf == stats.lf)
|
||||
return CONVERT_STAT_BITS_TXT_CRLF;
|
||||
else if (stats.crlf && stats.lf)
|
||||
return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF;
|
||||
else if (stats.lf)
|
||||
return CONVERT_STAT_BITS_TXT_LF;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *gather_convert_stats_ascii(const char *data, unsigned long size)
|
||||
{
|
||||
unsigned int convert_stats = gather_convert_stats(data, size);
|
||||
|
||||
if (convert_stats & CONVERT_STAT_BITS_BIN)
|
||||
return "-text";
|
||||
switch (convert_stats) {
|
||||
case CONVERT_STAT_BITS_TXT_LF:
|
||||
return "lf";
|
||||
case CONVERT_STAT_BITS_TXT_CRLF:
|
||||
return "crlf";
|
||||
case CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF:
|
||||
return "mixed";
|
||||
default:
|
||||
return "none";
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Return the <eolinfo> word for the blob that read_blob_data_from_cache()
 * yields for "path".
 *
 * The blob buffer is freed before returning; the result is whatever
 * gather_convert_stats_ascii() returns for it.
 */
const char *get_cached_convert_stats_ascii(const char *path)
{
	const char *ret;
	/*
	 * Start from 0 so the size handed on below is well-defined even if
	 * the lookup returns without storing one.
	 */
	unsigned long sz = 0;
	void *data = read_blob_data_from_cache(path, &sz);

	ret = gather_convert_stats_ascii(data, sz);
	free(data);
	return ret;
}
|
||||
|
||||
const char *get_wt_convert_stats_ascii(const char *path)
|
||||
{
|
||||
const char *ret = "";
|
||||
struct strbuf sb = STRBUF_INIT;
|
||||
if (strbuf_read_file(&sb, path, 0) >= 0)
|
||||
ret = gather_convert_stats_ascii(sb.buf, sb.len);
|
||||
strbuf_release(&sb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static enum eol output_eol(enum crlf_action crlf_action)
|
||||
{
|
||||
switch (crlf_action) {
|
||||
@ -187,18 +241,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
|
||||
gather_stats(src, len, &stats);
|
||||
|
||||
if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
|
||||
/*
|
||||
* We're currently not going to even try to convert stuff
|
||||
* that has bare CR characters. Does anybody do that crazy
|
||||
* stuff?
|
||||
*/
|
||||
if (stats.cr != stats.crlf)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* And add some heuristics for binary vs text, of course...
|
||||
*/
|
||||
if (is_binary(len, &stats))
|
||||
if (convert_is_binary(len, &stats))
|
||||
return 0;
|
||||
|
||||
if (crlf_action == CRLF_GUESS) {
|
||||
@ -277,11 +320,7 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If we have any bare CR characters, we're not going to touch it */
|
||||
if (stats.cr != stats.crlf)
|
||||
return 0;
|
||||
|
||||
if (is_binary(len, &stats))
|
||||
if (convert_is_binary(len, &stats))
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -777,6 +816,30 @@ int would_convert_to_git_filter_fd(const char *path)
|
||||
return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
|
||||
}
|
||||
|
||||
const char *get_convert_attr_ascii(const char *path)
|
||||
{
|
||||
struct conv_attrs ca;
|
||||
enum crlf_action crlf_action;
|
||||
|
||||
convert_attrs(&ca, path);
|
||||
crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
|
||||
switch (crlf_action) {
|
||||
case CRLF_GUESS:
|
||||
return "";
|
||||
case CRLF_BINARY:
|
||||
return "-text";
|
||||
case CRLF_TEXT:
|
||||
return "text";
|
||||
case CRLF_INPUT:
|
||||
return "text eol=lf";
|
||||
case CRLF_CRLF:
|
||||
return "text=auto eol=crlf";
|
||||
case CRLF_AUTO:
|
||||
return "text=auto";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
int convert_to_git(const char *path, const char *src, size_t len,
|
||||
struct strbuf *dst, enum safe_crlf checksafe)
|
||||
{
|
||||
|
@ -32,6 +32,9 @@ enum eol {
|
||||
};
|
||||
|
||||
extern enum eol core_eol;
|
||||
extern const char *get_cached_convert_stats_ascii(const char *path);
|
||||
extern const char *get_wt_convert_stats_ascii(const char *path);
|
||||
extern const char *get_convert_attr_ascii(const char *path);
|
||||
|
||||
/* returns 1 if *dst was used */
|
||||
extern int convert_to_git(const char *path, const char *src, size_t len,
|
||||
|
@ -56,21 +56,16 @@ create_gitattributes () {
|
||||
}
|
||||
|
||||
create_NNO_files () {
|
||||
lfname=$1
|
||||
crlfname=$2
|
||||
lfmixcrlf=$3
|
||||
lfmixcr=$4
|
||||
crlfnul=$5
|
||||
for crlf in false true input
|
||||
do
|
||||
for attr in "" auto text -text lf crlf
|
||||
do
|
||||
pfx=NNO_${crlf}_attr_${attr} &&
|
||||
cp $lfname ${pfx}_LF.txt &&
|
||||
cp $crlfname ${pfx}_CRLF.txt &&
|
||||
cp $lfmixcrlf ${pfx}_CRLF_mix_LF.txt &&
|
||||
cp $lfmixcr ${pfx}_LF_mix_CR.txt &&
|
||||
cp $crlfnul ${pfx}_CRLF_nul.txt
|
||||
cp CRLF_mix_LF ${pfx}_LF.txt &&
|
||||
cp CRLF_mix_LF ${pfx}_CRLF.txt &&
|
||||
cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
|
||||
cp CRLF_mix_LF ${pfx}_LF_mix_CR.txt &&
|
||||
cp CRLF_mix_LF ${pfx}_CRLF_nul.txt
|
||||
done
|
||||
done
|
||||
}
|
||||
@ -96,7 +91,7 @@ commit_check_warn () {
|
||||
crlfnul=$7
|
||||
pfx=crlf_${crlf}_attr_${attr}
|
||||
create_gitattributes "$attr" &&
|
||||
for f in LF CRLF repoMIX LF_mix_CR CRLF_mix_LF LF_nul CRLF_nul
|
||||
for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul CRLF_nul
|
||||
do
|
||||
fname=${pfx}_$f.txt &&
|
||||
cp $f $fname &&
|
||||
@ -149,6 +144,27 @@ commit_chk_wrnNNO () {
|
||||
'
|
||||
}
|
||||
|
||||
# Map a test-file kind given as $1 (LF, CRLF, CRLF_mix_LF, ...) to the
# <eolinfo> word expected for it and echo that word.
# Any other value echoes "error_invalid" followed by the argument.
stats_ascii () {
	case "$1" in
	LF_mix_CR|CRLF_nul|LF_nul|CRLF_mix_CR) echo "-text" ;;
	CRLF_mix_LF) echo mixed ;;
	CRLF) echo crlf ;;
	LF) echo lf ;;
	*) echo error_invalid $1 ;;
	esac

}
|
||||
|
||||
check_files_in_repo () {
|
||||
crlf=$1
|
||||
attr=$2
|
||||
@ -203,35 +219,83 @@ checkout_files () {
|
||||
create_gitattributes $attr &&
|
||||
git config core.autocrlf $crlf &&
|
||||
pfx=eol_${eol}_crlf_${crlf}_attr_${attr}_ &&
|
||||
src=crlf_false_attr__ &&
|
||||
for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul
|
||||
do
|
||||
rm $src$f.txt &&
|
||||
rm crlf_false_attr__$f.txt &&
|
||||
if test -z "$eol"; then
|
||||
git checkout $src$f.txt
|
||||
git checkout crlf_false_attr__$f.txt
|
||||
else
|
||||
git -c core.eol=$eol checkout $src$f.txt
|
||||
git -c core.eol=$eol checkout crlf_false_attr__$f.txt
|
||||
fi
|
||||
done
|
||||
|
||||
test_expect_success "ls-files --eol $lfname ${pfx}LF.txt" '
|
||||
test_when_finished "rm expect actual" &&
|
||||
sort <<-EOF >expect &&
|
||||
i/crlf w/$(stats_ascii $crlfname) crlf_false_attr__CRLF.txt
|
||||
i/mixed w/$(stats_ascii $lfmixcrlf) crlf_false_attr__CRLF_mix_LF.txt
|
||||
i/lf w/$(stats_ascii $lfname) crlf_false_attr__LF.txt
|
||||
i/-text w/$(stats_ascii $lfmixcr) crlf_false_attr__LF_mix_CR.txt
|
||||
i/-text w/$(stats_ascii $crlfnul) crlf_false_attr__CRLF_nul.txt
|
||||
i/-text w/$(stats_ascii $crlfnul) crlf_false_attr__LF_nul.txt
|
||||
EOF
|
||||
git ls-files --eol crlf_false_attr__* |
|
||||
sed -e "s!attr/[^ ]*!!g" -e "s/ / /g" -e "s/ */ /g" |
|
||||
sort >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF" "
|
||||
compare_ws_file $pfx $lfname ${src}LF.txt
|
||||
compare_ws_file $pfx $lfname crlf_false_attr__LF.txt
|
||||
"
|
||||
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF" "
|
||||
compare_ws_file $pfx $crlfname ${src}CRLF.txt
|
||||
compare_ws_file $pfx $crlfname crlf_false_attr__CRLF.txt
|
||||
"
|
||||
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF_mix_LF" "
|
||||
compare_ws_file $pfx $lfmixcrlf ${src}CRLF_mix_LF.txt
|
||||
compare_ws_file $pfx $lfmixcrlf crlf_false_attr__CRLF_mix_LF.txt
|
||||
"
|
||||
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_mix_CR" "
|
||||
compare_ws_file $pfx $lfmixcr ${src}LF_mix_CR.txt
|
||||
compare_ws_file $pfx $lfmixcr crlf_false_attr__LF_mix_CR.txt
|
||||
"
|
||||
test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_nul" "
|
||||
compare_ws_file $pfx $crlfnul ${src}LF_nul.txt
|
||||
compare_ws_file $pfx $crlfnul crlf_false_attr__LF_nul.txt
|
||||
"
|
||||
}
|
||||
|
||||
#######
|
||||
# Test control characters
|
||||
# NUL SOH CR EOF==^Z
|
||||
test_expect_success 'ls-files --eol -o Text/Binary' '
|
||||
test_when_finished "rm expect actual TeBi_*" &&
|
||||
STRT=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA &&
|
||||
STR=$STRT$STRT$STRT$STRT &&
|
||||
printf "${STR}BBB\001" >TeBi_127_S &&
|
||||
printf "${STR}BBBB\001">TeBi_128_S &&
|
||||
printf "${STR}BBB\032" >TeBi_127_E &&
|
||||
printf "\032${STR}BBB" >TeBi_E_127 &&
|
||||
printf "${STR}BBBB\000">TeBi_128_N &&
|
||||
printf "${STR}BBB\012">TeBi_128_L &&
|
||||
printf "${STR}BBB\015">TeBi_127_C &&
|
||||
printf "${STR}BB\015\012" >TeBi_126_CL &&
|
||||
printf "${STR}BB\015\012\015" >TeBi_126_CLC &&
|
||||
sort <<-\EOF >expect &&
|
||||
i/ w/-text TeBi_127_S
|
||||
i/ w/none TeBi_128_S
|
||||
i/ w/none TeBi_127_E
|
||||
i/ w/-text TeBi_E_127
|
||||
i/ w/-text TeBi_128_N
|
||||
i/ w/lf TeBi_128_L
|
||||
i/ w/-text TeBi_127_C
|
||||
i/ w/crlf TeBi_126_CL
|
||||
i/ w/-text TeBi_126_CLC
|
||||
EOF
|
||||
git ls-files --eol -o |
|
||||
sed -n -e "/TeBi_/{s!attr/[ ]*!!g
|
||||
s! ! !g
|
||||
s! *! !g
|
||||
p
|
||||
}" | sort >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_expect_success 'setup master' '
|
||||
echo >.gitattributes &&
|
||||
git checkout -b master &&
|
||||
@ -480,4 +544,19 @@ checkout_files native true "lf" LF CRLF CRLF_mix_LF LF_mix_CR
|
||||
checkout_files native false "crlf" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul
|
||||
checkout_files native true "crlf" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul
|
||||
|
||||
# Should be the last test case: remove some files from the worktree
|
||||
test_expect_success 'ls-files --eol -d -z' '
|
||||
rm crlf_false_attr__CRLF.txt crlf_false_attr__CRLF_mix_LF.txt crlf_false_attr__LF.txt .gitattributes &&
|
||||
cat >expect <<-\EOF &&
|
||||
i/crlf w/ crlf_false_attr__CRLF.txt
|
||||
i/lf w/ .gitattributes
|
||||
i/lf w/ crlf_false_attr__LF.txt
|
||||
i/mixed w/ crlf_false_attr__CRLF_mix_LF.txt
|
||||
EOF
|
||||
git ls-files --eol -d |
|
||||
sed -e "s!attr/[^ ]*!!g" -e "s/ / /g" -e "s/ */ /g" |
|
||||
sort >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_done
|
||||
|
Loading…
Reference in New Issue
Block a user