Merge branch 'lt/case-insensitive'

* lt/case-insensitive:
  Make git-add behave more sensibly in a case-insensitive environment
  When adding files to the index, add support for case-independent matches
  Make unpack-tree update removed files before any updated files
  Make branch merging aware of underlying case-insensitive filesystems
  Add 'core.ignorecase' option
  Make hash_name_lookup able to do case-independent lookups
  Make "index_name_exists()" return the cache_entry it found
  Move name hashing functions into a file of its own
  Make unpack_trees_options bit flags actual bitfields
This commit is contained in:
Junio C Hamano 2008-05-10 18:14:28 -07:00
commit 380a742679
10 changed files with 241 additions and 105 deletions

View File

@ -423,6 +423,7 @@ LIB_OBJS += log-tree.o
LIB_OBJS += mailmap.o LIB_OBJS += mailmap.o
LIB_OBJS += match-trees.o LIB_OBJS += match-trees.o
LIB_OBJS += merge-file.o LIB_OBJS += merge-file.o
LIB_OBJS += name-hash.o
LIB_OBJS += object.o LIB_OBJS += object.o
LIB_OBJS += pack-check.o LIB_OBJS += pack-check.o
LIB_OBJS += pack-revindex.o LIB_OBJS += pack-revindex.o

View File

@ -40,7 +40,7 @@ static int read_cache_unmerged(void)
for (i = 0; i < active_nr; i++) { for (i = 0; i < active_nr; i++) {
struct cache_entry *ce = active_cache[i]; struct cache_entry *ce = active_cache[i];
if (ce_stage(ce)) { if (ce_stage(ce)) {
remove_index_entry(ce); remove_name_hash(ce);
if (last && !strcmp(ce->name, last->name)) if (last && !strcmp(ce->name, last->name))
continue; continue;
cache_tree_invalidate_path(active_cache_tree, ce->name); cache_tree_invalidate_path(active_cache_tree, ce->name);

37
cache.h
View File

@ -133,6 +133,7 @@ struct cache_entry {
#define CE_UPDATE (0x10000) #define CE_UPDATE (0x10000)
#define CE_REMOVE (0x20000) #define CE_REMOVE (0x20000)
#define CE_UPTODATE (0x40000) #define CE_UPTODATE (0x40000)
#define CE_ADDED (0x80000)
#define CE_HASHED (0x100000) #define CE_HASHED (0x100000)
#define CE_UNHASHED (0x200000) #define CE_UNHASHED (0x200000)
@ -153,20 +154,6 @@ static inline void copy_cache_entry(struct cache_entry *dst, struct cache_entry
dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state; dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state;
} }
/*
* We don't actually *remove* it, we can just mark it invalid so that
* we won't find it in lookups.
*
* Not only would we have to search the lists (simple enough), but
* we'd also have to rehash other hash buckets in case this makes the
* hash bucket empty (common). So it's much better to just mark
* it.
*/
static inline void remove_index_entry(struct cache_entry *ce)
{
ce->ce_flags |= CE_UNHASHED;
}
static inline unsigned create_ce_flags(size_t len, unsigned stage) static inline unsigned create_ce_flags(size_t len, unsigned stage)
{ {
if (len >= CE_NAMEMASK) if (len >= CE_NAMEMASK)
@ -241,6 +228,23 @@ struct index_state {
extern struct index_state the_index; extern struct index_state the_index;
/* Name hashing */
extern void add_name_hash(struct index_state *istate, struct cache_entry *ce);
/*
 * Take a cache entry out of name lookups without touching the hash table.
 *
 * The entry is only flagged CE_UNHASHED instead of being unlinked.
 * Unlinking would mean searching the bucket's list and, whenever the
 * bucket ends up empty (the common case), rehashing other buckets as
 * well, so setting a flag is much cheaper.
 */
static inline void remove_name_hash(struct cache_entry *ce)
{
	ce->ce_flags = ce->ce_flags | CE_UNHASHED;
}
#ifndef NO_THE_INDEX_COMPATIBILITY_MACROS #ifndef NO_THE_INDEX_COMPATIBILITY_MACROS
#define active_cache (the_index.cache) #define active_cache (the_index.cache)
#define active_nr (the_index.cache_nr) #define active_nr (the_index.cache_nr)
@ -261,7 +265,7 @@ extern struct index_state the_index;
#define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL) #define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL)
#define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options)) #define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options))
#define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options)) #define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options))
#define cache_name_exists(name, namelen) index_name_exists(&the_index, (name), (namelen)) #define cache_name_exists(name, namelen, igncase) index_name_exists(&the_index, (name), (namelen), (igncase))
#endif #endif
enum object_type { enum object_type {
@ -351,7 +355,7 @@ extern int write_index(const struct index_state *, int newfd);
extern int discard_index(struct index_state *); extern int discard_index(struct index_state *);
extern int unmerged_index(const struct index_state *); extern int unmerged_index(const struct index_state *);
extern int verify_path(const char *path); extern int verify_path(const char *path);
extern int index_name_exists(struct index_state *istate, const char *name, int namelen); extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase);
extern int index_name_pos(const struct index_state *, const char *name, int namelen); extern int index_name_pos(const struct index_state *, const char *name, int namelen);
#define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */ #define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */
#define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */ #define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */
@ -405,6 +409,7 @@ extern int delete_ref(const char *, const unsigned char *sha1);
extern int trust_executable_bit; extern int trust_executable_bit;
extern int quote_path_fully; extern int quote_path_fully;
extern int has_symlinks; extern int has_symlinks;
extern int ignore_case;
extern int assume_unchanged; extern int assume_unchanged;
extern int prefer_symlink_refs; extern int prefer_symlink_refs;
extern int log_all_ref_updates; extern int log_all_ref_updates;

View File

@ -350,6 +350,11 @@ int git_default_config(const char *var, const char *value)
return 0; return 0;
} }
if (!strcmp(var, "core.ignorecase")) {
ignore_case = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "core.bare")) { if (!strcmp(var, "core.bare")) {
is_bare_repository_cfg = git_config_bool(var, value); is_bare_repository_cfg = git_config_bool(var, value);
return 0; return 0;

2
dir.c
View File

@ -389,7 +389,7 @@ static struct dir_entry *dir_entry_new(const char *pathname, int len)
struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len) struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
{ {
if (cache_name_exists(pathname, len)) if (cache_name_exists(pathname, len, ignore_case))
return NULL; return NULL;
ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc); ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);

View File

@ -14,6 +14,7 @@ char git_default_name[MAX_GITNAME];
int trust_executable_bit = 1; int trust_executable_bit = 1;
int quote_path_fully = 1; int quote_path_fully = 1;
int has_symlinks = 1; int has_symlinks = 1;
int ignore_case;
int assume_unchanged; int assume_unchanged;
int prefer_symlink_refs; int prefer_symlink_refs;
int is_bare_repository_cfg = -1; /* unspecified */ int is_bare_repository_cfg = -1; /* unspecified */

119
name-hash.c Normal file
View File

@ -0,0 +1,119 @@
/*
* name-hash.c
*
* Hashing names in the index state
*
* Copyright (C) 2008 Linus Torvalds
*/
#define NO_THE_INDEX_COMPATIBILITY_MACROS
#include "cache.h"
/*
 * Fold one byte for case-insensitive hashing: clear bit 5 (0x20) when
 * bit 6 (0x40) is set.
 *
 * That makes US-ASCII lower-case letters hash like their upper-case
 * equivalents.  Other bytes with bit 6 set are folded too (e.g. '{'
 * becomes '['); for a hash this only adds collisions, since the actual
 * name comparison is done separately.  We could easily do this one whole
 * word at a time, but that's for future worries.
 */
static inline unsigned char icase_hash(unsigned char c)
{
	return c & ~((c & 0x40) >> 1);
}

/*
 * Compute the case-folded hash of the first 'namelen' bytes of 'name'.
 *
 * Names that differ only in US-ASCII case produce the same value.
 *
 * A 'namelen' of zero or less hashes no bytes and returns the seed
 * value; 'name' is not dereferenced in that case.
 */
static unsigned int hash_name(const char *name, int namelen)
{
	unsigned int hash = 0x123;

	/*
	 * Check the length before reading.  A do/while loop would read a
	 * byte even for an empty name and then keep running while the
	 * counter walks through the negative range.
	 */
	while (namelen-- > 0) {
		unsigned char c = icase_hash((unsigned char)*name++);

		hash = hash * 101 + c;
	}
	return hash;
}
/*
 * Insert 'ce' into the name hash of 'istate', at most once per entry.
 *
 * CE_HASHED marks entries that have already been inserted; such entries
 * are left alone.  When insert_hash() hands back a non-NULL slot
 * (presumably because something with the same hash is already stored
 * there - confirm against insert_hash()), 'ce' is put at the head of
 * that slot's chain and the previous occupant is linked through
 * ce->next.
 */
static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
{
	void **slot;

	if (ce->ce_flags & CE_HASHED)
		return;

	ce->ce_flags |= CE_HASHED;
	ce->next = NULL;

	slot = insert_hash(hash_name(ce->name, ce_namelen(ce)), ce, &istate->name_hash);
	if (!slot)
		return;

	/* Push onto the front of the existing chain. */
	ce->next = *slot;
	*slot = ce;
}
/*
 * Build the name hash for 'istate' on first use.
 *
 * Every entry currently in istate->cache is hashed, and the index is
 * then marked so that later calls return immediately.
 */
static void lazy_init_name_hash(struct index_state *istate)
{
	int i;

	if (!istate->name_hash_initialized) {
		for (i = 0; i < istate->cache_nr; i++)
			hash_index_entry(istate, istate->cache[i]);
		istate->name_hash_initialized = 1;
	}
}
/*
 * Make 'ce' visible to name lookups on 'istate'.
 *
 * Any CE_UNHASHED mark is cleared unconditionally.  The entry is only
 * inserted into the table when the table has already been built;
 * otherwise lazy_init_name_hash() will pick it up from istate->cache
 * later.
 */
void add_name_hash(struct index_state *istate, struct cache_entry *ce)
{
	ce->ce_flags &= ~CE_UNHASHED;
	if (!istate->name_hash_initialized)
		return;
	hash_index_entry(istate, ce);
}
/*
 * Case-insensitive comparison of two length-counted names.
 *
 * Returns 1 when both names have the same length and every byte pair is
 * equal either directly or after toupper(), 0 otherwise.
 */
static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
{
	if (len1 != len2)
		return 0;

	for (; len1 != 0; len1--, name1++, name2++) {
		unsigned char a = *name1;
		unsigned char b = *name2;

		if (a == b)
			continue;

		/* Only pay for case folding on bytes that actually differ. */
		a = toupper(a);
		b = toupper(b);
		if (a != b)
			return 0;
	}
	return 1;
}
/*
 * Does the name of 'ce' match 'name' (of length 'namelen')?
 *
 * The exact comparison always runs first, even when a case-ignoring
 * match was requested, because an exact hit is the common case and the
 * check is cheap.  Only when it fails and 'icase' is non-zero is the
 * slower case-insensitive comparison tried.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase)
{
	int ce_len = ce_namelen(ce);

	if (ce_len == namelen && !cache_name_compare(name, namelen, ce->name, ce_len))
		return 1;
	if (!icase)
		return 0;
	return slow_same_name(name, namelen, ce->name, ce_len);
}
/*
 * Look up 'name' (of length 'namelen') in the name hash of 'istate'.
 *
 * The hash table is built on first use.  Entries flagged CE_UNHASHED are
 * skipped.  With a non-zero 'icase', names that differ only in case also
 * count as a match.
 *
 * Returns the first matching cache entry on the hash chain, or NULL when
 * there is none.
 */
struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
{
	unsigned int hash = hash_name(name, namelen);
	struct cache_entry *ce;

	lazy_init_name_hash(istate);

	for (ce = lookup_hash(hash, &istate->name_hash); ce; ce = ce->next) {
		if (ce->ce_flags & CE_UNHASHED)
			continue;
		if (same_name(ce, name, namelen, icase))
			return ce;
	}
	return NULL;
}

View File

@ -23,80 +23,21 @@
struct index_state the_index; struct index_state the_index;
static unsigned int hash_name(const char *name, int namelen)
{
unsigned int hash = 0x123;
do {
unsigned char c = *name++;
hash = hash*101 + c;
} while (--namelen);
return hash;
}
static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
{
void **pos;
unsigned int hash;
if (ce->ce_flags & CE_HASHED)
return;
ce->ce_flags |= CE_HASHED;
ce->next = NULL;
hash = hash_name(ce->name, ce_namelen(ce));
pos = insert_hash(hash, ce, &istate->name_hash);
if (pos) {
ce->next = *pos;
*pos = ce;
}
}
static void lazy_init_name_hash(struct index_state *istate)
{
int nr;
if (istate->name_hash_initialized)
return;
for (nr = 0; nr < istate->cache_nr; nr++)
hash_index_entry(istate, istate->cache[nr]);
istate->name_hash_initialized = 1;
}
static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
{ {
ce->ce_flags &= ~CE_UNHASHED;
istate->cache[nr] = ce; istate->cache[nr] = ce;
if (istate->name_hash_initialized) add_name_hash(istate, ce);
hash_index_entry(istate, ce);
} }
static void replace_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) static void replace_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
{ {
struct cache_entry *old = istate->cache[nr]; struct cache_entry *old = istate->cache[nr];
remove_index_entry(old); remove_name_hash(old);
set_index_entry(istate, nr, ce); set_index_entry(istate, nr, ce);
istate->cache_changed = 1; istate->cache_changed = 1;
} }
int index_name_exists(struct index_state *istate, const char *name, int namelen)
{
unsigned int hash = hash_name(name, namelen);
struct cache_entry *ce;
lazy_init_name_hash(istate);
ce = lookup_hash(hash, &istate->name_hash);
while (ce) {
if (!(ce->ce_flags & CE_UNHASHED)) {
if (!cache_name_compare(name, namelen, ce->name, ce->ce_flags))
return 1;
}
ce = ce->next;
}
return 0;
}
/* /*
* This only updates the "non-critical" parts of the directory * This only updates the "non-critical" parts of the directory
* cache, ie the parts that aren't tracked by GIT, and only used * cache, ie the parts that aren't tracked by GIT, and only used
@ -438,7 +379,7 @@ int remove_index_entry_at(struct index_state *istate, int pos)
{ {
struct cache_entry *ce = istate->cache[pos]; struct cache_entry *ce = istate->cache[pos];
remove_index_entry(ce); remove_name_hash(ce);
istate->cache_changed = 1; istate->cache_changed = 1;
istate->cache_nr--; istate->cache_nr--;
if (pos >= istate->cache_nr) if (pos >= istate->cache_nr)
@ -488,11 +429,43 @@ static int index_name_pos_also_unmerged(struct index_state *istate,
return pos; return pos;
} }
/*
 * Returns non-zero when the names of 'ce' and 'alias' are not
 * byte-for-byte identical (different length or different bytes).
 */
static int different_name(struct cache_entry *ce, struct cache_entry *alias)
{
	int len = ce_namelen(ce);

	if (ce_namelen(alias) != len)
		return 1;
	return memcmp(ce->name, alias->name, len) != 0;
}
/*
 * If we add a filename that aliases an entry already in the cache, we
 * use the name that we already have - but we don't want to update the
 * same alias twice, because that implies that there were actually two
 * different files with aliasing names!
 *
 * So we use the CE_ADDED flag to verify that the alias was an old one
 * (not one added earlier in this same run) before we accept it as the
 * name to use; otherwise we die.
 *
 * Builds a new entry that carries the name of 'alias' and the rest of
 * the data from 'ce' (as copied by copy_cache_entry()).  'ce' is freed;
 * the caller continues with the returned entry.
 */
static struct cache_entry *create_alias_ce(struct cache_entry *ce, struct cache_entry *alias)
{
	struct cache_entry *aliased;
	int alias_len;

	if (alias->ce_flags & CE_ADDED)
		die("Will not add file alias '%s' ('%s' already exists in index)", ce->name, alias->name);

	/* Ok, create the new entry using the name of the existing alias */
	alias_len = ce_namelen(alias);
	aliased = xcalloc(1, cache_entry_size(alias_len));
	memcpy(aliased->name, alias->name, alias_len);
	copy_cache_entry(aliased, ce);

	free(ce);
	return aliased;
}
int add_file_to_index(struct index_state *istate, const char *path, int verbose) int add_file_to_index(struct index_state *istate, const char *path, int verbose)
{ {
int size, namelen, pos; int size, namelen;
struct stat st; struct stat st;
struct cache_entry *ce; struct cache_entry *ce, *alias;
unsigned ce_option = CE_MATCH_IGNORE_VALID|CE_MATCH_RACY_IS_DIRTY; unsigned ce_option = CE_MATCH_IGNORE_VALID|CE_MATCH_RACY_IS_DIRTY;
if (lstat(path, &st)) if (lstat(path, &st))
@ -525,18 +498,19 @@ int add_file_to_index(struct index_state *istate, const char *path, int verbose)
ce->ce_mode = ce_mode_from_stat(ent, st.st_mode); ce->ce_mode = ce_mode_from_stat(ent, st.st_mode);
} }
pos = index_name_pos(istate, ce->name, namelen); alias = index_name_exists(istate, ce->name, ce_namelen(ce), ignore_case);
if (0 <= pos && if (alias && !ce_stage(alias) && !ie_match_stat(istate, alias, &st, ce_option)) {
!ce_stage(istate->cache[pos]) &&
!ie_match_stat(istate, istate->cache[pos], &st, ce_option)) {
/* Nothing changed, really */ /* Nothing changed, really */
free(ce); free(ce);
ce_mark_uptodate(istate->cache[pos]); ce_mark_uptodate(alias);
alias->ce_flags |= CE_ADDED;
return 0; return 0;
} }
if (index_path(ce->sha1, path, &st, 1)) if (index_path(ce->sha1, path, &st, 1))
die("unable to index file %s", path); die("unable to index file %s", path);
if (ignore_case && alias && different_name(ce, alias))
ce = create_alias_ce(ce, alias);
ce->ce_flags |= CE_ADDED;
if (add_index_entry(istate, ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE)) if (add_index_entry(istate, ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE))
die("unable to add %s to index",path); die("unable to add %s to index",path);
if (verbose) if (verbose)

View File

@ -79,16 +79,21 @@ static int check_updates(struct unpack_trees_options *o)
for (i = 0; i < index->cache_nr; i++) { for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i]; struct cache_entry *ce = index->cache[i];
if (ce->ce_flags & (CE_UPDATE | CE_REMOVE))
display_progress(progress, ++cnt);
if (ce->ce_flags & CE_REMOVE) { if (ce->ce_flags & CE_REMOVE) {
display_progress(progress, ++cnt);
if (o->update) if (o->update)
unlink_entry(ce->name, last_symlink); unlink_entry(ce->name, last_symlink);
remove_index_entry_at(&o->result, i); remove_index_entry_at(&o->result, i);
i--; i--;
continue; continue;
} }
}
for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i];
if (ce->ce_flags & CE_UPDATE) { if (ce->ce_flags & CE_UPDATE) {
display_progress(progress, ++cnt);
ce->ce_flags &= ~CE_UPDATE; ce->ce_flags &= ~CE_UPDATE;
if (o->update) { if (o->update) {
errs |= checkout_entry(ce, &state, NULL); errs |= checkout_entry(ce, &state, NULL);
@ -520,6 +525,22 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action,
return cnt; return cnt;
} }
/*
 * Called when there was no index entry for the tree entry 'dst', yet
 * lstat() found a file in the working tree, and we are on a
 * case-insensitive filesystem.
 *
 * Look for an entry in the source index whose name matches 'dst'
 * case-insensitively and whose stat information agrees with 'st'; if
 * there is one, assume the file on disk is really that other entry.
 *
 * Returns 1 when such an entry exists, 0 otherwise.
 */
static int icase_exists(struct unpack_trees_options *o, struct cache_entry *dst, struct stat *st)
{
	struct cache_entry *match;

	match = index_name_exists(o->src_index, dst->name, ce_namelen(dst), 1);
	if (!match)
		return 0;
	return !ie_match_stat(o->src_index, match, st, CE_MATCH_IGNORE_VALID);
}
/* /*
* We do not want to remove or overwrite a working tree file that * We do not want to remove or overwrite a working tree file that
* is not tracked, unless it is ignored. * is not tracked, unless it is ignored.
@ -538,6 +559,17 @@ static int verify_absent(struct cache_entry *ce, const char *action,
if (!lstat(ce->name, &st)) { if (!lstat(ce->name, &st)) {
int cnt; int cnt;
int dtype = ce_to_dtype(ce); int dtype = ce_to_dtype(ce);
struct cache_entry *result;
/*
* It may be that the 'lstat()' succeeded even though
* target 'ce' was absent, because there is an old
* entry that is different only in case..
*
* Ignore that lstat() if it matches.
*/
if (ignore_case && icase_exists(o, ce, &st))
return 0;
if (o->dir && excluded(o->dir, ce->name, &dtype)) if (o->dir && excluded(o->dir, ce->name, &dtype))
/* /*
@ -581,10 +613,9 @@ static int verify_absent(struct cache_entry *ce, const char *action,
* delete this path, which is in a subdirectory that * delete this path, which is in a subdirectory that
* is being replaced with a blob. * is being replaced with a blob.
*/ */
cnt = index_name_pos(&o->result, ce->name, strlen(ce->name)); result = index_name_exists(&o->result, ce->name, ce_namelen(ce), 0);
if (0 <= cnt) { if (result) {
struct cache_entry *ce = o->result.cache[cnt]; if (result->ce_flags & CE_REMOVE)
if (ce->ce_flags & CE_REMOVE)
return 0; return 0;
} }

View File

@ -9,16 +9,16 @@ typedef int (*merge_fn_t)(struct cache_entry **src,
struct unpack_trees_options *options); struct unpack_trees_options *options);
struct unpack_trees_options { struct unpack_trees_options {
int reset; unsigned int reset:1,
int merge; merge:1,
int update; update:1,
int index_only; index_only:1,
int nontrivial_merge; nontrivial_merge:1,
int trivial_merges_only; trivial_merges_only:1,
int verbose_update; verbose_update:1,
int aggressive; aggressive:1,
int skip_unmerged; skip_unmerged:1,
int gently; gently:1;
const char *prefix; const char *prefix;
int pos; int pos;
struct dir_struct *dir; struct dir_struct *dir;
@ -31,7 +31,7 @@ struct unpack_trees_options {
void *unpack_data; void *unpack_data;
struct index_state *dst_index; struct index_state *dst_index;
const struct index_state *src_index; struct index_state *src_index;
struct index_state result; struct index_state result;
}; };