From b7e58b17b50bbbf00299fc89c480efd7cc92c1bd Mon Sep 17 00:00:00 2001
From: Junio C Hamano
Date: Sat, 5 Aug 2006 04:16:02 -0700
Subject: [PATCH 1/2] Racy git: avoid having to be always too careful

Immediately after a bulk checkout, most of the paths in the working
tree would have the same timestamp as the index file, and this would
force ce_match_stat() to take the slow path for all of them.

When writing an index file out, if many of the paths have very new
timestamps (read: the same timestamp as the index file being written
out), we are better off delaying the return from the command, to make
sure that later commands that touch the working tree files leave
timestamps newer than the one recorded in the index, thereby avoiding
the slow path.

Signed-off-by: Junio C Hamano
---
 read-cache.c | 44 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index f92cdaacee..ce76c20f34 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -5,6 +5,7 @@
  */
 #include "cache.h"
 #include "cache-tree.h"
+#include <time.h>
 
 /* Index extensions.
  *
@@ -923,7 +924,7 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
	 * $ echo filfre >nitfol
	 * $ git-update-index --add nitfol
	 *
-	 * but it does not. Whe the second update-index runs,
+	 * but it does not. When the second update-index runs,
	 * it notices that the entry "frotz" has the same timestamp
	 * as index, and if we were to smudge it by resetting its
	 * size to zero here, then the object name recorded
@@ -945,7 +946,9 @@ int write_cache(int newfd, struct cache_entry **cache, int entries)
 {
	SHA_CTX c;
	struct cache_header hdr;
-	int i, removed;
+	int i, removed, recent;
+	struct stat st;
+	time_t now;
 
	for (i = removed = 0; i < entries; i++)
		if (!cache[i]->ce_mode)
@@ -959,15 +962,19 @@ int write_cache(int newfd, struct cache_entry **cache, int entries)
	if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
		return -1;
 
+	now = fstat(newfd, &st) ? 0 : st.st_mtime;
+	recent = 0;
	for (i = 0; i < entries; i++) {
		struct cache_entry *ce = cache[i];
+		time_t entry_time = (time_t) ntohl(ce->ce_mtime.sec);
		if (!ce->ce_mode)
			continue;
-		if (index_file_timestamp &&
-		    index_file_timestamp <= ntohl(ce->ce_mtime.sec))
+		if (index_file_timestamp && index_file_timestamp <= entry_time)
			ce_smudge_racily_clean_entry(ce);
		if (ce_write(&c, newfd, ce, ce_size(ce)) < 0)
			return -1;
+		if (now && now <= entry_time)
+			recent++;
	}
 
	/* Write extension data here */
@@ -983,5 +990,34 @@ int write_cache(int newfd, struct cache_entry **cache, int entries)
			return -1;
		}
	}
+
+	/*
+	 * To prevent later ce_match_stat() from always falling into
+	 * check_fs(), if we have too many entries that can trigger
+	 * racily clean check, we are better off delaying the return.
+	 * We arbitrarily say if more than 20 paths or 25% of total
+	 * paths are very new, we delay the return until the index
+	 * file gets a new timestamp.
+	 *
+	 * NOTE! NOTE! NOTE!
+	 *
+	 * This assumes that nobody is touching the working tree while
+	 * we are updating the index.
+	 */
+	if (20 < recent || entries <= recent * 4) {
+		now = fstat(newfd, &st) ? 0 : st.st_mtime;
+		while (now && !fstat(newfd, &st) && st.st_mtime <= now) {
+			struct timespec rq, rm;
+			off_t where = lseek(newfd, 0, SEEK_CUR);
+			rq.tv_sec = 0;
+			rq.tv_nsec = 250000000;
+			nanosleep(&rq, &rm);
+			if ((where == (off_t) -1) ||
+			    (write(newfd, "", 1) != 1) ||
+			    (lseek(newfd, -1, SEEK_CUR) != where) ||
+			    ftruncate(newfd, where))
+				break;
+		}
+	}
	return ce_flush(&c, newfd);
 }
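
To see the delaying trick of this first patch in isolation: the sketch
below waits until an open file's st_mtime moves past a reference
timestamp, nudging the clock along with the same append-one-byte-then-
truncate dance that write_cache() uses above. It is a stand-alone
illustration, not part of the patch; the wait_for_newer_mtime() helper
and the scratch.idx path are made up for the example.

#include <fcntl.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>

/*
 * Sleep in 250ms steps until fstat() reports an st_mtime strictly
 * newer than "reference".  Appending a byte and truncating it away
 * touches st_mtime without changing the contents, so the loop also
 * makes progress on filesystems with one-second timestamps.
 */
static int wait_for_newer_mtime(int fd, time_t reference)
{
	struct stat st;

	while (!fstat(fd, &st) && st.st_mtime <= reference) {
		struct timespec rq, rm;
		off_t where = lseek(fd, 0, SEEK_END);

		rq.tv_sec = 0;
		rq.tv_nsec = 250000000;
		nanosleep(&rq, &rm);
		if (where == (off_t) -1 ||
		    write(fd, "", 1) != 1 ||
		    ftruncate(fd, where))
			return -1;
	}
	return 0;
}

int main(void)
{
	struct stat st;
	int fd = open("scratch.idx", O_RDWR | O_CREAT, 0644);

	if (fd < 0 || fstat(fd, &st))
		return 1;
	return wait_for_newer_mtime(fd, st.st_mtime) ? 1 : 0;
}

The one difference from the patch: write_cache() saves and restores the
write offset around the byte it appends, because ce_flush() still has
the trailing SHA-1 to write at that position; a free-standing waiter
can simply append at end of file.
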
From 6015c28b1d6163f124332769989326ee470afbb6 Mon Sep 17 00:00:00 2001
From: Junio C Hamano
Date: Tue, 8 Aug 2006 14:47:32 -0700
Subject: [PATCH 2/2] read-cache: tweak racy-git delay logic

Instead of counting racily clean entries while looping over them to
write them out, use a separate loop after all entries have been
written out to check how many of them are racily clean.

Make sure that the newly created index file has an up-to-date
timestamp when we check, by flushing the data buffered by ce_write().

Signed-off-by: Junio C Hamano
---
 read-cache.c | 73 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 21 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index ce76c20f34..c923a32707 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -841,6 +841,18 @@ unmap:
 static unsigned char write_buffer[WRITE_BUFFER_SIZE];
 static unsigned long write_buffer_len;
 
+static int ce_write_flush(SHA_CTX *context, int fd)
+{
+	unsigned int buffered = write_buffer_len;
+	if (buffered) {
+		SHA1_Update(context, write_buffer, buffered);
+		if (write(fd, write_buffer, buffered) != buffered)
+			return -1;
+		write_buffer_len = 0;
+	}
+	return 0;
+}
+
 static int ce_write(SHA_CTX *context, int fd, void *data, unsigned int len)
 {
	while (len) {
@@ -851,8 +863,8 @@ static int ce_write(SHA_CTX *context, int fd, void *data, unsigned int len)
		memcpy(write_buffer + buffered, data, partial);
		buffered += partial;
		if (buffered == WRITE_BUFFER_SIZE) {
-			SHA1_Update(context, write_buffer, WRITE_BUFFER_SIZE);
-			if (write(fd, write_buffer, WRITE_BUFFER_SIZE) != WRITE_BUFFER_SIZE)
+			write_buffer_len = buffered;
+			if (ce_write_flush(context, fd))
				return -1;
			buffered = 0;
		}
@@ -962,19 +974,15 @@ int write_cache(int newfd, struct cache_entry **cache, int entries)
	if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
		return -1;
 
-	now = fstat(newfd, &st) ? 0 : st.st_mtime;
-	recent = 0;
	for (i = 0; i < entries; i++) {
		struct cache_entry *ce = cache[i];
-		time_t entry_time = (time_t) ntohl(ce->ce_mtime.sec);
		if (!ce->ce_mode)
			continue;
-		if (index_file_timestamp && index_file_timestamp <= entry_time)
+		if (index_file_timestamp &&
+		    index_file_timestamp <= ntohl(ce->ce_mtime.sec))
			ce_smudge_racily_clean_entry(ce);
		if (ce_write(&c, newfd, ce, ce_size(ce)) < 0)
			return -1;
-		if (now && now <= entry_time)
-			recent++;
	}
 
	/* Write extension data here */
@@ -1004,19 +1012,42 @@ int write_cache(int newfd, struct cache_entry **cache, int entries)
	 * This assumes that nobody is touching the working tree while
	 * we are updating the index.
	 */
-	if (20 < recent || entries <= recent * 4) {
-		now = fstat(newfd, &st) ? 0 : st.st_mtime;
-		while (now && !fstat(newfd, &st) && st.st_mtime <= now) {
-			struct timespec rq, rm;
-			off_t where = lseek(newfd, 0, SEEK_CUR);
-			rq.tv_sec = 0;
-			rq.tv_nsec = 250000000;
-			nanosleep(&rq, &rm);
-			if ((where == (off_t) -1) ||
-			    (write(newfd, "", 1) != 1) ||
-			    (lseek(newfd, -1, SEEK_CUR) != where) ||
-			    ftruncate(newfd, where))
-				break;
+
+	/* Make sure that the new index file has st_mtime
+	 * that is current enough -- ce_write() batches the data
+	 * so it might not have written anything yet.
+	 */
+	ce_write_flush(&c, newfd);
+
+	now = fstat(newfd, &st) ? 0 : st.st_mtime;
+	if (now) {
+		recent = 0;
+		for (i = 0; i < entries; i++) {
+			struct cache_entry *ce = cache[i];
+			time_t entry_time = (time_t) ntohl(ce->ce_mtime.sec);
+			if (!ce->ce_mode)
+				continue;
+			if (now && now <= entry_time)
+				recent++;
+		}
+		if (20 < recent && entries <= recent * 4) {
+#if 0
+			fprintf(stderr, "entries %d\n", entries);
+			fprintf(stderr, "recent %d\n", recent);
+			fprintf(stderr, "now %lu\n", now);
+#endif
+			while (!fstat(newfd, &st) && st.st_mtime <= now) {
+				struct timespec rq, rm;
+				off_t where = lseek(newfd, 0, SEEK_CUR);
+				rq.tv_sec = 0;
+				rq.tv_nsec = 250000000;
+				nanosleep(&rq, &rm);
+				if ((where == (off_t) -1) ||
+				    (write(newfd, "", 1) != 1) ||
+				    (lseek(newfd, -1, SEEK_CUR) != where) ||
+				    ftruncate(newfd, where))
+					break;
+			}
+		}
	}
	return ce_flush(&c, newfd);
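
Why the new ce_write_flush() call has to happen before the fstat()
check: ce_write() batches data in the userspace write_buffer, and bytes
that have not been handed to the kernel with write() do not update the
file's st_mtime or st_size. A stand-alone sketch of that effect
follows; buffered_write(), buffered_flush() and the scratch.out path
are illustrative stand-ins, not git code.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>

static char buffer[8192];
static size_t buffer_len;

/* Queue bytes in userspace; write() to the fd only when the buffer fills. */
static int buffered_write(int fd, const char *data, size_t len)
{
	while (len) {
		size_t room = sizeof(buffer) - buffer_len;
		size_t chunk = len < room ? len : room;

		memcpy(buffer + buffer_len, data, chunk);
		buffer_len += chunk;
		data += chunk;
		len -= chunk;
		if (buffer_len == sizeof(buffer)) {
			if (write(fd, buffer, buffer_len) != (ssize_t) buffer_len)
				return -1;
			buffer_len = 0;
		}
	}
	return 0;
}

/* The analogue of ce_write_flush(): push out whatever is still queued. */
static int buffered_flush(int fd)
{
	if (buffer_len && write(fd, buffer, buffer_len) != (ssize_t) buffer_len)
		return -1;
	buffer_len = 0;
	return 0;
}

int main(void)
{
	struct stat st;
	int fd = open("scratch.out", O_RDWR | O_CREAT | O_TRUNC, 0644);

	if (fd < 0 || buffered_write(fd, "hello", 5))
		return 1;
	fstat(fd, &st);
	printf("before flush: st_size = %ld\n", (long) st.st_size);  /* 0 */
	if (buffered_flush(fd) || fstat(fd, &st))
		return 1;
	printf("after flush:  st_size = %ld\n", (long) st.st_size);  /* 5 */
	close(fd);
	return 0;
}

Until the flush, the kernel has seen no data, so the first fstat()
reports size 0 and an st_mtime from open() time; it is the write() done
by the flush that finally updates both, which is exactly why the patch
flushes before comparing entry timestamps against the new index file's
st_mtime.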