From c7934306d1504325d26950f35759ad478772e9c3 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Mon, 28 Feb 2011 12:52:39 -0800 Subject: Limit file descriptors used by packs Rather than using 'errno == EMFILE' after a failed open() call to indicate the process is out of file descriptors and an LRU pack window should be closed, place a hard upper limit on the number of open packs based on the actual rlimit of the process. By using a hard upper limit that is below the rlimit of the current process, it is not necessary to check for EMFILE on every single fd-allocating system call. Instead, reserving 25 file descriptors makes it safe to assume the system call won't fail due to being over the file descriptor limit. Here 25 is chosen as a WAG, but considers 3 for stdin/stdout/stderr, and at least a few for other Git code to operate on temporary files. An additional 20 is reserved as it is not known what the C library needs to perform other services on Git's behalf, such as nsswitch or name resolution. This fixes a case where running `git gc --auto` in a repository with more than 1024 packs (but an rlimit of 1024 open fds) fails due to the temporary output file not being able to allocate a file descriptor. The output file is opened by pack-objects after object enumeration and delta compression are done, both of which have already opened all of the packs and fully populated the file descriptor table. Signed-off-by: Shawn O. 
Pearce Signed-off-by: Junio C Hamano diff --git a/sha1_file.c b/sha1_file.c index 27730c3..c6beed2 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -417,6 +417,8 @@ static unsigned int pack_used_ctr; static unsigned int pack_mmap_calls; static unsigned int peak_pack_open_windows; static unsigned int pack_open_windows; +static unsigned int pack_open_fds; +static unsigned int pack_max_fds; static size_t peak_pack_mapped; static size_t pack_mapped; struct packed_git *packed_git; @@ -596,6 +598,7 @@ static int unuse_one_window(struct packed_git *current, int keep_fd) lru_p->windows = lru_w->next; if (!lru_p->windows && lru_p->pack_fd != keep_fd) { close(lru_p->pack_fd); + pack_open_fds--; lru_p->pack_fd = -1; } } @@ -680,8 +683,10 @@ void free_pack_by_name(const char *pack_name) if (strcmp(pack_name, p->pack_name) == 0) { clear_delta_base_cache(); close_pack_windows(p); - if (p->pack_fd != -1) + if (p->pack_fd != -1) { close(p->pack_fd); + pack_open_fds--; + } close_pack_index(p); free(p->bad_object_sha1); *pp = p->next; @@ -707,9 +712,29 @@ static int open_packed_git_1(struct packed_git *p) if (!p->index_data && open_pack_index(p)) return error("packfile %s index unavailable", p->pack_name); + if (!pack_max_fds) { + struct rlimit lim; + unsigned int max_fds; + + if (getrlimit(RLIMIT_NOFILE, &lim)) + die_errno("cannot get RLIMIT_NOFILE"); + + max_fds = lim.rlim_cur; + + /* Save 3 for stdin/stdout/stderr, 22 for work */ + if (25 < max_fds) + pack_max_fds = max_fds - 25; + else + pack_max_fds = 1; + } + + while (pack_max_fds <= pack_open_fds && unuse_one_window(NULL, -1)) + ; /* nothing */ + p->pack_fd = git_open_noatime(p->pack_name, p); if (p->pack_fd < 0 || fstat(p->pack_fd, &st)) return -1; + pack_open_fds++; /* If we created the struct before we had the pack we lack size. 
*/ if (!p->pack_size) { @@ -761,6 +786,7 @@ static int open_packed_git(struct packed_git *p) return 0; if (p->pack_fd != -1) { close(p->pack_fd); + pack_open_fds--; p->pack_fd = -1; } return -1; @@ -918,6 +944,9 @@ struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path) void install_packed_git(struct packed_git *pack) { + if (pack->pack_fd != -1) + pack_open_fds++; + pack->next = packed_git; packed_git = pack; } @@ -935,8 +964,6 @@ static void prepare_packed_git_one(char *objdir, int local) sprintf(path, "%s/pack", objdir); len = strlen(path); dir = opendir(path); - while (!dir && errno == EMFILE && unuse_one_window(NULL, -1)) - dir = opendir(path); if (!dir) { if (errno != ENOENT) error("unable to open object pack directory: %s: %s", @@ -1092,14 +1119,6 @@ static int git_open_noatime(const char *name, struct packed_git *p) if (fd >= 0) return fd; - /* Might the failure be insufficient file descriptors? */ - if (errno == EMFILE) { - if (unuse_one_window(p, -1)) - continue; - else - return -1; - } - /* Might the failure be due to O_NOATIME? 
*/ if (errno != ENOENT && sha1_file_open_flag) { sha1_file_open_flag = 0; @@ -2359,8 +2378,6 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, filename = sha1_file_name(sha1); fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename); - while (fd < 0 && errno == EMFILE && unuse_one_window(NULL, -1)) - fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename); if (fd < 0) { if (errno == EACCES) return error("insufficient permission for adding an object to repository database %s\n", get_object_directory()); -- cgit v0.10.2-6-g49f6 From 38abd9b8b4e11aa0b4cdaa5c7b44e0c0ad06820b Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Mon, 28 Feb 2011 22:13:22 +0100 Subject: mingw: add minimum getrlimit() compatibility stub We don't have getrlimit on Windows :( Limit of 2048 taken from MSDN: http://msdn.microsoft.com/en-us/library/6e3b887c(v=vs.71).aspx Signed-off-by: Erik Faye-Lund diff --git a/compat/mingw.h b/compat/mingw.h index cafc1eb..14211c6 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -233,6 +233,22 @@ int mingw_getpagesize(void); #define getpagesize mingw_getpagesize #endif +struct rlimit { + unsigned int rlim_cur; +}; +#define RLIMIT_NOFILE 0 + +static inline int getrlimit(int resource, struct rlimit *rlp) +{ + if (resource != RLIMIT_NOFILE) { + errno = EINVAL; + return -1; + } + + rlp->rlim_cur = 2048; + return 0; +} + /* Use mingw_lstat() instead of lstat()/stat() and * mingw_fstat() instead of fstat() on Windows. */ -- cgit v0.10.2-6-g49f6 From d131b7afea58f47721dafd013ab6aff4adfc42dd Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 2 Mar 2011 10:01:54 -0800 Subject: sha1_file.c: Don't retain open fds on small packs If a pack file is small enough that its entire contents fits within one mmap window, mmap the file and then immediately close its file descriptor. 
This reduces the number of file descriptors that are needed to read from repositories with many tiny pack files, such as one that has received 1000 pushes (and created 1000 small pack files) since its last repack. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano diff --git a/cache.h b/cache.h index 3abf895..663ab58 100644 --- a/cache.h +++ b/cache.h @@ -899,7 +899,8 @@ extern struct packed_git { time_t mtime; int pack_fd; unsigned pack_local:1, - pack_keep:1; + pack_keep:1, + do_not_close:1; unsigned char sha1[20]; /* something like ".git/objects/pack/xxxxx.pack" */ char pack_name[FLEX_ARRAY]; /* more */ diff --git a/fast-import.c b/fast-import.c index 970d847..2369a7b 100644 --- a/fast-import.c +++ b/fast-import.c @@ -871,6 +871,7 @@ static void start_packfile(void) p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2); strcpy(p->pack_name, tmpfile); p->pack_fd = pack_fd; + p->do_not_close = 1; pack_file = sha1fd(pack_fd, p->pack_name); hdr.hdr_signature = htonl(PACK_SIGNATURE); diff --git a/sha1_file.c b/sha1_file.c index c6beed2..e194f6a 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -596,7 +596,8 @@ static int unuse_one_window(struct packed_git *current, int keep_fd) lru_l->next = lru_w->next; else { lru_p->windows = lru_w->next; - if (!lru_p->windows && lru_p->pack_fd != keep_fd) { + if (!lru_p->windows && lru_p->pack_fd != -1 + && lru_p->pack_fd != keep_fd) { close(lru_p->pack_fd); pack_open_fds--; lru_p->pack_fd = -1; @@ -812,14 +813,13 @@ unsigned char *use_pack(struct packed_git *p, { struct pack_window *win = *w_cursor; - if (p->pack_fd == -1 && open_packed_git(p)) - die("packfile %s cannot be accessed", p->pack_name); - /* Since packfiles end in a hash of their content and it's * pointless to ask for an offset into the middle of that * hash, and the in_window function above wouldn't match * don't allow an offset too close to the end of the file. 
*/ + if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p)) + die("packfile %s cannot be accessed", p->pack_name); if (offset > (p->pack_size - 20)) die("offset beyond end of packfile (truncated pack?)"); @@ -833,6 +833,10 @@ unsigned char *use_pack(struct packed_git *p, if (!win) { size_t window_align = packed_git_window_size / 2; off_t len; + + if (p->pack_fd == -1 && open_packed_git(p)) + die("packfile %s cannot be accessed", p->pack_name); + win = xcalloc(1, sizeof(*win)); win->offset = (offset / window_align) * window_align; len = p->pack_size - win->offset; @@ -850,6 +854,12 @@ unsigned char *use_pack(struct packed_git *p, die("packfile %s cannot be mapped: %s", p->pack_name, strerror(errno)); + if (!win->offset && win->len == p->pack_size + && !p->do_not_close) { + close(p->pack_fd); + pack_open_fds--; + p->pack_fd = -1; + } pack_mmap_calls++; pack_open_windows++; if (pack_mapped > peak_pack_mapped) @@ -1950,6 +1960,27 @@ off_t find_pack_entry_one(const unsigned char *sha1, return 0; } +static int is_pack_valid(struct packed_git *p) +{ + /* An already open pack is known to be valid. */ + if (p->pack_fd != -1) + return 1; + + /* If the pack has one window completely covering the + * file size, the pack is known to be valid even if + * the descriptor is not currently open. + */ + if (p->windows) { + struct pack_window *w = p->windows; + + if (!w->offset && w->len == p->pack_size) + return 1; + } + + /* Force the pack to open to prove its valid. */ + return !open_packed_git(p); +} + static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e) { static struct packed_git *last_found = (void *)1; @@ -1979,7 +2010,7 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e) * it may have been deleted since the index * was loaded! */ - if (p->pack_fd == -1 && open_packed_git(p)) { + if (!is_pack_valid(p)) { error("packfile %s cannot be accessed", p->pack_name); goto next; } -- cgit v0.10.2-6-g49f6