From b9a62cbeb91e52aea8fc427a84e72f475dfe60cf Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 12 May 2011 16:50:29 -0700 Subject: packed_object_info_detail(): do not return a string Instead return an integer that can be given to typename() if the caller wants a string, just like everybody else does. Signed-off-by: Junio C Hamano diff --git a/builtin/verify-pack.c b/builtin/verify-pack.c index b6079ae..3a919b1 100644 --- a/builtin/verify-pack.c +++ b/builtin/verify-pack.c @@ -33,9 +33,9 @@ static void show_pack_info(struct packed_git *p, unsigned int flags) if (!sha1) die("internal error pack-check nth-packed-object"); offset = nth_packed_object_offset(p, i); - type = packed_object_info_detail(p, offset, &size, &store_size, + type = typename(packed_object_info_detail(p, offset, &size, &store_size, &delta_chain_length, - base_sha1); + base_sha1)); if (!stat_only) printf("%s ", sha1_to_hex(sha1)); if (!delta_chain_length) { diff --git a/cache.h b/cache.h index b1b5bb5..cdb5112 100644 --- a/cache.h +++ b/cache.h @@ -1020,7 +1020,7 @@ extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *); extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *); extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); -extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *); +extern int packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *); /* Dumb servers support */ extern int update_server_info(int); diff --git a/sha1_file.c b/sha1_file.c index 064a330..4f96eb1 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1549,7 +1549,7 @@ static int unpack_object_header(struct packed_git *p, return type; } 
-const char *packed_object_info_detail(struct packed_git *p, +int packed_object_info_detail(struct packed_git *p, off_t obj_offset, unsigned long *size, unsigned long *store_size, @@ -1580,7 +1580,7 @@ const char *packed_object_info_detail(struct packed_git *p, case OBJ_BLOB: case OBJ_TAG: unuse_pack(&w_curs); - return typename(type); + return type; case OBJ_OFS_DELTA: obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset); if (!obj_offset) -- cgit v0.10.2-6-g49f6 From 9a4905902230c080f0f6a64ed7f0aaa5777d2f5b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 12 May 2011 15:51:38 -0700 Subject: sha1_object_info_extended(): expose a bit more info The original interface for sha1_object_info() takes an object name and gives back a type and its size (the latter is given only when it was asked for). The new interface wraps its implementation and exposes a few more pieces of information that the interface used to discard, namely: - where the object is stored (loose? cached? packed?) - if packed, where in which packfile? Signed-off-by: Junio C Hamano --- * In the earlier round, this used u.pack.delta to record the length of the delta chain, but the caller is not necessarily interested in the length of the delta chain per se, but may only want to know if it is a delta against another object or is stored as deflated data. Calling packed_object_info_detail() involves walking the reverse index chain to compute the store size of the object and is unnecessarily expensive. We could resurrect the code if a new caller wants to know, but I doubt it. 
diff --git a/cache.h b/cache.h index cdb5112..9fbc07e 100644 --- a/cache.h +++ b/cache.h @@ -1022,6 +1022,34 @@ extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsig extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); extern int packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *); +struct object_info { + /* Request */ + unsigned long *sizep; + + /* Response */ + enum { + OI_CACHED, + OI_LOOSE, + OI_PACKED + } whence; + union { + /* + * struct { + * ... Nothing to expose in this case + * } cached; + * struct { + * ... Nothing to expose in this case + * } loose; + */ + struct { + struct packed_git *pack; + off_t offset; + unsigned int is_delta; + } packed; + } u; +}; +extern int sha1_object_info_extended(const unsigned char *, struct object_info *); + /* Dumb servers support */ extern int update_server_info(int); diff --git a/sha1_file.c b/sha1_file.c index 4f96eb1..7eed316 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1481,7 +1481,7 @@ static off_t get_delta_base(struct packed_git *p, /* forward declaration for a mutually recursive function */ static int packed_object_info(struct packed_git *p, off_t offset, - unsigned long *sizep); + unsigned long *sizep, int *rtype); static int packed_delta_info(struct packed_git *p, struct pack_window **w_curs, @@ -1495,7 +1495,7 @@ static int packed_delta_info(struct packed_git *p, base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset); if (!base_offset) return OBJ_BAD; - type = packed_object_info(p, base_offset, NULL); + type = packed_object_info(p, base_offset, NULL, NULL); if (type <= OBJ_NONE) { struct revindex_entry *revidx; const unsigned char *base_sha1; @@ -1605,7 +1605,7 @@ int packed_object_info_detail(struct packed_git *p, } static int packed_object_info(struct packed_git *p, off_t obj_offset, - unsigned long *sizep) + unsigned long *sizep, int *rtype) { struct 
pack_window *w_curs = NULL; unsigned long size; @@ -1613,6 +1613,8 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset, enum object_type type; type = unpack_object_header(p, &w_curs, &curpos, &size); + if (rtype) + *rtype = type; /* representation type */ switch (type) { case OBJ_OFS_DELTA: @@ -2093,24 +2095,28 @@ static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *size return status; } -int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) +/* returns enum object_type or negative */ +int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi) { struct cached_object *co; struct pack_entry e; - int status; + int status, rtype; co = find_cached_object(sha1); if (co) { - if (sizep) - *sizep = co->size; + if (oi->sizep) + *(oi->sizep) = co->size; + oi->whence = OI_CACHED; return co->type; } if (!find_pack_entry(sha1, &e)) { /* Most likely it's a loose object. */ - status = sha1_loose_object_info(sha1, sizep); - if (status >= 0) + status = sha1_loose_object_info(sha1, oi->sizep); + if (status >= 0) { + oi->whence = OI_LOOSE; return status; + } /* Not a loose object; someone else may have just packed it. 
*/ reprepare_packed_git(); @@ -2118,15 +2124,29 @@ int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) return status; } - status = packed_object_info(e.p, e.offset, sizep); + status = packed_object_info(e.p, e.offset, oi->sizep, &rtype); if (status < 0) { mark_bad_packed_object(e.p, sha1); - status = sha1_object_info(sha1, sizep); + status = sha1_object_info_extended(sha1, oi); + } else { + oi->whence = OI_PACKED; + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || + rtype == OBJ_OFS_DELTA); } return status; } +int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) +{ + struct object_info oi; + + oi.sizep = sizep; + return sha1_object_info_extended(sha1, &oi); +} + static void *read_packed_sha1(const unsigned char *sha1, enum object_type *type, unsigned long *size) { -- cgit v0.10.2-6-g49f6 From 5266d369b21f8c260fa60d94fb29c3998c521e4a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 13 May 2011 13:20:43 -0700 Subject: sha1_object_info_extended(): hint about objects in delta-base cache An object found in the delta-base cache is not guaranteed to stay there, but we know it came from a pack and it is likely to give us a quick access if we read_sha1_file() it right now, which is a piece of useful information. 
Signed-off-by: Junio C Hamano diff --git a/cache.h b/cache.h index 9fbc07e..3a1af9d 100644 --- a/cache.h +++ b/cache.h @@ -1030,7 +1030,8 @@ struct object_info { enum { OI_CACHED, OI_LOOSE, - OI_PACKED + OI_PACKED, + OI_DBCACHED } whence; union { /* diff --git a/sha1_file.c b/sha1_file.c index 7eed316..1d6f93d 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1697,6 +1697,13 @@ static unsigned long pack_entry_hash(struct packed_git *p, off_t base_offset) return hash % MAX_DELTA_CACHE; } +static int in_delta_base_cache(struct packed_git *p, off_t base_offset) +{ + unsigned long hash = pack_entry_hash(p, base_offset); + struct delta_base_cache_entry *ent = delta_base_cache + hash; + return (ent->data && ent->p == p && ent->base_offset == base_offset); +} + static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset, unsigned long *base_size, enum object_type *type, int keep_cache) { @@ -2128,6 +2135,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi) if (status < 0) { mark_bad_packed_object(e.p, sha1); status = sha1_object_info_extended(sha1, oi); + } else if (in_delta_base_cache(e.p, e.offset)) { + oi->whence = OI_DBCACHED; } else { oi->whence = OI_PACKED; oi->u.packed.offset = e.offset; -- cgit v0.10.2-6-g49f6 From f8c8abc5b76ffd763b9c7c5e4fb054358e82ca28 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 13 May 2011 15:33:33 -0700 Subject: unpack_object_header(): make it public This function is used to read and skip over the per-object header in a packfile. 
Signed-off-by: Junio C Hamano diff --git a/cache.h b/cache.h index 3a1af9d..7650d2e 100644 --- a/cache.h +++ b/cache.h @@ -1021,6 +1021,7 @@ extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsign extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); extern int packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *); +extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *); struct object_info { /* Request */ diff --git a/sha1_file.c b/sha1_file.c index 1d6f93d..a28683a 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1523,10 +1523,10 @@ static int packed_delta_info(struct packed_git *p, return type; } -static int unpack_object_header(struct packed_git *p, - struct pack_window **w_curs, - off_t *curpos, - unsigned long *sizep) +int unpack_object_header(struct packed_git *p, + struct pack_window **w_curs, + off_t *curpos, + unsigned long *sizep) { unsigned char *base; unsigned int left; -- cgit v0.10.2-6-g49f6 From fd5db55d8b6668a1ff9583a6636a4d54ad9519f2 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 12 May 2011 21:36:42 -0700 Subject: write_entry(): separate two helper functions out In the write-out codepath, a block of code determines what file in the working tree to write to, and opens an output file descriptor to it. After writing the contents out to the file, another block of code runs fstat() on the file descriptor when appropriate. Separate these blocks out to open_output_fd() and fstat_output() helper functions. 
Signed-off-by: Junio C Hamano diff --git a/entry.c b/entry.c index b017167..cc6502a 100644 --- a/entry.c +++ b/entry.c @@ -91,6 +91,29 @@ static void *read_blob_entry(struct cache_entry *ce, unsigned long *size) return NULL; } +static int open_output_fd(char *path, struct cache_entry *ce, int to_tempfile) +{ + int symlink = (ce->ce_mode & S_IFMT) != S_IFREG; + if (to_tempfile) { + strcpy(path, symlink + ? ".merge_link_XXXXXX" : ".merge_file_XXXXXX"); + return mkstemp(path); + } else { + return create_file(path, !symlink ? ce->ce_mode : 0666); + } +} + +static int fstat_output(int fd, const struct checkout *state, struct stat *st) +{ + /* use fstat() only when path == ce->name */ + if (fstat_is_reliable() && + state->refresh_cache && !state->base_dir_len) { + fstat(fd, st); + return 1; + } + return 0; +} + static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile) { unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT; @@ -128,17 +151,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout size = newsize; } - if (to_tempfile) { - if (ce_mode_s_ifmt == S_IFREG) - strcpy(path, ".merge_file_XXXXXX"); - else - strcpy(path, ".merge_link_XXXXXX"); - fd = mkstemp(path); - } else if (ce_mode_s_ifmt == S_IFREG) { - fd = create_file(path, ce->ce_mode); - } else { - fd = create_file(path, 0666); - } + fd = open_output_fd(path, ce, to_tempfile); if (fd < 0) { free(new); return error("unable to create file %s (%s)", @@ -146,12 +159,8 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout } wrote = write_in_full(fd, new, size); - /* use fstat() only when path == ce->name */ - if (fstat_is_reliable() && - state->refresh_cache && !to_tempfile && !state->base_dir_len) { - fstat(fd, &st); - fstat_done = 1; - } + if (!to_tempfile) + fstat_done = fstat_output(fd, state, &st); close(fd); free(new); if (wrote != size) -- cgit v0.10.2-6-g49f6 From 
46bf043807cc5d8986f41139a8c28491f613c5e0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 11 May 2011 19:30:25 -0700 Subject: streaming: a new API to read from the object store Given an object name, use open_istream() to get a git_istream handle that you can read_istream() from as if you are using read(2) to read the contents of the object, and close it with close_istream() when you are done. Currently, we do not do anything fancy--it just calls read_sha1_file() and keeps the contents in memory as a whole, and carves it out as you request with read_istream(). Signed-off-by: Junio C Hamano diff --git a/Makefile b/Makefile index 320ccc7..83bd539 100644 --- a/Makefile +++ b/Makefile @@ -552,6 +552,7 @@ LIB_H += sha1-lookup.h LIB_H += sideband.h LIB_H += sigchain.h LIB_H += strbuf.h +LIB_H += streaming.h LIB_H += string-list.h LIB_H += submodule.h LIB_H += tag.h @@ -657,6 +658,7 @@ LIB_OBJS += shallow.o LIB_OBJS += sideband.o LIB_OBJS += sigchain.o LIB_OBJS += strbuf.o +LIB_OBJS += streaming.o LIB_OBJS += string-list.o LIB_OBJS += submodule.o LIB_OBJS += symlinks.o diff --git a/streaming.c b/streaming.c new file mode 100644 index 0000000..13cbce7 --- /dev/null +++ b/streaming.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2011, Google Inc. 
+ */ +#include "cache.h" +#include "streaming.h" + +enum input_source { + stream_error = -1, + incore = 0, + loose = 1, + pack_non_delta = 2 +}; + +typedef int (*open_istream_fn)(struct git_istream *, + struct object_info *, + const unsigned char *, + enum object_type *); +typedef int (*close_istream_fn)(struct git_istream *); +typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t); + +struct stream_vtbl { + close_istream_fn close; + read_istream_fn read; +}; + +#define open_method_decl(name) \ + int open_istream_ ##name \ + (struct git_istream *st, struct object_info *oi, \ + const unsigned char *sha1, \ + enum object_type *type) + +#define close_method_decl(name) \ + int close_istream_ ##name \ + (struct git_istream *st) + +#define read_method_decl(name) \ + ssize_t read_istream_ ##name \ + (struct git_istream *st, char *buf, size_t sz) + +/* forward declaration */ +static open_method_decl(incore); +static open_method_decl(loose); +static open_method_decl(pack_non_delta); + +static open_istream_fn open_istream_tbl[] = { + open_istream_incore, + open_istream_loose, + open_istream_pack_non_delta, +}; + +struct git_istream { + const struct stream_vtbl *vtbl; + unsigned long size; /* inflated size of full object */ + + union { + struct { + char *buf; /* from read_object() */ + unsigned long read_ptr; + } incore; + + struct { + int fd; /* open for reading */ + /* NEEDSWORK: what else? */ + } loose; + + struct { + int fd; /* open for reading */ + /* NEEDSWORK: what else? 
*/ + } in_pack; + } u; +}; + +int close_istream(struct git_istream *st) +{ + return st->vtbl->close(st); +} + +ssize_t read_istream(struct git_istream *st, char *buf, size_t sz) +{ + return st->vtbl->read(st, buf, sz); +} + +static enum input_source istream_source(const unsigned char *sha1, + enum object_type *type, + struct object_info *oi) +{ + unsigned long size; + int status; + + oi->sizep = &size; + status = sha1_object_info_extended(sha1, oi); + if (status < 0) + return stream_error; + *type = status; + + switch (oi->whence) { + case OI_LOOSE: + return loose; + case OI_PACKED: + if (!oi->u.packed.is_delta && big_file_threshold <= size) + return pack_non_delta; + /* fallthru */ + default: + return incore; + } +} + +struct git_istream *open_istream(const unsigned char *sha1, + enum object_type *type, + unsigned long *size) +{ + struct git_istream *st; + struct object_info oi; + const unsigned char *real = lookup_replace_object(sha1); + enum input_source src = istream_source(real, type, &oi); + + if (src < 0) + return NULL; + + st = xmalloc(sizeof(*st)); + if (open_istream_tbl[src](st, &oi, real, type)) { + if (open_istream_incore(st, &oi, real, type)) { + free(st); + return NULL; + } + } + *size = st->size; + return st; +} + +/***************************************************************** + * + * Loose object stream + * + *****************************************************************/ + +static open_method_decl(loose) +{ + return -1; /* for now */ +} + + +/***************************************************************** + * + * Non-delta packed object stream + * + *****************************************************************/ + +static open_method_decl(pack_non_delta) +{ + return -1; /* for now */ +} + + +/***************************************************************** + * + * In-core stream + * + *****************************************************************/ + +static close_method_decl(incore) +{ + free(st->u.incore.buf); + return 0; +} + 
+static read_method_decl(incore) +{ + size_t read_size = sz; + size_t remainder = st->size - st->u.incore.read_ptr; + + if (remainder <= read_size) + read_size = remainder; + if (read_size) { + memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size); + st->u.incore.read_ptr += read_size; + } + return read_size; +} + +static struct stream_vtbl incore_vtbl = { + close_istream_incore, + read_istream_incore, +}; + +static open_method_decl(incore) +{ + st->u.incore.buf = read_sha1_file_extended(sha1, type, &st->size, 0); + st->u.incore.read_ptr = 0; + st->vtbl = &incore_vtbl; + + return st->u.incore.buf ? 0 : -1; +} diff --git a/streaming.h b/streaming.h new file mode 100644 index 0000000..18cbe68 --- /dev/null +++ b/streaming.h @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2011, Google Inc. + */ +#ifndef STREAMING_H +#define STREAMING_H 1 +#include "cache.h" + +/* opaque */ +struct git_istream; + +extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *); +extern int close_istream(struct git_istream *); +extern ssize_t read_istream(struct git_istream *, char *, size_t); + +#endif /* STREAMING_H */ -- cgit v0.10.2-6-g49f6 From dd8e912190540ef6578386aa1343fbbe196cb8c1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 12 May 2011 14:31:08 -0700 Subject: streaming_write_entry(): use streaming API in write_entry() When the output to a path does not have to be converted, we can read from the object database from the streaming API and write to the file in the working tree, without having to hold everything in the memory. The ident, auto- and safe- crlf conversions inherently require you to read the whole thing before deciding what to do, so while it is technically possible to support them by using a buffer of an unbound size or rewinding and reading the stream twice, it is less practical than the traditional "read the whole thing in core and convert" approach. 
Adding streaming filters for the other conversions on top of this should be doable by tweaking the can_bypass_conversion() function (it should be renamed to can_filter_stream() when it happens). Then the streaming API can be extended to wrap the git_istream streaming_write_entry() opens on the underlying object in another git_istream that reads from it, filters what is read, and let the streaming_write_entry() read the filtered result. But that is outside the scope of this series. Signed-off-by: Junio C Hamano diff --git a/cache.h b/cache.h index 7650d2e..2a7a77f 100644 --- a/cache.h +++ b/cache.h @@ -1156,6 +1156,7 @@ extern int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe); extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst); extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst); +extern int can_bypass_conversion(const char *path); /* add */ /* diff --git a/convert.c b/convert.c index efc7e07..d3c0041 100644 --- a/convert.c +++ b/convert.c @@ -813,3 +813,26 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str } return ret | convert_to_git(path, src, len, dst, 0); } + +/* + * You would be crazy to set CRLF, smudge/clean or ident to + * a large binary blob you would want us not to slurp into + * the memory! 
+ */ +int can_bypass_conversion(const char *path) +{ + struct conv_attrs ca; + enum crlf_action crlf_action; + + convert_attrs(&ca, path); + + if (ca.ident || + (ca.drv && (ca.drv->smudge || ca.drv->clean))) + return 0; + + crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); + if ((crlf_action == CRLF_BINARY) || + (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE)) + return 1; + return 0; +} diff --git a/entry.c b/entry.c index cc6502a..da37d01 100644 --- a/entry.c +++ b/entry.c @@ -1,6 +1,7 @@ #include "cache.h" #include "blob.h" #include "dir.h" +#include "streaming.h" static void create_directories(const char *path, int path_len, const struct checkout *state) @@ -114,6 +115,50 @@ static int fstat_output(int fd, const struct checkout *state, struct stat *st) return 0; } +static int streaming_write_entry(struct cache_entry *ce, char *path, + const struct checkout *state, int to_tempfile, + int *fstat_done, struct stat *statbuf) +{ + struct git_istream *st; + enum object_type type; + unsigned long sz; + int result = -1; + int fd = -1; + + st = open_istream(ce->sha1, &type, &sz); + if (!st) + return -1; + if (type != OBJ_BLOB) + goto close_and_exit; + + fd = open_output_fd(path, ce, to_tempfile); + if (fd < 0) + goto close_and_exit; + + for (;;) { + char buf[10240]; + ssize_t wrote; + ssize_t readlen = read_istream(st, buf, sizeof(buf)); + + if (!readlen) + break; + + wrote = write_in_full(fd, buf, readlen); + + if (wrote != readlen) + goto close_and_exit; + } + *fstat_done = fstat_output(fd, state, statbuf); + +close_and_exit: + close_istream(st); + if (0 <= fd) + result = close(fd); + if (result && 0 <= fd) + unlink(path); + return result; +} + static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile) { unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT; @@ -124,6 +169,12 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout size_t wrote, newsize = 0; struct stat st; + 
if ((ce_mode_s_ifmt == S_IFREG) && + can_bypass_conversion(path) && + !streaming_write_entry(ce, path, state, to_tempfile, + &fstat_done, &st)) + goto finish; + switch (ce_mode_s_ifmt) { case S_IFREG: case S_IFLNK: @@ -176,6 +227,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout return error("unknown file mode for %s in index", path); } +finish: if (state->refresh_cache) { if (!fstat_done) lstat(ce->name, &st); -- cgit v0.10.2-6-g49f6 From b0d9c69f5ee4a626a779d27a33f5565efccd5802 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 20 May 2011 16:14:32 -0700 Subject: convert: CRLF_INPUT is a no-op in the output codepath Signed-off-by: Junio C Hamano diff --git a/convert.c b/convert.c index d3c0041..264af1d 100644 --- a/convert.c +++ b/convert.c @@ -831,7 +831,7 @@ int can_bypass_conversion(const char *path) return 0; crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); - if ((crlf_action == CRLF_BINARY) || + if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) || (crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE)) return 1; return 0; -- cgit v0.10.2-6-g49f6 From de6182db67d0f53fdc13256042014f2ddf5f8df3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 13 May 2011 15:55:00 -0700 Subject: streaming_write_entry(): support files with holes One typical use of a large binary file is to hold a sparse on-disk hash table with a lot of holes. Help preserve the holes with lseek(). 
Signed-off-by: Junio C Hamano diff --git a/entry.c b/entry.c index da37d01..e2dc16c 100644 --- a/entry.c +++ b/entry.c @@ -123,6 +123,7 @@ static int streaming_write_entry(struct cache_entry *ce, char *path, enum object_type type; unsigned long sz; int result = -1; + ssize_t kept = 0; int fd = -1; st = open_istream(ce->sha1, &type, &sz); @@ -136,18 +137,34 @@ static int streaming_write_entry(struct cache_entry *ce, char *path, goto close_and_exit; for (;;) { - char buf[10240]; - ssize_t wrote; + char buf[1024 * 16]; + ssize_t wrote, holeto; ssize_t readlen = read_istream(st, buf, sizeof(buf)); if (!readlen) break; + if (sizeof(buf) == readlen) { + for (holeto = 0; holeto < readlen; holeto++) + if (buf[holeto]) + break; + if (readlen == holeto) { + kept += holeto; + continue; + } + } + if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1) + goto close_and_exit; + else + kept = 0; wrote = write_in_full(fd, buf, readlen); if (wrote != readlen) goto close_and_exit; } + if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 || + write(fd, "", 1) != 1)) + goto close_and_exit; *fstat_done = fstat_output(fd, state, statbuf); close_and_exit: -- cgit v0.10.2-6-g49f6 From 7ef2d9a2604b0cd554e59f948f2c879b3708dfa5 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 13 May 2011 15:34:58 -0700 Subject: streaming: read non-delta incrementally from a pack Helped-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/streaming.c b/streaming.c index 13cbce7..4fdd567 100644 --- a/streaming.c +++ b/streaming.c @@ -51,6 +51,8 @@ static open_istream_fn open_istream_tbl[] = { struct git_istream { const struct stream_vtbl *vtbl; unsigned long size; /* inflated size of full object */ + z_stream z; + enum { z_unused, z_used, z_done, z_error } z_state; union { struct { @@ -64,8 +66,8 @@ struct git_istream { } loose; struct { - int fd; /* open for reading */ - /* NEEDSWORK: what else? 
*/ + struct packed_git *pack; + off_t pos; } in_pack; } u; }; @@ -128,6 +130,20 @@ struct git_istream *open_istream(const unsigned char *sha1, return st; } + +/***************************************************************** + * + * Common helpers + * + *****************************************************************/ + +static void close_deflated_stream(struct git_istream *st) +{ + if (st->z_state == z_used) + git_inflate_end(&st->z); +} + + /***************************************************************** * * Loose object stream @@ -146,9 +162,92 @@ static open_method_decl(loose) * *****************************************************************/ +static read_method_decl(pack_non_delta) +{ + size_t total_read = 0; + + switch (st->z_state) { + case z_unused: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = z_used; + break; + case z_done: + return 0; + case z_error: + return -1; + case z_used: + break; + } + + while (total_read < sz) { + int status; + struct pack_window *window = NULL; + unsigned char *mapped; + + mapped = use_pack(st->u.in_pack.pack, &window, + st->u.in_pack.pos, &st->z.avail_in); + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); + + st->u.in_pack.pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; + unuse_pack(&window); + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = z_done; + break; + } + if (status != Z_OK && status != Z_BUF_ERROR) { + git_inflate_end(&st->z); + st->z_state = z_error; + return -1; + } + } + return total_read; +} + +static close_method_decl(pack_non_delta) +{ + close_deflated_stream(st); + return 0; +} + +static struct stream_vtbl pack_non_delta_vtbl = { + close_istream_pack_non_delta, + read_istream_pack_non_delta, +}; + static open_method_decl(pack_non_delta) { - return -1; /* for now */ + struct pack_window *window; + 
enum object_type in_pack_type; + + st->u.in_pack.pack = oi->u.packed.pack; + st->u.in_pack.pos = oi->u.packed.offset; + window = NULL; + + in_pack_type = unpack_object_header(st->u.in_pack.pack, + &window, + &st->u.in_pack.pos, + &st->size); + unuse_pack(&window); + switch (in_pack_type) { + default: + return -1; /* we do not do deltas for now */ + case OBJ_COMMIT: + case OBJ_TREE: + case OBJ_BLOB: + case OBJ_TAG: + break; + } + st->z_state = z_unused; + st->vtbl = &pack_non_delta_vtbl; + return 0; } -- cgit v0.10.2-6-g49f6 From f0270efd460e5ee28dca8481181f1ac1ed4111d1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 14 May 2011 19:42:10 -0700 Subject: sha1_file.c: expose helpers to read loose objects Make map_sha1_file(), parse_sha1_header() and unpack_sha1_header() available to the streaming read API by exporting them via cache.h header file. Signed-off-by: Junio C Hamano diff --git a/cache.h b/cache.h index 2a7a77f..a5067ba 100644 --- a/cache.h +++ b/cache.h @@ -780,6 +780,9 @@ extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1); extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); extern int force_object_loose(const unsigned char *sha1, time_t mtime); +extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size); +extern int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz); +extern int parse_sha1_header(const char *hdr, unsigned long *sizep); /* global flag to enable extra checks when accessing packed objects */ extern int do_check_packed_object_crc; diff --git a/sha1_file.c b/sha1_file.c index a28683a..5fc877f 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1186,7 +1186,7 @@ static int open_sha1_file(const unsigned char *sha1) return -1; } -static void *map_sha1_file(const unsigned char *sha1, unsigned 
long *size) +void *map_sha1_file(const unsigned char *sha1, unsigned long *size) { void *map; int fd; @@ -1245,7 +1245,7 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf, return used; } -static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz) +int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz) { unsigned long size, used; static const char valid_loose_object_type[8] = { @@ -1342,7 +1342,7 @@ static void *unpack_sha1_rest(z_stream *stream, void *buffer, unsigned long size * too permissive for what we want to check. So do an anal * object header parse by hand. */ -static int parse_sha1_header(const char *hdr, unsigned long *sizep) +int parse_sha1_header(const char *hdr, unsigned long *sizep) { char type[10]; int i; -- cgit v0.10.2-6-g49f6 From 93aa7bd595d37aec09b96da7ea9da89d9f659ebd Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 14 May 2011 19:17:10 -0700 Subject: streaming: read loose objects incrementally Helped-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/streaming.c b/streaming.c index 4fdd567..0602926 100644 --- a/streaming.c +++ b/streaming.c @@ -61,8 +61,11 @@ struct git_istream { } incore; struct { - int fd; /* open for reading */ - /* NEEDSWORK: what else? 
*/ + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; } loose; struct { @@ -150,9 +153,85 @@ static void close_deflated_stream(struct git_istream *st) * *****************************************************************/ +static read_method_decl(loose) +{ + size_t total_read = 0; + + switch (st->z_state) { + case z_done: + return 0; + case z_error: + return -1; + default: + break; + } + + if (st->u.loose.hdr_used < st->u.loose.hdr_avail) { + size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used; + if (sz < to_copy) + to_copy = sz; + memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy); + st->u.loose.hdr_used += to_copy; + total_read += to_copy; + } + + while (total_read < sz) { + int status; + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); + + total_read = st->z.next_out - (unsigned char *)buf; + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = z_done; + break; + } + if (status != Z_OK && status != Z_BUF_ERROR) { + git_inflate_end(&st->z); + st->z_state = z_error; + return -1; + } + } + return total_read; +} + +static close_method_decl(loose) +{ + close_deflated_stream(st); + munmap(st->u.loose.mapped, st->u.loose.mapsize); + return 0; +} + +static struct stream_vtbl loose_vtbl = { + close_istream_loose, + read_istream_loose, +}; + static open_method_decl(loose) { - return -1; /* for now */ + st->u.loose.mapped = map_sha1_file(sha1, &st->u.loose.mapsize); + if (!st->u.loose.mapped) + return -1; + if (unpack_sha1_header(&st->z, + st->u.loose.mapped, + st->u.loose.mapsize, + st->u.loose.hdr, + sizeof(st->u.loose.hdr)) < 0) { + git_inflate_end(&st->z); + munmap(st->u.loose.mapped, st->u.loose.mapsize); + return -1; + } + + parse_sha1_header(st->u.loose.hdr, &st->size); + st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; + st->u.loose.hdr_avail = st->z.total_out; + st->z_state = z_used; + + st->vtbl = 
&loose_vtbl; + return 0; } -- cgit v0.10.2-6-g49f6 From 23c7df6bdd13e3d99ca09b6a7655747cc29ccc41 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 26 May 2011 16:34:20 +0200 Subject: sha1_file: use the correct type (ssize_t, not size_t) for read-style function Using an unsigned type, we would fail to detect a read error and then proceed to try to write (size_t)-1 bytes. Signed-off-by: Jim Meyering Signed-off-by: Junio C Hamano diff --git a/sha1_file.c b/sha1_file.c index 5fc877f..8a85217 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2733,7 +2733,7 @@ static int index_stream(unsigned char *sha1, int fd, size_t size, while (size) { char buf[10240]; size_t sz = size < sizeof(buf) ? size : sizeof(buf); - size_t actual; + ssize_t actual; actual = read_in_full(fd, buf, sz); if (actual < 0) -- cgit v0.10.2-6-g49f6