From 3371f9b3fb13a508c447e484618b9b5e2b9e45a7 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 27 Mar 2011 13:13:22 -0500 Subject: Makefile: list one vcs-svn/xdiff object or header per line As the svn import infrastructure evolves, it's getting to be a pain to tell by eye what files were added or removed from a dependency line like VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \ vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o So use a style with one entry per line instead, like the existing BUILTIN_OBJS: # protect against environment VCSSVN_OBJS = ... VCSSVN_OBJS += vcs-svn/string_pool.o VCSSVN_OBJS += vcs-svn/line_buffer.o ... which is readable on its own and produces nice, clear diffs. Signed-off-by: Jonathan Nieder diff --git a/Makefile b/Makefile index ade7923..dd1af36 100644 --- a/Makefile +++ b/Makefile @@ -341,6 +341,11 @@ BUILTIN_OBJS = BUILT_INS = COMPAT_CFLAGS = COMPAT_OBJS = +XDIFF_H = +XDIFF_OBJS = +VCSSVN_H = +VCSSVN_OBJS = +VCSSVN_TEST_OBJS = EXTRA_CPPFLAGS = LIB_H = LIB_OBJS = @@ -1836,12 +1841,25 @@ GIT_OBJS := $(LIB_OBJS) $(BUILTIN_OBJS) $(PROGRAM_OBJS) $(TEST_OBJS) \ ifndef NO_CURL GIT_OBJS += http.o http-walker.o remote-curl.o endif -XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ - xdiff/xmerge.o xdiff/xpatience.o -VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \ - vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o -VCSSVN_TEST_OBJS = test-obj-pool.o test-string-pool.o \ - test-line-buffer.o test-treap.o + +XDIFF_OBJS += xdiff/xdiffi.o +XDIFF_OBJS += xdiff/xprepare.o +XDIFF_OBJS += xdiff/xutils.o +XDIFF_OBJS += xdiff/xemit.o +XDIFF_OBJS += xdiff/xmerge.o +XDIFF_OBJS += xdiff/xpatience.o + +VCSSVN_OBJS += vcs-svn/string_pool.o +VCSSVN_OBJS += vcs-svn/line_buffer.o +VCSSVN_OBJS += vcs-svn/repo_tree.o +VCSSVN_OBJS += vcs-svn/fast_export.o +VCSSVN_OBJS += vcs-svn/svndump.o + +VCSSVN_TEST_OBJS += test-obj-pool.o +VCSSVN_TEST_OBJS += test-string-pool.o +VCSSVN_TEST_OBJS += test-line-buffer.o +VCSSVN_TEST_OBJS += test-treap.o + OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS) dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d) @@ -1960,16 +1978,26 @@ connect.o transport.o http-backend.o: url.h http-fetch.o http-walker.o remote-curl.o transport.o walker.o: walker.h http.o http-walker.o http-push.o http-fetch.o remote-curl.o: http.h url.h -xdiff-interface.o $(XDIFF_OBJS): \ - xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \ - xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h - -$(VCSSVN_OBJS) $(VCSSVN_TEST_OBJS): $(LIB_H) \ - vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \ - vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \ - vcs-svn/svndump.h - -test-svn-fe.o: vcs-svn/svndump.h +XDIFF_H += xdiff/xinclude.h +XDIFF_H += xdiff/xmacros.h +XDIFF_H += xdiff/xdiff.h +XDIFF_H += xdiff/xtypes.h +XDIFF_H += xdiff/xutils.h +XDIFF_H += xdiff/xprepare.h +XDIFF_H += xdiff/xdiffi.h +XDIFF_H += xdiff/xemit.h + +xdiff-interface.o $(XDIFF_OBJS): $(XDIFF_H) + +VCSSVN_H += vcs-svn/obj_pool.h +VCSSVN_H += vcs-svn/trp.h +VCSSVN_H += vcs-svn/string_pool.h +VCSSVN_H += vcs-svn/line_buffer.h +VCSSVN_H += vcs-svn/repo_tree.h +VCSSVN_H += vcs-svn/fast_export.h +VCSSVN_H += vcs-svn/svndump.h + +$(VCSSVN_OBJS) $(VCSSVN_TEST_OBJS): $(LIB_H) $(VCSSVN_H) endif exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ -- cgit v0.10.2-6-g49f6 From 9d2f5ddfe56fcc228a36dd079f0897e0f474eb4e Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:54:58 -0600 Subject: vcs-svn: learn to maintain a sliding view of a file Each section of a Subversion-format delta only requires examining (and keeping in random-access memory) a small portion of the preimage. At any moment, this portion starts at a certain file offset and has a well-defined length, and as the delta is applied, the portion advances from the beginning to the end of the preimage. Add a move_window function to keep track of this view into the preimage. You can use it like this: buffer_init(f, NULL); struct sliding_view window = SLIDING_VIEW_INIT(f); move_window(&window, 3, 7); /* (1) */ move_window(&window, 5, 5); /* (2) */ move_window(&window, 12, 2); /* (3) */ strbuf_release(&window.buf); buffer_deinit(f); The data structure is called sliding_view instead of _window to prevent confusion with svndiff0 Windows. In this example, (1) reads 10 bytes and discards the first 3; (2) discards the first 2, which are not needed any more; and (3) skips 2 bytes and reads 2 new bytes to work with. When move_window returns, the file position indicator is at position window->off + window->width and the data from positions window->off to the current file position are stored in window->buf. This function performs only sequential access from the input file and never seeks, so it can be safely used on pipes and sockets. On end-of-file, move_window silently reads less than the caller requested. On other errors, it prints a message and returns -1. Helped-by: David Barr Signed-off-by: Jonathan Nieder diff --git a/Makefile b/Makefile index dd1af36..5cd141d 100644 --- a/Makefile +++ b/Makefile @@ -1851,6 +1851,7 @@ XDIFF_OBJS += xdiff/xpatience.o VCSSVN_OBJS += vcs-svn/string_pool.o VCSSVN_OBJS += vcs-svn/line_buffer.o +VCSSVN_OBJS += vcs-svn/sliding_window.o VCSSVN_OBJS += vcs-svn/repo_tree.o VCSSVN_OBJS += vcs-svn/fast_export.o VCSSVN_OBJS += vcs-svn/svndump.o @@ -1993,6 +1994,7 @@ VCSSVN_H += vcs-svn/obj_pool.h VCSSVN_H += vcs-svn/trp.h VCSSVN_H += vcs-svn/string_pool.h VCSSVN_H += vcs-svn/line_buffer.h +VCSSVN_H += vcs-svn/sliding_window.h VCSSVN_H += vcs-svn/repo_tree.h VCSSVN_H += vcs-svn/fast_export.h VCSSVN_H += vcs-svn/svndump.h diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE index 0a5e3c4..805882c 100644 --- a/vcs-svn/LICENSE +++ b/vcs-svn/LICENSE @@ -1,6 +1,8 @@ Copyright (C) 2010 David Barr . All rights reserved. +Copyright (C) 2010 Jonathan Nieder . + Copyright (C) 2008 Jason Evans . All rights reserved. diff --git a/vcs-svn/sliding_window.c b/vcs-svn/sliding_window.c new file mode 100644 index 0000000..1b8d987 --- /dev/null +++ b/vcs-svn/sliding_window.c @@ -0,0 +1,77 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "sliding_window.h" +#include "line_buffer.h" +#include "strbuf.h" + +static int input_error(struct line_buffer *file) +{ + if (!buffer_ferror(file)) + return error("delta preimage ends early"); + return error("cannot read delta preimage: %s", strerror(errno)); +} + +static int skip_or_whine(struct line_buffer *file, off_t gap) +{ + if (buffer_skip_bytes(file, gap) != gap) + return input_error(file); + return 0; +} + +static int read_to_fill_or_whine(struct line_buffer *file, + struct strbuf *buf, size_t width) +{ + buffer_read_binary(file, buf, width - buf->len); + if (buf->len != width) + return input_error(file); + return 0; +} + +static int check_overflow(off_t a, size_t b) +{ + if (b > maximum_signed_value_of_type(off_t)) + return error("unrepresentable length in delta: " + "%"PRIuMAX" > OFF_MAX", (uintmax_t) b); + if (signed_add_overflows(a, (off_t) b)) + return error("unrepresentable offset in delta: " + "%"PRIuMAX" + %"PRIuMAX" > OFF_MAX", + (uintmax_t) a, (uintmax_t) b); + return 0; +} + +int move_window(struct sliding_view *view, off_t off, size_t width) +{ + off_t file_offset; + assert(view); + assert(view->width <= view->buf.len); + assert(!check_overflow(view->off, view->buf.len)); + + if (check_overflow(off, width)) + return -1; + if (off < view->off || off + width < view->off + view->width) + return error("invalid delta: window slides left"); + + file_offset = view->off + view->buf.len; + if (off < file_offset) { + /* Move the overlapping region into place. */ + strbuf_remove(&view->buf, 0, off - view->off); + } else { + /* Seek ahead to skip the gap. */ + if (skip_or_whine(view->file, off - file_offset)) + return -1; + strbuf_setlen(&view->buf, 0); + } + + if (view->buf.len > width) + ; /* Already read. */ + else if (read_to_fill_or_whine(view->file, &view->buf, width)) + return -1; + + view->off = off; + view->width = width; + return 0; +} diff --git a/vcs-svn/sliding_window.h b/vcs-svn/sliding_window.h new file mode 100644 index 0000000..ed0bfdd --- /dev/null +++ b/vcs-svn/sliding_window.h @@ -0,0 +1,17 @@ +#ifndef SLIDING_WINDOW_H_ +#define SLIDING_WINDOW_H_ + +#include "strbuf.h" + +struct sliding_view { + struct line_buffer *file; + off_t off; + size_t width; + struct strbuf buf; +}; + +#define SLIDING_VIEW_INIT(input) { (input), 0, 0, STRBUF_INIT } + +extern int move_window(struct sliding_view *view, off_t off, size_t width); + +#endif -- cgit v0.10.2-6-g49f6 From 896e4bfcec4f6b489aba2197f60a59bc7f45a8ac Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:37:36 -0600 Subject: vcs-svn: make buffer_read_binary API more convenient buffer_read_binary is a thin wrapper around fread, but its signature is wrong: - fread can fill an arbitrary in-memory buffer. buffer_read_binary is limited to buffers whose size is representable by a 32-bit integer. - The result from fread is the number of bytes actually read. buffer_read_binary only reports the number of bytes read by incrementing sb->len by that amount and returns void. Fix both: let buffer_read_binary accept a size_t instead of uint32_t for the number of bytes to read and as a convenience return the number of bytes actually read. Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index c390387..01fcb84 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -91,10 +91,10 @@ char *buffer_read_line(struct line_buffer *buf) return buf->line_buffer; } -void buffer_read_binary(struct line_buffer *buf, - struct strbuf *sb, uint32_t size) +size_t buffer_read_binary(struct line_buffer *buf, + struct strbuf *sb, size_t size) { - strbuf_fread(sb, size, buf->infile); + return strbuf_fread(sb, size, buf->infile); } off_t buffer_copy_bytes(struct line_buffer *buf, off_t nbytes) diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index d0b22dd..8901f21 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -23,7 +23,7 @@ long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); int buffer_ferror(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); int buffer_read_char(struct line_buffer *buf); -void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); +size_t buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, size_t len); /* Returns number of bytes read (not necessarily written). */ off_t buffer_copy_bytes(struct line_buffer *buf, off_t len); off_t buffer_skip_bytes(struct line_buffer *buf, off_t len); -- cgit v0.10.2-6-g49f6 From ddcc8c5b469d2564dbacd629a873e7703f2dbd83 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 25 Dec 2010 05:11:32 -0600 Subject: vcs-svn: skeleton of an svn delta parser A delta in the subversion delta (svndiff0) format consists of the magic bytes SVN\0 followed by a sequence of windows of a certain well specified format (starting with five integers). Add an svndiff0_apply function and test-svn-fe -d commandline tool to parse such a delta in the special case of not including any windows. Later patches will add features to turn this into a fully functional delta applier for svn-fe to use to parse the streams produced by "svnrdump dump" and "svnadmin dump --deltas". The content of symlinks starts with the word "link " in Subversion's worldview, so we need to be able to prepend that text to input for the sake of delta application. So initialization of the input state of the delta preimage is left to the calling program, giving callers a chance to seed the buffer with text of their choice. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder diff --git a/Makefile b/Makefile index 5cd141d..cd7dd97 100644 --- a/Makefile +++ b/Makefile @@ -1854,6 +1854,7 @@ VCSSVN_OBJS += vcs-svn/line_buffer.o VCSSVN_OBJS += vcs-svn/sliding_window.o VCSSVN_OBJS += vcs-svn/repo_tree.o VCSSVN_OBJS += vcs-svn/fast_export.o +VCSSVN_OBJS += vcs-svn/svndiff.o VCSSVN_OBJS += vcs-svn/svndump.o VCSSVN_TEST_OBJS += test-obj-pool.o @@ -1997,6 +1998,7 @@ VCSSVN_H += vcs-svn/line_buffer.h VCSSVN_H += vcs-svn/sliding_window.h VCSSVN_H += vcs-svn/repo_tree.h VCSSVN_H += vcs-svn/fast_export.h +VCSSVN_H += vcs-svn/svndiff.h VCSSVN_H += vcs-svn/svndump.h $(VCSSVN_OBJS) $(VCSSVN_TEST_OBJS): $(LIB_H) $(VCSSVN_H) diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh new file mode 100755 index 0000000..ee0c1e2 --- /dev/null +++ b/t/t9011-svn-da.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +test_description='test parsing of svndiff0 files + +Using the "test-svn-fe -d" helper, check that svn-fe correctly +interprets deltas using various facilities (some from the spec, +some only learned from practice). +' +. ./test-lib.sh + +>empty +printf foo >preimage + +test_expect_success 'reject empty delta' ' + test_must_fail test-svn-fe -d preimage empty 0 +' + +test_expect_success 'delta can empty file' ' + printf "SVNQ" | q_to_nul >clear.delta && + test-svn-fe -d preimage clear.delta 4 >actual && + test_cmp empty actual +' + +test_expect_success 'reject svndiff2' ' + printf "SVN\002" >bad.filetype && + test_must_fail test-svn-fe -d preimage bad.filetype 4 +' + +test_expect_failure 'one-window empty delta' ' + printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow && + test-svn-fe -d preimage clear.onewindow 9 >actual && + test_cmp empty actual +' + +test_done diff --git a/test-svn-fe.c b/test-svn-fe.c index b42ba78..66bd040 100644 --- a/test-svn-fe.c +++ b/test-svn-fe.c @@ -4,15 +4,41 @@ #include "git-compat-util.h" #include "vcs-svn/svndump.h" +#include "vcs-svn/svndiff.h" +#include "vcs-svn/sliding_window.h" +#include "vcs-svn/line_buffer.h" int main(int argc, char *argv[]) { - if (argc != 2) - usage("test-svn-fe "); - if (svndump_init(argv[1])) - return 1; - svndump_read(NULL); - svndump_deinit(); - svndump_reset(); - return 0; + static const char test_svnfe_usage[] = + "test-svn-fe ( | [-d] )"; + if (argc == 2) { + if (svndump_init(argv[1])) + return 1; + svndump_read(NULL); + svndump_deinit(); + svndump_reset(); + return 0; + } + if (argc == 5 && !strcmp(argv[1], "-d")) { + struct line_buffer preimage = LINE_BUFFER_INIT; + struct line_buffer delta = LINE_BUFFER_INIT; + struct sliding_view preimage_view = SLIDING_VIEW_INIT(&preimage); + if (buffer_init(&preimage, argv[2])) + die_errno("cannot open preimage"); + if (buffer_init(&delta, argv[3])) + die_errno("cannot open delta"); + if (svndiff0_apply(&delta, (off_t) strtoull(argv[4], NULL, 0), + &preimage_view, stdout)) + return 1; + if (buffer_deinit(&preimage)) + die_errno("cannot close preimage"); + if (buffer_deinit(&delta)) + die_errno("cannot close delta"); + buffer_reset(&preimage); + strbuf_release(&preimage_view.buf); + buffer_reset(&delta); + return 0; + } + usage(test_svnfe_usage); } diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c new file mode 100644 index 0000000..5916036 --- /dev/null +++ b/vcs-svn/svndiff.c @@ -0,0 +1,52 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "line_buffer.h" +#include "svndiff.h" + +/* + * svndiff0 applier + * + * See http://svn.apache.org/repos/asf/subversion/trunk/notes/svndiff. + * + * svndiff0 ::= 'SVN\0' window* + */ + +static int error_short_read(struct line_buffer *input) +{ + if (buffer_ferror(input)) + return error("error reading delta: %s", strerror(errno)); + return error("invalid delta: unexpected end of file"); +} + +static int read_magic(struct line_buffer *in, off_t *len) +{ + static const char magic[] = {'S', 'V', 'N', '\0'}; + struct strbuf sb = STRBUF_INIT; + + if (*len < sizeof(magic) || + buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) + return error_short_read(in); + + if (memcmp(sb.buf, magic, sizeof(magic))) + return error("invalid delta: unrecognized file type"); + + *len -= sizeof(magic); + strbuf_release(&sb); + return 0; +} + +int svndiff0_apply(struct line_buffer *delta, off_t delta_len, + struct sliding_view *preimage, FILE *postimage) +{ + assert(delta && preimage && postimage); + + if (read_magic(delta, &delta_len)) + return -1; + if (delta_len) + return error("What do you think I am? A delta applier?"); + return 0; +} diff --git a/vcs-svn/svndiff.h b/vcs-svn/svndiff.h new file mode 100644 index 0000000..74eb464 --- /dev/null +++ b/vcs-svn/svndiff.h @@ -0,0 +1,10 @@ +#ifndef SVNDIFF_H_ +#define SVNDIFF_H_ + +struct line_buffer; +struct sliding_view; + +extern int svndiff0_apply(struct line_buffer *delta, off_t delta_len, + struct sliding_view *preimage, FILE *postimage); + +#endif -- cgit v0.10.2-6-g49f6 From 252712111fad127db365e3dd764309fe5658679a Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:21:43 -0500 Subject: vcs-svn: parse svndiff0 window header Each window in a subversion delta (svndiff0-format file) starts with a window header, consisting of five integers with variable-length representation: source view offset source view length output length instructions length auxiliary data length Parse it. The result is not usable for deltas with nonempty postimage yet; in fact, this only adds support for deltas without any instructions or auxiliary data. This is a good place to stop, though, since that little support lets us add some simple passing tests concerning error handling to the test suite. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index ee0c1e2..be12340 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -26,10 +26,64 @@ test_expect_success 'reject svndiff2' ' test_must_fail test-svn-fe -d preimage bad.filetype 4 ' -test_expect_failure 'one-window empty delta' ' +test_expect_success 'one-window empty delta' ' printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow && test-svn-fe -d preimage clear.onewindow 9 >actual && test_cmp empty actual ' +test_expect_success 'reject incomplete window header' ' + printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow && + printf "SVNQ%s" "QQ" | q_to_nul >clear.partialwindow && + test_must_fail test-svn-fe -d preimage clear.onewindow 6 && + test_must_fail test-svn-fe -d preimage clear.partialwindow 6 +' + +test_expect_success 'reject declared delta longer than actual delta' ' + printf "SVNQ%s" "QQQQQ" | q_to_nul >clear.onewindow && + printf "SVNQ%s" "QQ" | q_to_nul >clear.partialwindow && + test_must_fail test-svn-fe -d preimage clear.onewindow 14 && + test_must_fail test-svn-fe -d preimage clear.partialwindow 9 +' + +test_expect_success 'two-window empty delta' ' + printf "SVNQ%s%s" "QQQQQ" "QQQQQ" | q_to_nul >clear.twowindow && + test-svn-fe -d preimage clear.twowindow 14 >actual && + test_must_fail test-svn-fe -d preimage clear.twowindow 13 && + test_cmp empty actual +' + +test_expect_success 'noisy zeroes' ' + printf "SVNQ%s" \ + "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRQQQQQ" | + tr R "\200" | + q_to_nul >clear.noisy && + len=$(wc -c clear.badmagic && + test_must_fail test-svn-fe -d preimage clear.badmagic 5 +' + +test_expect_success 'reject truncated integer' ' + printf "SVNQ%s%s" "QQQQQ" "QQQQRRQ" | + tr R "\200" | + q_to_nul >clear.fullint && + printf "SVNQ%s%s" "QQQQQ" "QQQQRR" | + tr RT "\201" | + q_to_nul >clear.partialint && + test_must_fail test-svn-fe -d preimage clear.fullint 15 && + test-svn-fe -d preimage clear.fullint 16 && + test_must_fail test-svn-fe -d preimage clear.partialint 15 +' + +test_expect_success 'nonempty (but unused) preimage view' ' + printf "SVNQ%b" "Q\003QQQ" | q_to_nul >clear.readpreimage && + test-svn-fe -d preimage clear.readpreimage 9 >actual && + test_cmp empty actual +' + test_done diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index 5916036..249efb6 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -13,8 +13,16 @@ * See http://svn.apache.org/repos/asf/subversion/trunk/notes/svndiff. * * svndiff0 ::= 'SVN\0' window* + * window ::= int int int int int instructions inline_data; + * int ::= highdigit* lowdigit; + * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value; + * lowdigit ::= # 7 bit value; */ +#define VLI_CONTINUE 0x80 +#define VLI_DIGIT_MASK 0x7f +#define VLI_BITS_PER_DIGIT 7 + static int error_short_read(struct line_buffer *input) { if (buffer_ferror(input)) @@ -28,17 +36,84 @@ static int read_magic(struct line_buffer *in, off_t *len) struct strbuf sb = STRBUF_INIT; if (*len < sizeof(magic) || - buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) - return error_short_read(in); + buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) { + error_short_read(in); + strbuf_release(&sb); + return -1; + } - if (memcmp(sb.buf, magic, sizeof(magic))) + if (memcmp(sb.buf, magic, sizeof(magic))) { + strbuf_release(&sb); return error("invalid delta: unrecognized file type"); + } *len -= sizeof(magic); strbuf_release(&sb); return 0; } +static int read_int(struct line_buffer *in, uintmax_t *result, off_t *len) +{ + uintmax_t rv = 0; + off_t sz; + for (sz = *len; sz; sz--) { + const int ch = buffer_read_char(in); + if (ch == EOF) + break; + + rv <<= VLI_BITS_PER_DIGIT; + rv += (ch & VLI_DIGIT_MASK); + if (ch & VLI_CONTINUE) + continue; + + *result = rv; + *len = sz - 1; + return 0; + } + return error_short_read(in); +} + +static int read_offset(struct line_buffer *in, off_t *result, off_t *len) +{ + uintmax_t val; + if (read_int(in, &val, len)) + return -1; + if (val > maximum_signed_value_of_type(off_t)) + return error("unrepresentable offset in delta: %"PRIuMAX"", val); + *result = val; + return 0; +} + +static int read_length(struct line_buffer *in, size_t *result, off_t *len) +{ + uintmax_t val; + if (read_int(in, &val, len)) + return -1; + if (val > SIZE_MAX) + return error("unrepresentable length in delta: %"PRIuMAX"", val); + *result = val; + return 0; +} + +static int apply_one_window(struct line_buffer *delta, off_t *delta_len) +{ + size_t out_len; + size_t instructions_len; + size_t data_len; + assert(delta_len); + + /* "source view" offset and length already handled; */ + if (read_length(delta, &out_len, delta_len) || + read_length(delta, &instructions_len, delta_len) || + read_length(delta, &data_len, delta_len)) + return -1; + if (instructions_len) + return error("What do you think I am? A delta applier?"); + if (data_len) + return error("No support for inline data yet"); + return 0; +} + int svndiff0_apply(struct line_buffer *delta, off_t delta_len, struct sliding_view *preimage, FILE *postimage) { @@ -46,7 +121,14 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_magic(delta, &delta_len)) return -1; - if (delta_len) - return error("What do you think I am? A delta applier?"); + while (delta_len) { /* For each window: */ + off_t pre_off; + size_t pre_len; + + if (read_offset(delta, &pre_off, &delta_len) || + read_length(delta, &pre_len, &delta_len) || + apply_one_window(delta, &delta_len)) + return -1; + } return 0; } -- cgit v0.10.2-6-g49f6 From bcd254621f9a98794cdc32906db10af7135824c4 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:30:37 -0500 Subject: vcs-svn: read the preimage when applying deltas The source view offset heading each svndiff0 window represents a number of bytes past the beginning of the preimage. Together with the source view length, it dictates to the delta applier what portion of the preimage instructions will refer to. Read that portion right away using the sliding window code. Maybe some day we will use mmap to read data more lazily. Subversion's implementation tolerates source view offsets pointing past the end of the preimage file but we do not, for simplicity. This does not teach the delta applier to read instructions or copy data from the source view. Deltas that could produce nonempty output will still be rejected. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index be12340..90b6058 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -86,4 +86,39 @@ test_expect_success 'nonempty (but unused) preimage view' ' test_cmp empty actual ' +test_expect_success 'preimage view: right endpoint cannot backtrack' ' + printf "SVNQ%b%b" "Q\003QQQ" "Q\002QQQ" | + q_to_nul >clear.backtrack && + test_must_fail test-svn-fe -d preimage clear.backtrack 14 +' + +test_expect_success 'preimage view: left endpoint can advance' ' + printf "SVNQ%b%b" "Q\003QQQ" "\001\002QQQ" | + q_to_nul >clear.preshrink && + printf "SVNQ%b%b" "Q\003QQQ" "\001\001QQQ" | + q_to_nul >clear.shrinkbacktrack && + test-svn-fe -d preimage clear.preshrink 14 >actual && + test_must_fail test-svn-fe -d preimage clear.shrinkbacktrack 14 && + test_cmp empty actual +' + +test_expect_success 'preimage view: offsets compared by value' ' + printf "SVNQ%b%b" "\001\001QQQ" "\0200Q\003QQQ" | + q_to_nul >clear.noisybacktrack && + printf "SVNQ%b%b" "\001\001QQQ" "\0200\001\002QQQ" | + q_to_nul >clear.noisyadvance && + test_must_fail test-svn-fe -d preimage clear.noisybacktrack 15 && + test-svn-fe -d preimage clear.noisyadvance 15 && + test_cmp empty actual +' + +test_expect_success 'preimage view: reject truncated preimage' ' + printf "SVNQ%b" "\010QQQQ" | q_to_nul >clear.lateemptyread && + printf "SVNQ%b" "\010\001QQQ" | q_to_nul >clear.latenonemptyread && + printf "SVNQ%b" "\001\010QQQ" | q_to_nul >clear.longread && + test_must_fail test-svn-fe -d preimage clear.lateemptyread 9 && + test_must_fail test-svn-fe -d preimage clear.latenonemptyread 9 && + test_must_fail test-svn-fe -d preimage clear.longread 9 +' + test_done diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index 249efb6..b7c2c8b 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -4,6 +4,7 @@ */ #include "git-compat-util.h" +#include "sliding_window.h" #include "line_buffer.h" #include "svndiff.h" @@ -127,6 +128,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_offset(delta, &pre_off, &delta_len) || read_length(delta, &pre_len, &delta_len) || + move_window(preimage, pre_off, pre_len) || apply_one_window(delta, &delta_len)) return -1; } -- cgit v0.10.2-6-g49f6 From fc4ae43b2cbd53da6ac2a0047fb4e53175921696 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:35:59 -0500 Subject: vcs-svn: read inline data from deltas Each window of an svndiff0-format delta includes a section for novel text to be copied to the postimage (in the order it appears in the window, possibly interspersed with other data). Slurp in this data when encountering it. It is not actually necessary to do so --- it would be just as easy to copy from delta to output as part of interpreting the relevant instructions --- but this way, the code that interprets svndiff0 instructions can proceed very quickly because it does not require I/O. Subversion's svndiff0 parser rejects deltas that do not consume all the novel text that was provided. Omit that check for now so we can test the new functionality right away, rather than waiting to learn instructions that consume data. Do check for truncated data sections. Subversion's parser rejects deltas that end in the middle of a declared novel-text section, so it should be safe for us to reject them, too. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index 90b6058..26a4a36 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -121,4 +121,16 @@ test_expect_success 'preimage view: reject truncated preimage' ' test_must_fail test-svn-fe -d preimage clear.longread 9 ' +test_expect_success 'inline data' ' + printf "SVNQ%b%s%b%s" "QQQQ\003" "bar" "QQQQ\001" "x" | + q_to_nul >inline.clear && + test-svn-fe -d preimage inline.clear 18 >actual && + test_cmp empty actual +' + +test_expect_success 'reject truncated inline data' ' + printf "SVNQ%b%s" "QQQQ\003" "b" | q_to_nul >inline.trunc && + test_must_fail test-svn-fe -d preimage inline.trunc 10 +' + test_done diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index b7c2c8b..175168f 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -24,6 +24,17 @@ #define VLI_DIGIT_MASK 0x7f #define VLI_BITS_PER_DIGIT 7 +struct window { + struct strbuf data; +}; + +#define WINDOW_INIT { STRBUF_INIT } + +static void window_release(struct window *ctx) +{ + strbuf_release(&ctx->data); +} + static int error_short_read(struct line_buffer *input) { if (buffer_ferror(input)) @@ -31,24 +42,30 @@ static int error_short_read(struct line_buffer *input) return error("invalid delta: unexpected end of file"); } +static int read_chunk(struct line_buffer *delta, off_t *delta_len, + struct strbuf *buf, size_t len) +{ + strbuf_reset(buf); + if (len > *delta_len || + buffer_read_binary(delta, buf, len) != len) + return error_short_read(delta); + *delta_len -= buf->len; + return 0; +} + static int read_magic(struct line_buffer *in, off_t *len) { static const char magic[] = {'S', 'V', 'N', '\0'}; struct strbuf sb = STRBUF_INIT; - if (*len < sizeof(magic) || - buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) { - error_short_read(in); + if (read_chunk(in, len, &sb, sizeof(magic))) { strbuf_release(&sb); return -1; } - if (memcmp(sb.buf, magic, sizeof(magic))) { strbuf_release(&sb); return error("invalid delta: unrecognized file type"); } - - *len -= sizeof(magic); strbuf_release(&sb); return 0; } @@ -98,6 +115,7 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len) static int apply_one_window(struct line_buffer *delta, off_t *delta_len) { + struct window ctx = WINDOW_INIT; size_t out_len; size_t instructions_len; size_t data_len; @@ -107,12 +125,18 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len) if (read_length(delta, &out_len, delta_len) || read_length(delta, &instructions_len, delta_len) || read_length(delta, &data_len, delta_len)) - return -1; - if (instructions_len) - return error("What do you think I am? A delta applier?"); - if (data_len) - return error("No support for inline data yet"); + goto error_out; + if (instructions_len) { + error("What do you think I am? A delta applier?"); + goto error_out; + } + if (read_chunk(delta, delta_len, &ctx.data, data_len)) + goto error_out; + window_release(&ctx); return 0; +error_out: + window_release(&ctx); + return -1; } int svndiff0_apply(struct line_buffer *delta, off_t delta_len, -- cgit v0.10.2-6-g49f6 From ef2ac77e9f8f4819f75cf52721567463e60a805c Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:38:01 -0500 Subject: vcs-svn: read instructions from deltas Buffer the instruction section upon encountering it for later interpretation. An alternative design would involve parsing the instructions at this point and buffering them in some processed form. Using the unprocessed form is simpler. Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index 26a4a36..d9acd0c 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -133,4 +133,9 @@ test_expect_success 'reject truncated inline data' ' test_must_fail test-svn-fe -d preimage inline.trunc 10 ' +test_expect_success 'reject truncated inline data (after instruction section)' ' + printf "SVNQ%b%b%s" "QQ\001\001\003" "\0201" "b" | q_to_nul >insn.trunc && + test_must_fail test-svn-fe -d preimage insn.trunc 11 +' + test_done diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index 175168f..8968fdb 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -25,13 +25,15 @@ #define VLI_BITS_PER_DIGIT 7 struct window { + struct strbuf instructions; struct strbuf data; }; -#define WINDOW_INIT { STRBUF_INIT } +#define WINDOW_INIT { STRBUF_INIT, STRBUF_INIT } static void window_release(struct window *ctx) { + strbuf_release(&ctx->instructions); strbuf_release(&ctx->data); } @@ -124,7 +126,8 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len) /* "source view" offset and length already handled; */ if (read_length(delta, &out_len, delta_len) || read_length(delta, &instructions_len, delta_len) || - read_length(delta, &data_len, delta_len)) + read_length(delta, &data_len, delta_len) || + read_chunk(delta, delta_len, &ctx.instructions, instructions_len)) goto error_out; if (instructions_len) { error("What do you think I am? A delta applier?"); -- cgit v0.10.2-6-g49f6 From ec71aa2e1f229b90092e6678ac7c2dca3d15b5f3 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:39:44 -0500 Subject: vcs-svn: implement copyfrom_data delta instruction The copyfrom_data instruction copies a few bytes verbatim from the novel text section of a window to the postimage. [jn: with memory leak fix from David] Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index d9acd0c..ba8ce05 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -138,4 +138,35 @@ test_expect_success 'reject truncated inline data (after instruction section)' ' test_must_fail test-svn-fe -d preimage insn.trunc 11 ' +test_expect_success 'copyfrom_data' ' + echo hi >expect && + printf "SVNQ%b%b%b" "QQ\003\001\003" "\0203" "hi\n" | q_to_nul >copydat && + test-svn-fe -d preimage copydat 13 >actual && + test_cmp expect actual +' + +test_expect_success 'multiple copyfrom_data' ' + echo hi >expect && + printf "SVNQ%b%b%b%b%b" "QQ\003\002\003" "\0201\0202" "hi\n" \ + "QQQ\002Q" "\0200Q" | q_to_nul >copy.multi && + len=$(wc -c actual && + test_cmp expect actual +' + +test_expect_success 'incomplete multiple insn' ' + printf "SVNQ%b%b%b" "QQ\003\002\003" "\0203\0200" "hi\n" | + q_to_nul >copy.partial && + len=$(wc -c copy.incomplete && + len=$(wc -c out); strbuf_release(&ctx->instructions); strbuf_release(&ctx->data); } +static int write_strbuf(struct strbuf *sb, FILE *out) +{ + if (fwrite(sb->buf, 1, sb->len, out) == sb->len) /* Success. */ + return 0; + return error("cannot write delta postimage: %s", strerror(errno)); +} + static int error_short_read(struct line_buffer *input) { if (buffer_ferror(input)) @@ -93,6 +114,25 @@ static int read_int(struct line_buffer *in, uintmax_t *result, off_t *len) return error_short_read(in); } +static int parse_int(const char **buf, size_t *result, const char *end) +{ + size_t rv = 0; + const char *pos; + for (pos = *buf; pos != end; pos++) { + unsigned char ch = *pos; + + rv <<= VLI_BITS_PER_DIGIT; + rv += (ch & VLI_DIGIT_MASK); + if (ch & VLI_CONTINUE) + continue; + + *result = rv; + *buf = pos + 1; + return 0; + } + return error("invalid delta: unexpected end of instructions section"); +} + static int read_offset(struct line_buffer *in, off_t *result, off_t *len) { uintmax_t val; @@ -115,7 +155,64 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len) return 0; } -static int apply_one_window(struct line_buffer *delta, off_t *delta_len) +static int copyfrom_data(struct window *ctx, size_t *data_pos, size_t nbytes) +{ + const size_t pos = *data_pos; + if (unsigned_add_overflows(pos, nbytes) || + pos + nbytes > ctx->data.len) + return error("invalid delta: copies unavailable inline data"); + strbuf_add(&ctx->out, ctx->data.buf + pos, nbytes); + *data_pos += nbytes; + return 0; +} + +static int parse_first_operand(const char **buf, size_t *out, const char *end) +{ + size_t result = (unsigned char) *(*buf)++ & OPERAND_MASK; + if (result) { /* immediate operand */ + *out = result; + return 0; + } + return parse_int(buf, out, end); +} + +static int execute_one_instruction(struct window *ctx, + const char **instructions, size_t *data_pos) +{ + unsigned int instruction; + const char *insns_end = ctx->instructions.buf + ctx->instructions.len; + size_t nbytes; + assert(ctx); + assert(instructions && *instructions); + assert(data_pos); + + instruction = (unsigned char) **instructions; + if (parse_first_operand(instructions, &nbytes, insns_end)) + return -1; + if ((instruction & INSN_MASK) != INSN_COPYFROM_DATA) + return error("Unknown instruction %x", instruction); + return copyfrom_data(ctx, data_pos, nbytes); +} + +static int apply_window_in_core(struct window *ctx) +{ + const char *instructions; + size_t data_pos = 0; + + /* + * Fill ctx->out.buf using data from the source, target, + * and inline data views. + */ + for (instructions = ctx->instructions.buf; + instructions != ctx->instructions.buf + ctx->instructions.len; + ) + if (execute_one_instruction(ctx, &instructions, &data_pos)) + return -1; + return 0; +} + +static int apply_one_window(struct line_buffer *delta, off_t *delta_len, + FILE *out) { struct window ctx = WINDOW_INIT; size_t out_len; @@ -127,13 +224,17 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len) if (read_length(delta, &out_len, delta_len) || read_length(delta, &instructions_len, delta_len) || read_length(delta, &data_len, delta_len) || - read_chunk(delta, delta_len, &ctx.instructions, instructions_len)) + read_chunk(delta, delta_len, &ctx.instructions, instructions_len) || + read_chunk(delta, delta_len, &ctx.data, data_len)) + goto error_out; + strbuf_grow(&ctx.out, out_len); + if (apply_window_in_core(&ctx)) goto error_out; - if (instructions_len) { - error("What do you think I am? A delta applier?"); + if (ctx.out.len != out_len) { + error("invalid delta: incorrect postimage length"); goto error_out; } - if (read_chunk(delta, delta_len, &ctx.data, data_len)) + if (write_strbuf(&ctx.out, out)) goto error_out; window_release(&ctx); return 0; @@ -156,7 +257,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_offset(delta, &pre_off, &delta_len) || read_length(delta, &pre_len, &delta_len) || move_window(preimage, pre_off, pre_len) || - apply_one_window(delta, &delta_len)) + apply_one_window(delta, &delta_len, postimage)) return -1; } return 0; -- cgit v0.10.2-6-g49f6 From 4c9b93ed7644a7a7c72bdd8105d88a9ebb8e3e74 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:48:07 -0500 Subject: vcs-svn: verify that deltas consume all inline data By constraining the format of deltas, we can more easily detect corruption and other breakage. Requiring deltas not to provide unconsumed data also opens the possibility of ignoring the declared amount of novel data and simply streaming the data as needed to fulfill copyfrom_data requests. Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index ba8ce05..72691b9 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -121,11 +121,10 @@ test_expect_success 'preimage view: reject truncated preimage' ' test_must_fail test-svn-fe -d preimage clear.longread 9 ' -test_expect_success 'inline data' ' +test_expect_success 'forbid unconsumed inline data' ' printf "SVNQ%b%s%b%s" "QQQQ\003" "bar" "QQQQ\001" "x" | q_to_nul >inline.clear && - test-svn-fe -d preimage inline.clear 18 >actual && - test_cmp empty actual + test_must_fail test-svn-fe -d preimage inline.clear 18 >actual ' test_expect_success 'reject truncated inline data' ' diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index ed1d4a0..fb7dc22 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -208,6 +208,8 @@ static int apply_window_in_core(struct window *ctx) ) if (execute_one_instruction(ctx, &instructions, &data_pos)) return -1; + if (data_pos != ctx->data.len) + return error("invalid delta: does not copy all inline data"); return 0; } -- cgit v0.10.2-6-g49f6 From d3f131b57ec0e69a37bca882fa6bf39aa4c1c387 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:50:07 -0500 Subject: vcs-svn: let deltas use data from postimage The copyfrom_target instruction copies appends data that is already present in the current output view to the end of output. (The offset argument is relative to the beginning of output produced in the current window.) The region copied is allowed to run past the end of the existing output. To support that case, copy one character at a time rather than calling memcpy or memmove. This allows copyfrom_target to be used once to repeat a string many times. For example: COPYFROM_DATA 2 COPYFROM_OUTPUT 10, 0 DATA "ab" would produce the output "ababababababababababab". Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index 72691b9..f9573a1 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -168,4 +168,46 @@ test_expect_success 'catch attempt to copy missing data' ' test_must_fail test-svn-fe -d preimage copy.incomplete $len ' +test_expect_success 'copyfrom target to repeat data' ' + printf foofoo >expect && + printf "SVNQ%b%b%s" "QQ\006\004\003" "\0203\0100\003Q" "foo" | + q_to_nul >copytarget.repeat && + len=$(wc -c actual && + test_cmp expect actual +' + +test_expect_success 'copyfrom target out of order' ' + printf foooof >expect && + printf "SVNQ%b%b%s" \ + "QQ\006\007\003" "\0203\0101\002\0101\001\0101Q" "foo" | + q_to_nul >copytarget.reverse && + len=$(wc -c actual && + test_cmp expect actual +' + +test_expect_success 'catch copyfrom future' ' + printf "SVNQ%b%b%s" "QQ\004\004\003" "\0202\0101\002\0201" "XYZ" | + q_to_nul >copytarget.infuture && + len=$(wc -c expect && + printf "SVNQ%b%b%s" "QQ\014\004\003" "\0202\0111Q\0201" "XYZ" | + q_to_nul >copytarget.sustain && + len=$(wc -c actual && + test_cmp expect actual +' + +test_expect_success 'catch copy that overflows' ' + printf "SVNQ%b%b%s" "QQ\003\003\001" "\0201\0177Q" X | + q_to_nul >copytarget.overflow && + len=$(wc -c = ctx->out.len) + return error("invalid delta: copies from the future"); + for (; nbytes > 0; nbytes--) + strbuf_addch(&ctx->out, ctx->out.buf[offset++]); + return 0; +} + static int copyfrom_data(struct window *ctx, size_t *data_pos, size_t nbytes) { const size_t pos = *data_pos; @@ -189,9 +208,14 @@ static int execute_one_instruction(struct window *ctx, instruction = (unsigned char) **instructions; if (parse_first_operand(instructions, &nbytes, insns_end)) return -1; - if ((instruction & INSN_MASK) != INSN_COPYFROM_DATA) + switch (instruction & INSN_MASK) { + case INSN_COPYFROM_TARGET: + return copyfrom_target(ctx, instructions, nbytes, insns_end); + case INSN_COPYFROM_DATA: + return copyfrom_data(ctx, data_pos, nbytes); + default: return error("Unknown instruction %x", instruction); - return copyfrom_data(ctx, data_pos, nbytes); + } } static int apply_window_in_core(struct window *ctx) -- cgit v0.10.2-6-g49f6 From c846e4107876936bed7177a811559bd74a72dcd8 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:58:30 -0500 Subject: vcs-svn: let deltas use data from preimage The copyfrom_source instruction appends data from the preimage buffer to the end of output. Its arguments are a length and an offset relative to the beginning of the source view. With this change, the delta applier is able to reproduce all 5,636,613 blobs in the early history of the ASF repository. Tested with mkfifo backflow svn-fe backflow with svn-asf-public-r0:940166 produced by whatever version of Subversion the dumps in /dump/ on svn.apache.org use (presumably 1.6.something). Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh index f9573a1..b38d16f 100755 --- a/t/t9011-svn-da.sh +++ b/t/t9011-svn-da.sh @@ -210,4 +210,39 @@ test_expect_success 'catch copy that overflows' ' test_must_fail test-svn-fe -d preimage copytarget.overflow $len ' +test_expect_success 'copyfrom source' ' + printf foo >expect && + printf "SVNQ%b%b" "Q\003\003\002Q" "\003Q" | q_to_nul >copysource.all && + test-svn-fe -d preimage copysource.all 11 >actual && + test_cmp expect actual +' + +test_expect_success 'copy backwards' ' + printf oof >expect && + printf "SVNQ%b%b" "Q\003\003\006Q" "\001\002\001\001\001Q" | + q_to_nul >copysource.rev && + test-svn-fe -d preimage copysource.rev 15 >actual && + test_cmp expect actual +' + +test_expect_success 'offsets are relative to window' ' + printf fo >expect && + printf "SVNQ%b%b%b%b" "Q\003\001\002Q" "\001Q" \ + "\002\001\001\002Q" "\001Q" | + q_to_nul >copysource.two && + test-svn-fe -d preimage copysource.two 18 >actual && + test_cmp expect actual +' + +test_expect_success 'example from notes/svndiff' ' + printf aaaaccccdddddddd >expect && + printf aaaabbbbcccc >source && + printf "SVNQ%b%b%s" "Q\014\020\007\001" \ + "\004Q\004\010\0201\0107\010" d | + q_to_nul >delta.example && + len=$(wc -c actual && + test_cmp expect actual +' + test_done diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index a02eee0..9ee41bb 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -24,6 +24,7 @@ * view_selector ::= copyfrom_source * | copyfrom_target * ; + * copyfrom_source ::= # binary 00 000000; * copyfrom_target ::= # binary 01 000000; * copyfrom_data ::= # binary 10 000000; * packed_view_selector ::= # view_selector OR-ed with 6 bit value; @@ -34,6 +35,7 @@ */ #define INSN_MASK 0xc0 +#define INSN_COPYFROM_SOURCE 0x00 #define INSN_COPYFROM_TARGET 0x40 #define INSN_COPYFROM_DATA 0x80 #define OPERAND_MASK 0x3f @@ -43,12 +45,13 @@ #define VLI_BITS_PER_DIGIT 7 struct window { + struct sliding_view *in; struct strbuf out; struct strbuf instructions; struct strbuf data; }; -#define WINDOW_INIT { STRBUF_INIT, STRBUF_INIT, STRBUF_INIT } +#define WINDOW_INIT(w) { (w), STRBUF_INIT, STRBUF_INIT, STRBUF_INIT } static void window_release(struct window *ctx) { @@ -161,6 +164,19 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len) return 0; } +static int copyfrom_source(struct window *ctx, const char **instructions, + size_t nbytes, const char *insns_end) +{ + size_t offset; + if (parse_int(instructions, &offset, insns_end)) + return -1; + if (unsigned_add_overflows(offset, nbytes) || + offset + nbytes > ctx->in->width) + return error("invalid delta: copies source data outside view"); + strbuf_add(&ctx->out, ctx->in->buf.buf + offset, nbytes); + return 0; +} + static int copyfrom_target(struct window *ctx, const char **instructions, size_t nbytes, const char *instructions_end) { @@ -209,12 +225,14 @@ static int execute_one_instruction(struct window *ctx, if (parse_first_operand(instructions, &nbytes, insns_end)) return -1; switch (instruction & INSN_MASK) { + case INSN_COPYFROM_SOURCE: + return copyfrom_source(ctx, instructions, nbytes, insns_end); case INSN_COPYFROM_TARGET: return copyfrom_target(ctx, instructions, nbytes, insns_end); case INSN_COPYFROM_DATA: return copyfrom_data(ctx, data_pos, nbytes); default: - return error("Unknown instruction %x", instruction); + return error("invalid delta: unrecognized instruction"); } } @@ -238,9 +256,9 @@ static int apply_window_in_core(struct window *ctx) } static int apply_one_window(struct line_buffer *delta, off_t *delta_len, - FILE *out) + struct sliding_view *preimage, FILE *out) { - struct window ctx = WINDOW_INIT; + struct window ctx = WINDOW_INIT(preimage); size_t out_len; size_t instructions_len; size_t data_len; @@ -283,7 +301,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_offset(delta, &pre_off, &delta_len) || read_length(delta, &pre_len, &delta_len) || move_window(preimage, pre_off, pre_len) || - apply_one_window(delta, &delta_len, postimage)) + apply_one_window(delta, &delta_len, preimage, postimage)) return -1; } return 0; -- cgit v0.10.2-6-g49f6