From 3f527372d9ec6d7b6890773e41c4b3542d7ad451 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Mon, 9 Aug 2010 17:04:29 -0500 Subject: Introduce vcs-svn lib Teach the build system to build a separate library for the upcoming subversion interop support. The resulting vcs-svn/lib.a does not contain any code, nor is it built during a normal build. This is just scaffolding for later changes. Signed-off-by: Jonathan Nieder Signed-off-by: Ramkumar Ramachandra Signed-off-by: Junio C Hamano Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/Makefile b/Makefile index 1f11618..0b6454d 100644 --- a/Makefile +++ b/Makefile @@ -468,6 +468,7 @@ export PYTHON_PATH LIB_FILE=libgit.a XDIFF_LIB=xdiff/lib.a +VCSSVN_LIB=vcs-svn/lib.a LIB_H += advice.h LIB_H += archive.h @@ -1739,7 +1740,8 @@ ifndef NO_CURL endif XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ xdiff/xmerge.o xdiff/xpatience.o -OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) +VCSSVN_OBJS = +OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS) dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d) dep_dirs := $(addsuffix .depend,$(sort $(dir $(OBJECTS)))) @@ -1861,6 +1863,8 @@ http.o http-walker.o http-push.o http-fetch.o remote-curl.o: http.h xdiff-interface.o $(XDIFF_OBJS): \ xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \ xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h + +$(VCSSVN_OBJS): endif exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ @@ -1909,6 +1913,8 @@ $(LIB_FILE): $(LIB_OBJS) $(XDIFF_LIB): $(XDIFF_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(XDIFF_OBJS) +$(VCSSVN_LIB): $(VCSSVN_OBJS) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(VCSSVN_OBJS) doc: $(MAKE) -C Documentation all diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE new file mode 100644 index 0000000..6e52372 --- /dev/null +++ b/vcs-svn/LICENSE @@ -0,0 +1,26 @@ +Copyright (C) 2010 David Barr . +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice(s), this list of conditions and the following disclaimer + unmodified other than the allowable addition of one or more + copyright notices. +2. Redistributions in binary form must reproduce the above copyright + notice(s), this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -- cgit v0.10.2-6-g49f6 From 4709455db3891f6cad9a96a574296b4926f70cbe Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 9 Aug 2010 17:11:11 -0500 Subject: Add memory pool library Add a memory pool library implemented using C macros. The obj_pool_gen() macro creates a type-specific memory pool. The memory pool library is distinguished from the existing specialized allocators in alloc.c by using a contiguous block for all allocations. This means that on one hand, long-lived pointers have to be written as offsets, since the base address changes as the pool grows, but on the other hand, the entire pool can be easily written to the file system. 
This could allow the memory pool to persist between runs of an application. For the svn importer, such a facility is useful because each svn revision can copy trees and files from any previous revision. The relevant information for all revisions has to persist somehow to support incremental runs. [rr: minor cleanups] [jn: added tests; removed file system backing for now] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/.gitignore b/.gitignore index 14e2b6b..1e64a6a 100644 --- a/.gitignore +++ b/.gitignore @@ -167,6 +167,7 @@ /test-genrandom /test-index-version /test-match-trees +/test-obj-pool /test-parse-options /test-path-utils /test-run-command diff --git a/Makefile b/Makefile index 0b6454d..47cbf26 100644 --- a/Makefile +++ b/Makefile @@ -409,6 +409,7 @@ TEST_PROGRAMS_NEED_X += test-delta TEST_PROGRAMS_NEED_X += test-dump-cache-tree TEST_PROGRAMS_NEED_X += test-genrandom TEST_PROGRAMS_NEED_X += test-match-trees +TEST_PROGRAMS_NEED_X += test-obj-pool TEST_PROGRAMS_NEED_X += test-parse-options TEST_PROGRAMS_NEED_X += test-path-utils TEST_PROGRAMS_NEED_X += test-run-command @@ -1864,7 +1865,8 @@ xdiff-interface.o $(XDIFF_OBJS): \ xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \ xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h -$(VCSSVN_OBJS): +$(VCSSVN_OBJS): \ + vcs-svn/obj_pool.h endif exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh new file mode 100755 index 0000000..3f29496 --- /dev/null +++ b/t/t0080-vcs-svn.sh @@ -0,0 +1,79 @@ +#!/bin/sh + +test_description='check infrastructure for svn importer' + +. 
./test-lib.sh +uint32_max=4294967295 + +test_expect_success 'obj pool: store data' ' + cat <<-\EOF >expected && + 0 + 1 + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 16 + set one 13 + test one 13 + reset one + EOF + test_cmp expected actual +' + +test_expect_success 'obj pool: NULL is offset ~0' ' + echo "$uint32_max" >expected && + echo null one | test-obj-pool >actual && + test_cmp expected actual +' + +test_expect_success 'obj pool: out-of-bounds access' ' + cat <<-EOF >expected && + 0 + 0 + $uint32_max + $uint32_max + 16 + 20 + $uint32_max + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 16 + alloc two 16 + offset one 20 + offset two 20 + alloc one 5 + offset one 20 + free one 1 + offset one 20 + reset one + reset two + EOF + test_cmp expected actual +' + +test_expect_success 'obj pool: high-water mark' ' + cat <<-\EOF >expected && + 0 + 0 + 10 + 20 + 20 + 20 + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 10 + committed one + alloc one 10 + commit one + committed one + alloc one 10 + free one 20 + committed one + reset one + EOF + test_cmp expected actual +' + +test_done diff --git a/test-obj-pool.c b/test-obj-pool.c new file mode 100644 index 0000000..5018863 --- /dev/null +++ b/test-obj-pool.c @@ -0,0 +1,116 @@ +/* + * test-obj-pool.c: code to exercise the svn importer's object pool + */ + +#include "cache.h" +#include "vcs-svn/obj_pool.h" + +enum pool { POOL_ONE, POOL_TWO }; +obj_pool_gen(one, int, 1) +obj_pool_gen(two, int, 4096) + +static uint32_t strtouint32(const char *s) +{ + char *end; + uintmax_t n = strtoumax(s, &end, 10); + if (*s == '\0' || (*end != '\n' && *end != '\0')) + die("invalid offset: %s", s); + return (uint32_t) n; +} + +static void handle_command(const char *command, enum pool pool, const char *arg) +{ + switch (*command) { + case 'a': + if (!prefixcmp(command, "alloc ")) { + uint32_t n = strtouint32(arg); + printf("%"PRIu32"\n", + pool == POOL_ONE ? 
+ one_alloc(n) : two_alloc(n)); + return; + } + case 'c': + if (!prefixcmp(command, "commit ")) { + pool == POOL_ONE ? one_commit() : two_commit(); + return; + } + if (!prefixcmp(command, "committed ")) { + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_pool.committed : two_pool.committed); + return; + } + case 'f': + if (!prefixcmp(command, "free ")) { + uint32_t n = strtouint32(arg); + pool == POOL_ONE ? one_free(n) : two_free(n); + return; + } + case 'n': + if (!prefixcmp(command, "null ")) { + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_offset(NULL) : two_offset(NULL)); + return; + } + case 'o': + if (!prefixcmp(command, "offset ")) { + uint32_t n = strtouint32(arg); + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_offset(one_pointer(n)) : + two_offset(two_pointer(n))); + return; + } + case 'r': + if (!prefixcmp(command, "reset ")) { + pool == POOL_ONE ? one_reset() : two_reset(); + return; + } + case 's': + if (!prefixcmp(command, "set ")) { + uint32_t n = strtouint32(arg); + if (pool == POOL_ONE) + *one_pointer(n) = 1; + else + *two_pointer(n) = 1; + return; + } + case 't': + if (!prefixcmp(command, "test ")) { + uint32_t n = strtouint32(arg); + printf("%d\n", pool == POOL_ONE ? 
+ *one_pointer(n) : *two_pointer(n)); + return; + } + default: + die("unrecognized command: %s", command); + } +} + +static void handle_line(const char *line) +{ + const char *arg = strchr(line, ' '); + enum pool pool; + + if (arg && !prefixcmp(arg + 1, "one")) + pool = POOL_ONE; + else if (arg && !prefixcmp(arg + 1, "two")) + pool = POOL_TWO; + else + die("no pool specified: %s", line); + + handle_command(line, pool, arg + strlen("one ")); +} + +int main(int argc, char *argv[]) +{ + struct strbuf sb = STRBUF_INIT; + if (argc != 1) + usage("test-obj-str < script"); + + while (strbuf_getline(&sb, stdin, '\n') != EOF) + handle_line(sb.buf); + strbuf_release(&sb); + return 0; +} diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h new file mode 100644 index 0000000..deb6eb8 --- /dev/null +++ b/vcs-svn/obj_pool.h @@ -0,0 +1,61 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#ifndef OBJ_POOL_H_ +#define OBJ_POOL_H_ + +#include "git-compat-util.h" + +#define MAYBE_UNUSED __attribute__((__unused__)) + +#define obj_pool_gen(pre, obj_t, initial_capacity) \ +static struct { \ + uint32_t committed; \ + uint32_t size; \ + uint32_t capacity; \ + obj_t *base; \ +} pre##_pool = {0, 0, 0, NULL}; \ +static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \ +{ \ + uint32_t offset; \ + if (pre##_pool.size + count > pre##_pool.capacity) { \ + while (pre##_pool.size + count > pre##_pool.capacity) \ + if (pre##_pool.capacity) \ + pre##_pool.capacity *= 2; \ + else \ + pre##_pool.capacity = initial_capacity; \ + pre##_pool.base = realloc(pre##_pool.base, \ + pre##_pool.capacity * sizeof(obj_t)); \ + } \ + offset = pre##_pool.size; \ + pre##_pool.size += count; \ + return offset; \ +} \ +static MAYBE_UNUSED void pre##_free(uint32_t count) \ +{ \ + pre##_pool.size -= count; \ +} \ +static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \ +{ \ + return obj == NULL ? 
~0 : obj - pre##_pool.base; \ +} \ +static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \ +{ \ + return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \ +} \ +static MAYBE_UNUSED void pre##_commit(void) \ +{ \ + pre##_pool.committed = pre##_pool.size; \ +} \ +static MAYBE_UNUSED void pre##_reset(void) \ +{ \ + free(pre##_pool.base); \ + pre##_pool.base = NULL; \ + pre##_pool.size = 0; \ + pre##_pool.capacity = 0; \ + pre##_pool.committed = 0; \ +} + +#endif -- cgit v0.10.2-6-g49f6 From 951f316470acc7c785c460a4e40735b22822349f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 9 Aug 2010 17:17:34 -0500 Subject: Add treap implementation Provide macros to generate a type-specific treap implementation and various functions to operate on it. It uses obj_pool.h to store memory nodes in a treap. Previously committed nodes are never removed from the pool; after any *_commit operation, it is assumed (correctly, in the case of svn-fast-export) that someone else must care about them. Treaps provide a memory-efficient binary search tree structure. Insertion/deletion/search are about as about as fast in the average case as red-black trees and the chances of worst-case behavior are vanishingly small, thanks to (pseudo-)randomness. The bad worst-case behavior is a small price to pay, given that treaps are much simpler to implement. 
>From http://www.canonware.com/download/trp/trp_hash/trp.h [db: Altered to reference nodes by offset from a common base pointer] [db: Bob Jenkins' hashing implementation dropped for Knuth's] [db: Methods unnecessary for search and insert dropped] [rr: Squelched compiler warnings] [db: Added support for immutable treap nodes] [jn: Reintroduced treap_nsearch(); with tests] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/.gitignore b/.gitignore index 1e64a6a..af47653 100644 --- a/.gitignore +++ b/.gitignore @@ -173,6 +173,7 @@ /test-run-command /test-sha1 /test-sigchain +/test-treap /common-cmds.h *.tar.gz *.dsc diff --git a/Makefile b/Makefile index 47cbf26..8777d28 100644 --- a/Makefile +++ b/Makefile @@ -415,6 +415,7 @@ TEST_PROGRAMS_NEED_X += test-path-utils TEST_PROGRAMS_NEED_X += test-run-command TEST_PROGRAMS_NEED_X += test-sha1 TEST_PROGRAMS_NEED_X += test-sigchain +TEST_PROGRAMS_NEED_X += test-treap TEST_PROGRAMS_NEED_X += test-index-version TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X)) @@ -1866,7 +1867,7 @@ xdiff-interface.o $(XDIFF_OBJS): \ xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h $(VCSSVN_OBJS): \ - vcs-svn/obj_pool.h + vcs-svn/obj_pool.h vcs-svn/trp.h endif exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh index 3f29496..ce02c58 100755 --- a/t/t0080-vcs-svn.sh +++ b/t/t0080-vcs-svn.sh @@ -76,4 +76,26 @@ test_expect_success 'obj pool: high-water mark' ' test_cmp expected actual ' +test_expect_success 'treap sort' ' + cat <<-\EOF >unsorted && + 68 + 12 + 13 + 13 + 68 + 13 + 13 + 21 + 10 + 11 + 12 + 13 + 13 + EOF + sort unsorted >expected && + + test-treap actual && + test_cmp expected actual +' + test_done diff --git a/test-treap.c b/test-treap.c new file mode 100644 index 0000000..cdba511 --- /dev/null +++ b/test-treap.c @@ -0,0 +1,65 @@ +/* + * test-treap.c: code to exercise the svn importer's 
treap structure + */ + +#include "cache.h" +#include "vcs-svn/obj_pool.h" +#include "vcs-svn/trp.h" + +struct int_node { + uintmax_t n; + struct trp_node children; +}; + +obj_pool_gen(node, struct int_node, 3) + +static int node_cmp(struct int_node *a, struct int_node *b) +{ + return (a->n > b->n) - (a->n < b->n); +} + +trp_gen(static, treap_, struct int_node, children, node, node_cmp) + +static void strtonode(struct int_node *item, const char *s) +{ + char *end; + item->n = strtoumax(s, &end, 10); + if (*s == '\0' || (*end != '\n' && *end != '\0')) + die("invalid integer: %s", s); +} + +int main(int argc, char *argv[]) +{ + struct strbuf sb = STRBUF_INIT; + struct trp_root root = { ~0 }; + uint32_t item; + + if (argc != 1) + usage("test-treap < ints"); + + while (strbuf_getline(&sb, stdin, '\n') != EOF) { + item = node_alloc(1); + strtonode(node_pointer(item), sb.buf); + treap_insert(&root, node_pointer(item)); + } + + item = node_offset(treap_first(&root)); + while (~item) { + uint32_t next; + struct int_node *tmp = node_pointer(node_alloc(1)); + + tmp->n = node_pointer(item)->n; + next = node_offset(treap_next(&root, node_pointer(item))); + + treap_remove(&root, node_pointer(item)); + item = node_offset(treap_nsearch(&root, tmp)); + + if (item != next && (!~item || node_pointer(item)->n != tmp->n)) + die("found %"PRIuMAX" in place of %"PRIuMAX"", + ~item ? node_pointer(item)->n : ~(uintmax_t) 0, + ~next ? node_pointer(next)->n : ~(uintmax_t) 0); + printf("%"PRIuMAX"\n", tmp->n); + } + node_reset(); + return 0; +} diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE index 6e52372..a3d384c 100644 --- a/vcs-svn/LICENSE +++ b/vcs-svn/LICENSE @@ -1,6 +1,9 @@ Copyright (C) 2010 David Barr . All rights reserved. +Copyright (C) 2008 Jason Evans . +All rights reserved. 
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h new file mode 100644 index 0000000..1f5f51f --- /dev/null +++ b/vcs-svn/trp.h @@ -0,0 +1,236 @@ +/* + * C macro implementation of treaps. + * + * Usage: + * #include + * #include "trp.h" + * trp_gen(...) + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#ifndef TRP_H_ +#define TRP_H_ + +#define MAYBE_UNUSED __attribute__((__unused__)) + +/* Node structure. */ +struct trp_node { + uint32_t trpn_left; + uint32_t trpn_right; +}; + +/* Root structure. */ +struct trp_root { + uint32_t trp_root; +}; + +/* Pointer/Offset conversion. */ +#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset)) +#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer)) +#define trpn_modify(a_base, a_offset) \ + do { \ + if ((a_offset) < a_base##_pool.committed) { \ + uint32_t old_offset = (a_offset);\ + (a_offset) = a_base##_alloc(1); \ + *trpn_pointer(a_base, a_offset) = \ + *trpn_pointer(a_base, old_offset); \ + } \ + } while (0); + +/* Left accessors. */ +#define trp_left_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_left) +#define trp_left_set(a_base, a_field, a_node, a_left) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_left_get(a_base, a_field, a_node) = (a_left); \ + } while(0) + +/* Right accessors. */ +#define trp_right_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_right) +#define trp_right_set(a_base, a_field, a_node, a_right) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_right_get(a_base, a_field, a_node) = (a_right); \ + } while(0) + +/* + * Fibonacci hash function. + * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2). + * See Knuth §6.4: volume 3, 3rd ed, p518. 
+ */ +#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node)) + +/* Priority accessors. */ +#define trp_prio_get(a_node) trpn_hash(a_node) + +/* Node initializer. */ +#define trp_node_new(a_base, a_field, a_node) \ + do { \ + trp_left_set(a_base, a_field, (a_node), ~0); \ + trp_right_set(a_base, a_field, (a_node), ~0); \ + } while(0) + +/* Internal utility macros. */ +#define trpn_first(a_base, a_field, a_root, r_node) \ + do { \ + (r_node) = (a_root); \ + if ((r_node) == ~0) \ + return NULL; \ + while (~trp_left_get(a_base, a_field, (r_node))) \ + (r_node) = trp_left_get(a_base, a_field, (r_node)); \ + } while (0) + +#define trpn_rotate_left(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_right_get(a_base, a_field, (a_node)); \ + trp_right_set(a_base, a_field, (a_node), \ + trp_left_get(a_base, a_field, (r_node))); \ + trp_left_set(a_base, a_field, (r_node), (a_node)); \ + } while(0) + +#define trpn_rotate_right(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_left_get(a_base, a_field, (a_node)); \ + trp_left_set(a_base, a_field, (a_node), \ + trp_right_get(a_base, a_field, (r_node))); \ + trp_right_set(a_base, a_field, (r_node), (a_node)); \ + } while(0) + +#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \ +a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \ +{ \ + uint32_t ret; \ + trpn_first(a_base, a_field, treap->trp_root, ret); \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \ +{ \ + uint32_t ret; \ + uint32_t offset = trpn_offset(a_base, node); \ + if (~trp_right_get(a_base, a_field, offset)) { \ + trpn_first(a_base, a_field, \ + trp_right_get(a_base, a_field, offset), ret); \ + } else { \ + uint32_t tnode = treap->trp_root; \ + ret = ~0; \ + while (1) { \ + int cmp = (a_cmp)(trpn_pointer(a_base, offset), \ + trpn_pointer(a_base, tnode)); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = trp_left_get(a_base, a_field, tnode); 
\ + } else if (cmp > 0) { \ + tnode = trp_right_get(a_base, a_field, tnode); \ + } else { \ + break; \ + } \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) { \ + if (cmp < 0) { \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) { \ + if (cmp < 0) { \ + if (!~trp_left_get(a_base, a_field, ret)) \ + break; \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \ +{ \ + if (cur_node == ~0) { \ + return (ins_node); \ + } else { \ + uint32_t ret; \ + int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp < 0) { \ + uint32_t left = a_pre##insert_recurse( \ + trp_left_get(a_base, a_field, cur_node), ins_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + if (trp_prio_get(left) < trp_prio_get(cur_node)) \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } else { \ + uint32_t right = a_pre##insert_recurse( \ + trp_right_get(a_base, a_field, cur_node), ins_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + if (trp_prio_get(right) < trp_prio_get(cur_node)) \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } \ + return (ret); \ + } \ +} \ +a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ +{ \ + 
uint32_t offset = trpn_offset(a_base, node); \ + trp_node_new(a_base, a_field, offset); \ + treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \ +{ \ + int cmp = a_cmp(trpn_pointer(a_base, rem_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp == 0) { \ + uint32_t ret; \ + uint32_t left = trp_left_get(a_base, a_field, cur_node); \ + uint32_t right = trp_right_get(a_base, a_field, cur_node); \ + if (left == ~0) { \ + if (right == ~0) \ + return (~0); \ + } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + right = a_pre##remove_recurse(cur_node, rem_node); \ + trp_right_set(a_base, a_field, ret, right); \ + return (ret); \ + } \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + left = a_pre##remove_recurse(cur_node, rem_node); \ + trp_left_set(a_base, a_field, ret, left); \ + return (ret); \ + } else if (cmp < 0) { \ + uint32_t left = a_pre##remove_recurse( \ + trp_left_get(a_base, a_field, cur_node), rem_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + return (cur_node); \ + } else { \ + uint32_t right = a_pre##remove_recurse( \ + trp_right_get(a_base, a_field, cur_node), rem_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + return (cur_node); \ + } \ +} \ +a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \ +{ \ + treap->trp_root = a_pre##remove_recurse(treap->trp_root, \ + trpn_offset(a_base, node)); \ +} \ + +#endif diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt new file mode 100644 index 0000000..eb4c191 --- /dev/null +++ b/vcs-svn/trp.txt @@ -0,0 +1,103 @@ +Motivation +========== + +Treaps provide a memory-efficient binary search tree structure. 
+Insertion/deletion/search are about as fast in the average +case as red-black trees and the chances of worst-case behavior are +vanishingly small, thanks to (pseudo-)randomness. The bad worst-case +behavior is a small price to pay, given that treaps are much simpler +to implement. + +API +=== + +The trp API generates a data structure and functions to handle a +large growing set of objects stored in a pool. + +The caller: + +. Specifies parameters for the generated functions with the + trp_gen(static, foo_, ...) macro. + +. Allocates a `struct trp_root` variable and sets it to {~0}. + +. Adds new nodes to the set using `foo_insert`. + +. Can find a specific item in the set using `foo_search`. + +. Can iterate over items in the set using `foo_first` and `foo_next`. + +. Can remove an item from the set using `foo_remove`. + +Example: + +---- +struct ex_node { + const char *s; + struct trp_node ex_link; +}; +static struct trp_root ex_base = {~0}; +obj_pool_gen(ex, struct ex_node, 4096); +trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp) +struct ex_node *item; + +item = ex_pointer(ex_alloc(1)); +item->s = "hello"; +ex_insert(&ex_base, item); +item = ex_pointer(ex_alloc(1)); +item->s = "goodbye"; +ex_insert(&ex_base, item); +for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item)) + printf("%s\n", item->s); +---- + +Functions +--------- + +trp_gen(attr, foo_, node_type, link_field, pool, cmp):: + + Generate a type-specific treap implementation. ++ +. The storage class for generated functions will be 'attr' (e.g., `static`). +. Generated function names are prefixed with 'foo_' (e.g., `treap_`). +. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`). + This type must be a struct with at least one `struct trp_node` field + to point to its children. +. The field used to access child nodes will be 'link_field'. +. All treap nodes must lie in the 'pool' object pool. +. 
Treap nodes must be totally ordered by the 'cmp' relation, with the + following prototype: ++ +int (*cmp)(node_type \*a, node_type \*b) ++ +and returning a value less than, equal to, or greater than zero +according to the result of comparison. + +void foo_insert(struct trp_root *treap, node_type \*node):: + + Insert node into treap. If inserted multiple times, + a node will appear in the treap multiple times. + +void foo_remove(struct trp_root *treap, node_type \*node):: + + Remove node from treap. Caller must ensure node is + present in treap before using this function. + +node_type *foo_search(struct trp_root \*treap, node_type \*key):: + + Search for a node that matches key. If no match is found, + result is NULL. + +node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: + + Like `foo_search`, but if the key is missing return what + would be key's successor, were key in treap (NULL if no + successor). + +node_type *foo_first(struct trp_root \*treap):: + + Find the first item from the treap, in sorted order. + +node_type *foo_next(struct trp_root \*treap, node_type \*node):: + + Find the next item. -- cgit v0.10.2-6-g49f6 From 1d73b52f5ba4184de6acf474f14668001304a10c Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 9 Aug 2010 17:34:42 -0500 Subject: Add string-specific memory pool Intern strings so they can be compared by address and stored without wasting space. This library uses the macros in the obj_pool.h and trp.h to create a memory pool for strings and expose an API for handling them. 
[rr: added API docs] [jn: with some API simplifications, new documentation and tests] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/.gitignore b/.gitignore index af47653..9f109db 100644 --- a/.gitignore +++ b/.gitignore @@ -173,6 +173,7 @@ /test-run-command /test-sha1 /test-sigchain +/test-string-pool /test-treap /common-cmds.h *.tar.gz diff --git a/Makefile b/Makefile index 8777d28..9cb83e1 100644 --- a/Makefile +++ b/Makefile @@ -415,6 +415,7 @@ TEST_PROGRAMS_NEED_X += test-path-utils TEST_PROGRAMS_NEED_X += test-run-command TEST_PROGRAMS_NEED_X += test-sha1 TEST_PROGRAMS_NEED_X += test-sigchain +TEST_PROGRAMS_NEED_X += test-string-pool TEST_PROGRAMS_NEED_X += test-treap TEST_PROGRAMS_NEED_X += test-index-version @@ -1742,7 +1743,7 @@ ifndef NO_CURL endif XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ xdiff/xmerge.o xdiff/xpatience.o -VCSSVN_OBJS = +VCSSVN_OBJS = vcs-svn/string_pool.o OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS) dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d) @@ -1867,7 +1868,7 @@ xdiff-interface.o $(XDIFF_OBJS): \ xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h $(VCSSVN_OBJS): \ - vcs-svn/obj_pool.h vcs-svn/trp.h + vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h endif exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ @@ -2018,10 +2019,12 @@ test-delta$X: diff-delta.o patch-delta.o test-parse-options$X: parse-options.o +test-string-pool$X: vcs-svn/lib.a + .PRECIOUS: $(TEST_OBJS) test-%$X: test-%.o $(GITLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS) check-sha1:: test-sha1$X ./test-sha1.sh diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh index ce02c58..99a314b 100755 --- a/t/t0080-vcs-svn.sh +++ b/t/t0080-vcs-svn.sh @@ -76,6 +76,22 @@ 
test_expect_success 'obj pool: high-water mark' ' test_cmp expected actual ' +test_expect_success 'string pool' ' + echo a does not equal b >expected.differ && + echo a equals a >expected.match && + echo equals equals equals >expected.matchmore && + + test-string-pool "a,--b" >actual.differ && + test-string-pool "a,a" >actual.match && + test-string-pool "equals-equals" >actual.matchmore && + test_must_fail test-string-pool a,a,a && + test_must_fail test-string-pool a && + + test_cmp expected.differ actual.differ && + test_cmp expected.match actual.match && + test_cmp expected.matchmore actual.matchmore +' + test_expect_success 'treap sort' ' cat <<-\EOF >unsorted && 68 diff --git a/test-string-pool.c b/test-string-pool.c new file mode 100644 index 0000000..c5782e6 --- /dev/null +++ b/test-string-pool.c @@ -0,0 +1,31 @@ +/* + * test-string-pool.c: code to exercise the svn importer's string pool + */ + +#include "git-compat-util.h" +#include "vcs-svn/string_pool.h" + +int main(int argc, char *argv[]) +{ + const uint32_t unequal = pool_intern("does not equal"); + const uint32_t equal = pool_intern("equals"); + uint32_t buf[3]; + uint32_t n; + + if (argc != 2) + usage("test-string-pool ,"); + + n = pool_tok_seq(3, buf, ",-", argv[1]); + if (n >= 3) + die("too many strings"); + if (n <= 1) + die("too few strings"); + + buf[2] = buf[1]; + buf[1] = (buf[0] == buf[2]) ? equal : unequal; + pool_print_seq(3, buf, ' ', stdout); + fputc('\n', stdout); + + pool_reset(); + return 0; +} diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c new file mode 100644 index 0000000..f5b1da8 --- /dev/null +++ b/vcs-svn/string_pool.c @@ -0,0 +1,102 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. 
+ */ + +#include "git-compat-util.h" +#include "trp.h" +#include "obj_pool.h" +#include "string_pool.h" + +static struct trp_root tree = { ~0 }; + +struct node { + uint32_t offset; + struct trp_node children; +}; + +/* Two memory pools: one for struct node, and another for strings */ +obj_pool_gen(node, struct node, 4096) +obj_pool_gen(string, char, 4096) + +static char *node_value(struct node *node) +{ + return node ? string_pointer(node->offset) : NULL; +} + +static int node_cmp(struct node *a, struct node *b) +{ + return strcmp(node_value(a), node_value(b)); +} + +/* Build a Treap from the node structure (a trp_node w/ offset) */ +trp_gen(static, tree_, struct node, children, node, node_cmp); + +const char *pool_fetch(uint32_t entry) +{ + return node_value(node_pointer(entry)); +} + +uint32_t pool_intern(const char *key) +{ + /* Canonicalize key */ + struct node *match = NULL, *node; + uint32_t key_len; + if (key == NULL) + return ~0; + key_len = strlen(key) + 1; + node = node_pointer(node_alloc(1)); + node->offset = string_alloc(key_len); + strcpy(node_value(node), key); + match = tree_search(&tree, node); + if (!match) { + tree_insert(&tree, node); + } else { + node_free(1); + string_free(key_len); + node = match; + } + return node_offset(node); +} + +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) +{ + char *token = strtok_r(str, delim, saveptr); + return token ? 
pool_intern(token) : ~0; +} + +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream) +{ + uint32_t i; + for (i = 0; i < len && ~seq[i]; i++) { + fputs(pool_fetch(seq[i]), stream); + if (i < len - 1 && ~seq[i + 1]) + fputc(delim, stream); + } +} + +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) +{ + char *context = NULL; + uint32_t token = ~0; + uint32_t length; + + if (sz == 0) + return ~0; + if (str) + token = pool_tok_r(str, delim, &context); + for (length = 0; length < sz; length++) { + seq[length] = token; + if (token == ~0) + return length; + token = pool_tok_r(NULL, delim, &context); + } + seq[sz - 1] = ~0; + return sz; +} + +void pool_reset(void) +{ + node_reset(); + string_reset(); +} diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h new file mode 100644 index 0000000..222fb66 --- /dev/null +++ b/vcs-svn/string_pool.h @@ -0,0 +1,11 @@ +#ifndef STRING_POOL_H_ +#define STRING_POOL_H_ + +uint32_t pool_intern(const char *key); +const char *pool_fetch(uint32_t entry); +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream); +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); +void pool_reset(void); + +#endif diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt new file mode 100644 index 0000000..1b41f15 --- /dev/null +++ b/vcs-svn/string_pool.txt @@ -0,0 +1,43 @@ +string_pool API +=============== + +The string_pool API provides facilities for replacing strings +with integer keys that can be more easily compared and stored. +The facilities are designed so that one could teach Git without +too much trouble to store the information needed for these keys to +remain valid over multiple executions. + +Functions +--------- + +pool_intern:: + Include a string in the string pool and get its key. + If that string is already in the pool, retrieves its + existing key. 
+ +pool_fetch:: + Retrieve the string associated to a given key. + +pool_tok_r:: + Extract the key of the next token from a string. + Interface mimics strtok_r. + +pool_print_seq:: + Print a sequence of strings named by key to a file, using the + specified delimiter to separate them. + + If NULL (key ~0) appears in the sequence, the sequence ends + early. + +pool_tok_seq:: + Split a string into tokens, storing the keys of segments + into a caller-provided array. + + Unless sz is 0, the array will always be ~0-terminated. + If there is not enough room for all the tokens, the + array holds as many tokens as fit in the entries before + the terminating ~0. Return value is the index after the + last token, or sz if the tokens did not fit. + +pool_reset:: + Deallocate storage for the string pool. -- cgit v0.10.2-6-g49f6 From 3bbaec00a8ffc6ea7e71c3b707851fe663d93a45 Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 9 Aug 2010 17:39:43 -0500 Subject: Add stream helper library This library provides thread-unsafe fgets()- and fread()-like functions where the caller does not have to supply a buffer. It maintains a couple of static buffers and provides an API to use them. 
[rr: allow input from files other than stdin] [jn: with tests, documentation, and error handling improvements] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/.gitignore b/.gitignore index 9f109db..8c0512e 100644 --- a/.gitignore +++ b/.gitignore @@ -166,6 +166,7 @@ /test-dump-cache-tree /test-genrandom /test-index-version +/test-line-buffer /test-match-trees /test-obj-pool /test-parse-options diff --git a/Makefile b/Makefile index 9cb83e1..ea7fc87 100644 --- a/Makefile +++ b/Makefile @@ -408,6 +408,7 @@ TEST_PROGRAMS_NEED_X += test-date TEST_PROGRAMS_NEED_X += test-delta TEST_PROGRAMS_NEED_X += test-dump-cache-tree TEST_PROGRAMS_NEED_X += test-genrandom +TEST_PROGRAMS_NEED_X += test-line-buffer TEST_PROGRAMS_NEED_X += test-match-trees TEST_PROGRAMS_NEED_X += test-obj-pool TEST_PROGRAMS_NEED_X += test-parse-options @@ -1743,7 +1744,7 @@ ifndef NO_CURL endif XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ xdiff/xmerge.o xdiff/xpatience.o -VCSSVN_OBJS = vcs-svn/string_pool.o +VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS) dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d) @@ -1868,7 +1869,8 @@ xdiff-interface.o $(XDIFF_OBJS): \ xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h $(VCSSVN_OBJS): \ - vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h + vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \ + vcs-svn/line_buffer.h endif exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ @@ -2017,6 +2019,8 @@ test-date$X: date.o ctype.o test-delta$X: diff-delta.o patch-delta.o +test-line-buffer$X: vcs-svn/lib.a + test-parse-options$X: parse-options.o test-string-pool$X: vcs-svn/lib.a diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh index 99a314b..d3225ad 100755 --- a/t/t0080-vcs-svn.sh +++ b/t/t0080-vcs-svn.sh @@ -76,6 +76,60 @@ test_expect_success 'obj pool: 
high-water mark' ' test_cmp expected actual ' +test_expect_success 'line buffer' ' + echo HELLO >expected1 && + printf "%s\n" "" HELLO >expected2 && + echo >expected3 && + printf "%s\n" "" Q | q_to_nul >expected4 && + printf "%s\n" foo "" >expected5 && + printf "%s\n" "" foo >expected6 && + + test-line-buffer <<-\EOF >actual1 && + 5 + HELLO + EOF + + test-line-buffer <<-\EOF >actual2 && + 0 + + 5 + HELLO + EOF + + q_to_nul <<-\EOF | + 1 + Q + EOF + test-line-buffer >actual3 && + + q_to_nul <<-\EOF | + 0 + + 1 + Q + EOF + test-line-buffer >actual4 && + + test-line-buffer <<-\EOF >actual5 && + 5 + foo + EOF + + test-line-buffer <<-\EOF >actual6 && + 0 + + 5 + foo + EOF + + test_cmp expected1 actual1 && + test_cmp expected2 actual2 && + test_cmp expected3 actual3 && + test_cmp expected4 actual4 && + test_cmp expected5 actual5 && + test_cmp expected6 actual6 +' + test_expect_success 'string pool' ' echo a does not equal b >expected.differ && echo a equals a >expected.match && diff --git a/test-line-buffer.c b/test-line-buffer.c new file mode 100644 index 0000000..c11bf7f --- /dev/null +++ b/test-line-buffer.c @@ -0,0 +1,46 @@ +/* + * test-line-buffer.c: code to exercise the svn importer's input helper + * + * Input format: + * number NL + * (number bytes) NL + * number NL + * ... 
+ */ + +#include "git-compat-util.h" +#include "vcs-svn/line_buffer.h" + +static uint32_t strtouint32(const char *s) +{ + char *end; + uintmax_t n = strtoumax(s, &end, 10); + if (*s == '\0' || *end != '\0') + die("invalid count: %s", s); + return (uint32_t) n; +} + +int main(int argc, char *argv[]) +{ + char *s; + + if (argc != 1) + usage("test-line-buffer < input.txt"); + if (buffer_init(NULL)) + die_errno("open error"); + while ((s = buffer_read_line())) { + s = buffer_read_string(strtouint32(s)); + fputs(s, stdout); + fputc('\n', stdout); + buffer_skip_bytes(1); + if (!(s = buffer_read_line())) + break; + buffer_copy_bytes(strtouint32(s) + 1); + } + if (buffer_deinit()) + die("input error"); + if (ferror(stdout)) + die("output error"); + buffer_reset(); + return 0; +} diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c new file mode 100644 index 0000000..1543567 --- /dev/null +++ b/vcs-svn/line_buffer.c @@ -0,0 +1,97 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "line_buffer.h" +#include "obj_pool.h" + +#define LINE_BUFFER_LEN 10000 +#define COPY_BUFFER_LEN 4096 + +/* Create memory pool for char sequence of known length */ +obj_pool_gen(blob, char, 4096) + +static char line_buffer[LINE_BUFFER_LEN]; +static char byte_buffer[COPY_BUFFER_LEN]; +static FILE *infile; + +int buffer_init(const char *filename) +{ + infile = filename ? fopen(filename, "r") : stdin; + if (!infile) + return -1; + return 0; +} + +int buffer_deinit(void) +{ + int err; + if (infile == stdin) + return ferror(infile); + err = ferror(infile); + err |= fclose(infile); + return err; +} + +/* Read a line without trailing newline. */ +char *buffer_read_line(void) +{ + char *end; + if (!fgets(line_buffer, sizeof(line_buffer), infile)) + /* Error or data exhausted. 
*/ + return NULL; + end = line_buffer + strlen(line_buffer); + if (end[-1] == '\n') + end[-1] = '\0'; + else if (feof(infile)) + ; /* No newline at end of file. That's fine. */ + else + /* + * Line was too long. + * There is probably a saner way to deal with this, + * but for now let's return an error. + */ + return NULL; + return line_buffer; +} + +char *buffer_read_string(uint32_t len) +{ + char *s; + blob_free(blob_pool.size); + s = blob_pointer(blob_alloc(len + 1)); + s[fread(s, 1, len, infile)] = '\0'; + return ferror(infile) ? NULL : s; +} + +void buffer_copy_bytes(uint32_t len) +{ + uint32_t in; + while (len > 0 && !feof(infile) && !ferror(infile)) { + in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, infile); + len -= in; + fwrite(byte_buffer, 1, in, stdout); + if (ferror(stdout)) { + buffer_skip_bytes(len); + return; + } + } +} + +void buffer_skip_bytes(uint32_t len) +{ + uint32_t in; + while (len > 0 && !feof(infile) && !ferror(infile)) { + in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, infile); + len -= in; + } +} + +void buffer_reset(void) +{ + blob_reset(); +} diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h new file mode 100644 index 0000000..9c78ae1 --- /dev/null +++ b/vcs-svn/line_buffer.h @@ -0,0 +1,12 @@ +#ifndef LINE_BUFFER_H_ +#define LINE_BUFFER_H_ + +int buffer_init(const char *filename); +int buffer_deinit(void); +char *buffer_read_line(void); +char *buffer_read_string(uint32_t len); +void buffer_copy_bytes(uint32_t len); +void buffer_skip_bytes(uint32_t len); +void buffer_reset(void); + +#endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt new file mode 100644 index 0000000..8906fb1 --- /dev/null +++ b/vcs-svn/line_buffer.txt @@ -0,0 +1,58 @@ +line_buffer API +=============== + +The line_buffer library provides a convenient interface for +mostly-line-oriented input. + +Each line is not permitted to exceed 10000 bytes. 
The provided +functions are not thread-safe or async-signal-safe, and like +`fgets()`, they generally do not function correctly if interrupted +by a signal without SA_RESTART set. + +Calling sequence +---------------- + +The calling program: + + - specifies a file to read with `buffer_init` + - processes input with `buffer_read_line`, `buffer_read_string`, + `buffer_skip_bytes`, and `buffer_copy_bytes` + - closes the file with `buffer_deinit`, perhaps to start over and + read another file. + +Before exiting, the caller can use `buffer_reset` to deallocate +resources for the benefit of profiling tools. + +Functions +--------- + +`buffer_init`:: + Open the named file for input. If filename is NULL, + start reading from stdin. On failure, returns -1 (with + errno indicating the nature of the failure). + +`buffer_deinit`:: + Stop reading from the current file (closing it unless + it was stdin). Returns nonzero if `fclose` fails or + the error indicator was set. + +`buffer_read_line`:: + Read a line and strip off the trailing newline. + On failure or end of file, returns NULL. + +`buffer_read_string`:: + Read `len` characters of input or up to the end of the + file, whichever comes first. Returns NULL on error. + Returns whatever characters were read (possibly "") + for end of file. + +`buffer_copy_bytes`:: + Read `len` bytes of input and dump them to the standard output + stream. Returns early for error or end of file. + +`buffer_skip_bytes`:: + Discards `len` bytes from the input stream (stopping early + if necessary because of an error or eof). + +`buffer_reset`:: + Deallocates non-static buffers. -- cgit v0.10.2-6-g49f6 From c0e6c23dca84227167a6fe1077503ddf32208919 Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 9 Aug 2010 17:48:10 -0500 Subject: Infrastructure to write revisions in fast-export format repo_tree maintains the exporter's state and provides a facility to call fast_export, which writes objects to stdout suitable for consumption by fast-import.
The exported functions roughly correspond to Subversion FS operations. . repo_add, repo_modify, repo_copy, repo_replace, and repo_delete update the current commit, based roughly on the corresponding Subversion FS operation. . repo_commit calls out to fast_export to write the current commit to the fast-import stream in stdout. . repo_diff is used by the fast_export module to write the changes for a commit. . repo_reset erases the exporter's state, so valgrind can be happy. [rr: squelched compiler warnings] [jn: removed support for maintaining state on-disk, though we may want to add it back later] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/Makefile b/Makefile index ea7fc87..2017d09 100644 --- a/Makefile +++ b/Makefile @@ -1744,7 +1744,8 @@ ifndef NO_CURL endif XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ xdiff/xmerge.o xdiff/xpatience.o -VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o +VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \ + vcs-svn/repo_tree.o vcs-svn/fast_export.o OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS) dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d) @@ -1870,7 +1871,7 @@ xdiff-interface.o $(XDIFF_OBJS): \ $(VCSSVN_OBJS): \ vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \ - vcs-svn/line_buffer.h + vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h endif exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c new file mode 100644 index 0000000..3a6156f --- /dev/null +++ b/vcs-svn/fast_export.c @@ -0,0 +1,74 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. 
+ */ + +#include "git-compat-util.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "repo_tree.h" +#include "string_pool.h" + +#define MAX_GITSVN_LINE_LEN 4096 + +static uint32_t first_commit_done; + +void fast_export_delete(uint32_t depth, uint32_t *path) +{ + putchar('D'); + putchar(' '); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark) +{ + /* Mode must be 100644, 100755, 120000, or 160000. */ + printf("M %06o :%d ", mode, mark); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +static char gitsvnline[MAX_GITSVN_LINE_LEN]; +void fast_export_commit(uint32_t revision, uint32_t author, char *log, + uint32_t uuid, uint32_t url, + unsigned long timestamp) +{ + if (!log) + log = ""; + if (~uuid && ~url) { + snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%d %s\n", + pool_fetch(url), revision, pool_fetch(uuid)); + } else { + *gitsvnline = '\0'; + } + printf("commit refs/heads/master\n"); + printf("committer %s <%s@%s> %ld +0000\n", + ~author ? pool_fetch(author) : "nobody", + ~author ? pool_fetch(author) : "nobody", + ~uuid ? 
pool_fetch(uuid) : "local", timestamp); + printf("data %zd\n%s%s\n", + strlen(log) + strlen(gitsvnline), log, gitsvnline); + if (!first_commit_done) { + if (revision > 1) + printf("from refs/heads/master^0\n"); + first_commit_done = 1; + } + repo_diff(revision - 1, revision); + fputc('\n', stdout); + + printf("progress Imported commit %d.\n\n", revision); +} + +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) +{ + if (mode == REPO_MODE_LNK) { + /* svn symlink blobs start with "link " */ + buffer_skip_bytes(5); + len -= 5; + } + printf("blob\nmark :%d\ndata %d\n", mark, len); + buffer_copy_bytes(len); + fputc('\n', stdout); +} diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h new file mode 100644 index 0000000..2aaaea5 --- /dev/null +++ b/vcs-svn/fast_export.h @@ -0,0 +1,11 @@ +#ifndef FAST_EXPORT_H_ +#define FAST_EXPORT_H_ + +void fast_export_delete(uint32_t depth, uint32_t *path); +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark); +void fast_export_commit(uint32_t revision, uint32_t author, char *log, + uint32_t uuid, uint32_t url, unsigned long timestamp); +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len); + +#endif diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c new file mode 100644 index 0000000..c3d7ee7 --- /dev/null +++ b/vcs-svn/repo_tree.c @@ -0,0 +1,329 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. 
+ */ + +#include "git-compat-util.h" + +#include "string_pool.h" +#include "repo_tree.h" +#include "obj_pool.h" +#include "fast_export.h" + +#include "trp.h" + +struct repo_dirent { + uint32_t name_offset; + struct trp_node children; + uint32_t mode; + uint32_t content_offset; +}; + +struct repo_dir { + struct trp_root entries; +}; + +struct repo_commit { + uint32_t root_dir_offset; +}; + +/* Memory pools for commit, dir and dirent */ +obj_pool_gen(commit, struct repo_commit, 4096) +obj_pool_gen(dir, struct repo_dir, 4096) +obj_pool_gen(dirent, struct repo_dirent, 4096) + +static uint32_t active_commit; +static uint32_t mark; + +static int repo_dirent_name_cmp(const void *a, const void *b); + +/* Treap for directory entries */ +trp_gen(static, dirent_, struct repo_dirent, children, dirent, repo_dirent_name_cmp); + +uint32_t next_blob_mark(void) +{ + return mark++; +} + +static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit) +{ + return dir_pointer(commit->root_dir_offset); +} + +static struct repo_dirent *repo_first_dirent(struct repo_dir *dir) +{ + return dirent_first(&dir->entries); +} + +static int repo_dirent_name_cmp(const void *a, const void *b) +{ + const struct repo_dirent *dirent1 = a, *dirent2 = b; + uint32_t a_offset = dirent1->name_offset; + uint32_t b_offset = dirent2->name_offset; + return (a_offset > b_offset) - (a_offset < b_offset); +} + +static int repo_dirent_is_dir(struct repo_dirent *dirent) +{ + return dirent != NULL && dirent->mode == REPO_MODE_DIR; +} + +static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dirent) +{ + if (!repo_dirent_is_dir(dirent)) + return NULL; + return dir_pointer(dirent->content_offset); +} + +static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) +{ + uint32_t orig_o, new_o; + orig_o = dir_offset(orig_dir); + if (orig_o >= dir_pool.committed) + return orig_dir; + new_o = dir_alloc(1); + orig_dir = dir_pointer(orig_o); + *dir_pointer(new_o) = *orig_dir; + return 
dir_pointer(new_o); +} + +static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path) +{ + uint32_t name = 0; + struct repo_dirent *key = dirent_pointer(dirent_alloc(1)); + struct repo_dir *dir = NULL; + struct repo_dirent *dirent = NULL; + dir = repo_commit_root_dir(commit_pointer(revision)); + while (~(name = *path++)) { + key->name_offset = name; + dirent = dirent_search(&dir->entries, key); + if (dirent == NULL || !repo_dirent_is_dir(dirent)) + break; + dir = repo_dir_from_dirent(dirent); + } + dirent_free(1); + return dirent; +} + +static void repo_write_dirent(uint32_t *path, uint32_t mode, + uint32_t content_offset, uint32_t del) +{ + uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; + struct repo_dir *dir; + struct repo_dirent *key; + struct repo_dirent *dirent = NULL; + revision = active_commit; + dir = repo_commit_root_dir(commit_pointer(revision)); + dir = repo_clone_dir(dir); + commit_pointer(revision)->root_dir_offset = dir_offset(dir); + while (~(name = *path++)) { + parent_dir_o = dir_offset(dir); + + key = dirent_pointer(dirent_alloc(1)); + key->name_offset = name; + + dirent = dirent_search(&dir->entries, key); + if (dirent == NULL) + dirent = key; + else + dirent_free(1); + + if (dirent == key) { + dirent->mode = REPO_MODE_DIR; + dirent->content_offset = 0; + dirent_insert(&dir->entries, dirent); + } + + if (dirent_offset(dirent) < dirent_pool.committed) { + dir_o = repo_dirent_is_dir(dirent) ? 
+ dirent->content_offset : ~0; + dirent_remove(&dir->entries, dirent); + dirent = dirent_pointer(dirent_alloc(1)); + dirent->name_offset = name; + dirent->mode = REPO_MODE_DIR; + dirent->content_offset = dir_o; + dirent_insert(&dir->entries, dirent); + } + + dir = repo_dir_from_dirent(dirent); + dir = repo_clone_dir(dir); + dirent->content_offset = dir_offset(dir); + } + if (dirent == NULL) + return; + dirent->mode = mode; + dirent->content_offset = content_offset; + if (del && ~parent_dir_o) + dirent_remove(&dir_pointer(parent_dir_o)->entries, dirent); +} + +uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) +{ + uint32_t mode = 0, content_offset = 0; + struct repo_dirent *src_dirent; + src_dirent = repo_read_dirent(revision, src); + if (src_dirent != NULL) { + mode = src_dirent->mode; + content_offset = src_dirent->content_offset; + repo_write_dirent(dst, mode, content_offset, 0); + } + return mode; +} + +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) +{ + repo_write_dirent(path, mode, blob_mark, 0); +} + +uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) +{ + uint32_t mode = 0; + struct repo_dirent *src_dirent; + src_dirent = repo_read_dirent(active_commit, path); + if (src_dirent != NULL) { + mode = src_dirent->mode; + repo_write_dirent(path, mode, blob_mark, 0); + } + return mode; +} + +void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) +{ + struct repo_dirent *src_dirent; + src_dirent = repo_read_dirent(active_commit, path); + if (src_dirent != NULL && blob_mark == 0) + blob_mark = src_dirent->content_offset; + repo_write_dirent(path, mode, blob_mark, 0); +} + +void repo_delete(uint32_t *path) +{ + repo_write_dirent(path, 0, 0, 1); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir); + +static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dirent) +{ + if (repo_dirent_is_dir(dirent)) + repo_git_add_r(depth, path, repo_dir_from_dirent(dirent)); 
+ else + fast_export_modify(depth, path, + dirent->mode, dirent->content_offset); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) +{ + struct repo_dirent *de = repo_first_dirent(dir); + while (de) { + path[depth] = de->name_offset; + repo_git_add(depth + 1, path, de); + de = dirent_next(&dir->entries, de); + } +} + +static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, + struct repo_dir *dir2) +{ + struct repo_dirent *de1, *de2; + de1 = repo_first_dirent(dir1); + de2 = repo_first_dirent(dir2); + + while (de1 && de2) { + if (de1->name_offset < de2->name_offset) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dirent_next(&dir1->entries, de1); + continue; + } + if (de1->name_offset > de2->name_offset) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dirent_next(&dir2->entries, de2); + continue; + } + path[depth] = de1->name_offset; + + if (de1->mode == de2->mode && + de1->content_offset == de2->content_offset) { + ; /* No change. 
*/ + } else if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) { + repo_diff_r(depth + 1, path, + repo_dir_from_dirent(de1), + repo_dir_from_dirent(de2)); + } else if (!repo_dirent_is_dir(de1) && !repo_dirent_is_dir(de2)) { + repo_git_add(depth + 1, path, de2); + } else { + fast_export_delete(depth + 1, path); + repo_git_add(depth + 1, path, de2); + } + de1 = dirent_next(&dir1->entries, de1); + de2 = dirent_next(&dir2->entries, de2); + } + while (de1) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dirent_next(&dir1->entries, de1); + } + while (de2) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dirent_next(&dir2->entries, de2); + } +} + +static uint32_t path_stack[REPO_MAX_PATH_DEPTH]; + +void repo_diff(uint32_t r1, uint32_t r2) +{ + repo_diff_r(0, + path_stack, + repo_commit_root_dir(commit_pointer(r1)), + repo_commit_root_dir(commit_pointer(r2))); +} + +void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, + uint32_t url, unsigned long timestamp) +{ + fast_export_commit(revision, author, log, uuid, url, timestamp); + dirent_commit(); + dir_commit(); + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +static void mark_init(void) +{ + uint32_t i; + mark = 0; + for (i = 0; i < dirent_pool.size; i++) + if (!repo_dirent_is_dir(dirent_pointer(i)) && + dirent_pointer(i)->content_offset > mark) + mark = dirent_pointer(i)->content_offset; + mark++; +} + +void repo_init(void) +{ + mark_init(); + if (commit_pool.size == 0) { + /* Create empty tree for commit 0. */ + commit_alloc(1); + commit_pointer(0)->root_dir_offset = dir_alloc(1); + dir_pointer(0)->entries.trp_root = ~0; + dir_commit(); + } + /* Preallocate next commit, ready for changes. 
*/ + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +void repo_reset(void) +{ + pool_reset(); + commit_reset(); + dir_reset(); + dirent_reset(); +} diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h new file mode 100644 index 0000000..5476175 --- /dev/null +++ b/vcs-svn/repo_tree.h @@ -0,0 +1,26 @@ +#ifndef REPO_TREE_H_ +#define REPO_TREE_H_ + +#include "git-compat-util.h" + +#define REPO_MODE_DIR 0040000 +#define REPO_MODE_BLB 0100644 +#define REPO_MODE_EXE 0100755 +#define REPO_MODE_LNK 0120000 + +#define REPO_MAX_PATH_LEN 4096 +#define REPO_MAX_PATH_DEPTH 1000 + +uint32_t next_blob_mark(void); +uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); +uint32_t repo_replace(uint32_t *path, uint32_t blob_mark); +void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark); +void repo_delete(uint32_t *path); +void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, + uint32_t url, long unsigned timestamp); +void repo_diff(uint32_t r1, uint32_t r2); +void repo_init(void); +void repo_reset(void); + +#endif -- cgit v0.10.2-6-g49f6 From 21746aa34fc99d2c73634bc9829387c27c109dbe Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 9 Aug 2010 17:55:00 -0500 Subject: SVN dump parser svndump parses data that is in SVN dumpfile format produced by `svnadmin dump` with the help of line_buffer and uses repo_tree and fast_export to emit a git fast-import stream. Based roughly on com.hydrografix.svndump 0.92 from the SvnToCCase project at , by Stefan Hegny and others. 
[rr: allow input from files other than stdin] [jn: with test, more error reporting] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/.gitignore b/.gitignore index 8c0512e..258723f 100644 --- a/.gitignore +++ b/.gitignore @@ -175,6 +175,7 @@ /test-sha1 /test-sigchain /test-string-pool +/test-svn-fe /test-treap /common-cmds.h *.tar.gz diff --git a/Makefile b/Makefile index 2017d09..ec84697 100644 --- a/Makefile +++ b/Makefile @@ -417,6 +417,7 @@ TEST_PROGRAMS_NEED_X += test-run-command TEST_PROGRAMS_NEED_X += test-sha1 TEST_PROGRAMS_NEED_X += test-sigchain TEST_PROGRAMS_NEED_X += test-string-pool +TEST_PROGRAMS_NEED_X += test-svn-fe TEST_PROGRAMS_NEED_X += test-treap TEST_PROGRAMS_NEED_X += test-index-version @@ -1745,7 +1746,7 @@ endif XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ xdiff/xmerge.o xdiff/xpatience.o VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \ - vcs-svn/repo_tree.o vcs-svn/fast_export.o + vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS) dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d) @@ -1871,7 +1872,8 @@ xdiff-interface.o $(XDIFF_OBJS): \ $(VCSSVN_OBJS): \ vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \ - vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h + vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \ + vcs-svn/svndump.h endif exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ @@ -2026,6 +2028,8 @@ test-parse-options$X: parse-options.o test-string-pool$X: vcs-svn/lib.a +test-svn-fe$X: vcs-svn/lib.a + .PRECIOUS: $(TEST_OBJS) test-%$X: test-%.o $(GITLIBS) diff --git a/contrib/svn-fe/svn-fe.c b/contrib/svn-fe/svn-fe.c index e9b9ba4..a2677b0 100644 --- a/contrib/svn-fe/svn-fe.c +++ b/contrib/svn-fe/svn-fe.c @@ -10,6 +10,7 @@ int main(int argc, char **argv) { svndump_init(NULL); svndump_read((argc > 1) ? 
argv[1] : NULL); + svndump_deinit(); svndump_reset(); return 0; } diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh new file mode 100644 index 0000000..bf9bbd6 --- /dev/null +++ b/t/t9010-svn-fe.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +test_description='check svn dumpfile importer' + +. ./lib-git-svn.sh + +test_dump() { + label=$1 + dump=$2 + test_expect_success "$dump" ' + svnadmin create "$label-svn" && + svnadmin load "$label-svn" < "$TEST_DIRECTORY/$dump" && + svn_cmd export "file://$(pwd)/$label-svn" "$label-svnco" && + git init "$label-git" && + test-svn-fe "$TEST_DIRECTORY/$dump" >"$label.fe" && + ( + cd "$label-git" && + git fast-import < ../"$label.fe" + ) && + ( + cd "$label-svnco" && + git init && + git add . && + git fetch "../$label-git" master && + git diff --exit-code FETCH_HEAD + ) + ' +} + +test_dump simple t9111/svnsync.dump + +test_done diff --git a/test-svn-fe.c b/test-svn-fe.c new file mode 100644 index 0000000..77cf78a --- /dev/null +++ b/test-svn-fe.c @@ -0,0 +1,17 @@ +/* + * test-svn-fe: Code to exercise the svn import lib + */ + +#include "git-compat-util.h" +#include "vcs-svn/svndump.h" + +int main(int argc, char *argv[]) +{ + if (argc != 2) + usage("test-svn-fe "); + svndump_init(argv[1]); + svndump_read(NULL); + svndump_deinit(); + svndump_reset(); + return 0; +} diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE index a3d384c..0a5e3c4 100644 --- a/vcs-svn/LICENSE +++ b/vcs-svn/LICENSE @@ -4,6 +4,10 @@ All rights reserved. Copyright (C) 2008 Jason Evans . All rights reserved. +Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH, +Frankfurt/Main, Germany +and others, see http://svn2cc.sarovar.org + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c new file mode 100644 index 0000000..630eeb5 --- /dev/null +++ b/vcs-svn/svndump.c @@ -0,0 +1,302 @@ +/* + * Parse and rearrange a svnadmin dump. 
+ * Create the dump with: + * svnadmin dump --incremental -r: >outfile + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "cache.h" +#include "repo_tree.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "obj_pool.h" +#include "string_pool.h" + +#define NODEACT_REPLACE 4 +#define NODEACT_DELETE 3 +#define NODEACT_ADD 2 +#define NODEACT_CHANGE 1 +#define NODEACT_UNKNOWN 0 + +#define DUMP_CTX 0 +#define REV_CTX 1 +#define NODE_CTX 2 + +#define LENGTH_UNKNOWN (~0) +#define DATE_RFC2822_LEN 31 + +/* Create memory pool for log messages */ +obj_pool_gen(log, char, 4096) + +static char* log_copy(uint32_t length, char *log) +{ + char *buffer; + log_free(log_pool.size); + buffer = log_pointer(log_alloc(length)); + strncpy(buffer, log, length); + return buffer; +} + +static struct { + uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; + uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; +} node_ctx; + +static struct { + uint32_t revision, author; + unsigned long timestamp; + char *log; +} rev_ctx; + +static struct { + uint32_t uuid, url; +} dump_ctx; + +static struct { + uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, + revision_number, node_path, node_kind, node_action, + node_copyfrom_path, node_copyfrom_rev, text_content_length, + prop_content_length, content_length; +} keys; + +static void reset_node_ctx(char *fname) +{ + node_ctx.type = 0; + node_ctx.action = NODEACT_UNKNOWN; + node_ctx.propLength = LENGTH_UNKNOWN; + node_ctx.textLength = LENGTH_UNKNOWN; + node_ctx.src[0] = ~0; + node_ctx.srcRev = 0; + node_ctx.srcMode = 0; + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); + node_ctx.mark = 0; +} + +static void reset_rev_ctx(uint32_t revision) +{ + rev_ctx.revision = revision; + rev_ctx.timestamp = 0; + rev_ctx.log = NULL; + rev_ctx.author = ~0; +} + +static void reset_dump_ctx(uint32_t url) +{ + dump_ctx.url = url; + dump_ctx.uuid = ~0; 
+} + +static void init_keys(void) +{ + keys.svn_log = pool_intern("svn:log"); + keys.svn_author = pool_intern("svn:author"); + keys.svn_date = pool_intern("svn:date"); + keys.svn_executable = pool_intern("svn:executable"); + keys.svn_special = pool_intern("svn:special"); + keys.uuid = pool_intern("UUID"); + keys.revision_number = pool_intern("Revision-number"); + keys.node_path = pool_intern("Node-path"); + keys.node_kind = pool_intern("Node-kind"); + keys.node_action = pool_intern("Node-action"); + keys.node_copyfrom_path = pool_intern("Node-copyfrom-path"); + keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev"); + keys.text_content_length = pool_intern("Text-content-length"); + keys.prop_content_length = pool_intern("Prop-content-length"); + keys.content_length = pool_intern("Content-length"); +} + +static void read_props(void) +{ + uint32_t len; + uint32_t key = ~0; + char *val = NULL; + char *t; + while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { + if (!strncmp(t, "K ", 2)) { + len = atoi(&t[2]); + key = pool_intern(buffer_read_string(len)); + buffer_read_line(); + } else if (!strncmp(t, "V ", 2)) { + len = atoi(&t[2]); + val = buffer_read_string(len); + if (key == keys.svn_log) { + /* Value length excludes terminating nul. 
*/ + rev_ctx.log = log_copy(len + 1, val); + } else if (key == keys.svn_author) { + rev_ctx.author = pool_intern(val); + } else if (key == keys.svn_date) { + if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) + fprintf(stderr, "Invalid timestamp: %s\n", val); + } else if (key == keys.svn_executable) { + node_ctx.type = REPO_MODE_EXE; + } else if (key == keys.svn_special) { + node_ctx.type = REPO_MODE_LNK; + } + key = ~0; + buffer_read_line(); + } + } +} + +static void handle_node(void) +{ + if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) + read_props(); + + if (node_ctx.srcRev) + node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + + if (node_ctx.textLength != LENGTH_UNKNOWN && + node_ctx.type != REPO_MODE_DIR) + node_ctx.mark = next_blob_mark(); + + if (node_ctx.action == NODEACT_DELETE) { + repo_delete(node_ctx.dst); + } else if (node_ctx.action == NODEACT_CHANGE || + node_ctx.action == NODEACT_REPLACE) { + if (node_ctx.action == NODEACT_REPLACE && + node_ctx.type == REPO_MODE_DIR) + repo_replace(node_ctx.dst, node_ctx.mark); + else if (node_ctx.propLength != LENGTH_UNKNOWN) + repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + else if (node_ctx.textLength != LENGTH_UNKNOWN) + node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + } else if (node_ctx.action == NODEACT_ADD) { + if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) + repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) + node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || + node_ctx.textLength != LENGTH_UNKNOWN) + repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); + } + + if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) + node_ctx.type = node_ctx.srcMode; + + if (node_ctx.mark) + fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); + else if 
(node_ctx.textLength != LENGTH_UNKNOWN) + buffer_skip_bytes(node_ctx.textLength); +} + +static void handle_revision(void) +{ + if (rev_ctx.revision) + repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, + dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); +} + +void svndump_read(const char *url) +{ + char *val; + char *t; + uint32_t active_ctx = DUMP_CTX; + uint32_t len; + uint32_t key; + + reset_dump_ctx(pool_intern(url)); + while ((t = buffer_read_line())) { + val = strstr(t, ": "); + if (!val) + continue; + *val++ = '\0'; + *val++ = '\0'; + key = pool_intern(t); + + if (key == keys.uuid) { + dump_ctx.uuid = pool_intern(val); + } else if (key == keys.revision_number) { + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); + active_ctx = REV_CTX; + reset_rev_ctx(atoi(val)); + } else if (key == keys.node_path) { + if (active_ctx == NODE_CTX) + handle_node(); + active_ctx = NODE_CTX; + reset_node_ctx(val); + } else if (key == keys.node_kind) { + if (!strcmp(val, "dir")) + node_ctx.type = REPO_MODE_DIR; + else if (!strcmp(val, "file")) + node_ctx.type = REPO_MODE_BLB; + else + fprintf(stderr, "Unknown node-kind: %s\n", val); + } else if (key == keys.node_action) { + if (!strcmp(val, "delete")) { + node_ctx.action = NODEACT_DELETE; + } else if (!strcmp(val, "add")) { + node_ctx.action = NODEACT_ADD; + } else if (!strcmp(val, "change")) { + node_ctx.action = NODEACT_CHANGE; + } else if (!strcmp(val, "replace")) { + node_ctx.action = NODEACT_REPLACE; + } else { + fprintf(stderr, "Unknown node-action: %s\n", val); + node_ctx.action = NODEACT_UNKNOWN; + } + } else if (key == keys.node_copyfrom_path) { + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); + } else if (key == keys.node_copyfrom_rev) { + node_ctx.srcRev = atoi(val); + } else if (key == keys.text_content_length) { + node_ctx.textLength = atoi(val); + } else if (key == keys.prop_content_length) { + node_ctx.propLength = atoi(val); + } else if (key == 
keys.content_length) { + len = atoi(val); + buffer_read_line(); + if (active_ctx == REV_CTX) { + read_props(); + } else if (active_ctx == NODE_CTX) { + handle_node(); + active_ctx = REV_CTX; + } else { + fprintf(stderr, "Unexpected content length header: %d\n", len); + buffer_skip_bytes(len); + } + } + } + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); +} + +void svndump_init(const char *filename) +{ + buffer_init(filename); + repo_init(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); + init_keys(); +} + +void svndump_deinit(void) +{ + log_reset(); + repo_reset(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); + if (buffer_deinit()) + fprintf(stderr, "Input error\n"); + if (ferror(stdout)) + fprintf(stderr, "Output error\n"); +} + +void svndump_reset(void) +{ + log_reset(); + buffer_reset(); + repo_reset(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); +} diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h new file mode 100644 index 0000000..93c412f --- /dev/null +++ b/vcs-svn/svndump.h @@ -0,0 +1,9 @@ +#ifndef SVNDUMP_H_ +#define SVNDUMP_H_ + +void svndump_init(const char *filename); +void svndump_read(const char *url); +void svndump_deinit(void); +void svndump_reset(void); + +#endif -- cgit v0.10.2-6-g49f6 From 712308f29ab05f821365edec9550b45d8b616c2a Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Mon, 9 Aug 2010 17:55:43 -0500 Subject: Update svn-fe manual The svn-fe example does not litter the working directory with .bin files any more (hoorah!). The permissive error handling implies a known bug. We should be flagging iffy input and, even if we continue, reporting it on exit. 
Cc: David Barr Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/contrib/svn-fe/svn-fe.txt b/contrib/svn-fe/svn-fe.txt index de30f83..fb0ee56 100644 --- a/contrib/svn-fe/svn-fe.txt +++ b/contrib/svn-fe/svn-fe.txt @@ -43,11 +43,9 @@ user as committer, where 'user' is the value of the `svn:author` property and 'UUID' the repository's identifier. -To support incremental imports, 'svn-fe' will put a `git-svn-id` -line at the end of each commit log message if passed an url on the -command line. This line has the form `git-svn-id: URL@REVNO UUID`. - -Empty directories and unknown properties are silently discarded. +To support incremental imports, 'svn-fe' puts a `git-svn-id` line at +the end of each commit log message if passed an url on the command +line. This line has the form `git-svn-id: URL@REVNO UUID`. The resulting repository will generally require further processing to put each project in its own repository and to separate the history @@ -56,9 +54,9 @@ may be useful for this purpose. BUGS ---- -Litters the current working directory with .bin files for -persistence. Will be fixed when the svn-fe infrastructure is aware of -a Git working directory. +Empty directories and unknown properties are silently discarded. + +The exit status does not reflect whether an error was detected. SEE ALSO -------- -- cgit v0.10.2-6-g49f6 From d0fc40c741176a9f4092eaeb1a3d18aca60ac5df Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Mon, 9 Aug 2010 17:58:36 -0500 Subject: svn-fe manual: Clarify warning about deltas in dump files Those in the know would notice that dump file format version 2 means "svnadmin dump --no-deltas", but for the rest of us, an explicit reminder is useful. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/contrib/svn-fe/svn-fe.txt b/contrib/svn-fe/svn-fe.txt index fb0ee56..35f84bd 100644 --- a/contrib/svn-fe/svn-fe.txt +++ b/contrib/svn-fe/svn-fe.txt @@ -12,7 +12,7 @@ svnadmin dump --incremental REPO | svn-fe [url] | git fast-import DESCRIPTION ----------- -Converts a Subversion dumpfile (version: 2) into input suitable for +Converts a Subversion dumpfile into input suitable for git-fast-import(1) and similar importers. REPO is a path to a Subversion repository mirrored on the local disk. Remote Subversion repositories can be mirrored on local disk using the `svnsync` @@ -25,6 +25,9 @@ Subversion's repository dump format is documented in full in Files in this format can be generated using the 'svnadmin dump' or 'svk admin dump' command. +Dumps produced with 'svnadmin dump --deltas' (dumpfile format v3) +are not supported. + OUTPUT FORMAT ------------- The fast-import format is documented by the git-fast-import(1) -- cgit v0.10.2-6-g49f6 From be47d5a646f53454c081ec7cf0da2db945eb0f99 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Thu, 12 Aug 2010 16:30:47 -0500 Subject: vcs-svn: remove build artifacts on "make clean" Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/Makefile b/Makefile index ec84697..3452700 100644 --- a/Makefile +++ b/Makefile @@ -2208,8 +2208,8 @@ distclean: clean $(RM) configure clean: - $(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o \ - builtin/*.o $(LIB_FILE) $(XDIFF_LIB) + $(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o vcs-svn/*.o \ + builtin/*.o $(LIB_FILE) $(XDIFF_LIB) $(VCSSVN_LIB) $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X $(RM) $(TEST_PROGRAMS) $(RM) -r bin-wrappers -- cgit v0.10.2-6-g49f6 From 6ad263ce7afc6c21c3ada1691f4772993b8ae46b Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Thu, 12 Aug 2010 17:02:57 -0500 Subject: treap: style fix Missing spaces in while (0) and 
trpn_pointer(a, b). Remove parentheses around return value. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h index 1f5f51f..ee35c68 100644 --- a/vcs-svn/trp.h +++ b/vcs-svn/trp.h @@ -37,7 +37,7 @@ struct trp_root { *trpn_pointer(a_base, a_offset) = \ *trpn_pointer(a_base, old_offset); \ } \ - } while (0); + } while (0) /* Left accessors. */ #define trp_left_get(a_base, a_field, a_node) \ @@ -46,7 +46,7 @@ struct trp_root { do { \ trpn_modify(a_base, a_node); \ trp_left_get(a_base, a_field, a_node) = (a_left); \ - } while(0) + } while (0) /* Right accessors. */ #define trp_right_get(a_base, a_field, a_node) \ @@ -55,7 +55,7 @@ struct trp_root { do { \ trpn_modify(a_base, a_node); \ trp_right_get(a_base, a_field, a_node) = (a_right); \ - } while(0) + } while (0) /* * Fibonacci hash function. @@ -72,7 +72,7 @@ struct trp_root { do { \ trp_left_set(a_base, a_field, (a_node), ~0); \ trp_right_set(a_base, a_field, (a_node), ~0); \ - } while(0) + } while (0) /* Internal utility macros. 
*/ #define trpn_first(a_base, a_field, a_root, r_node) \ @@ -90,7 +90,7 @@ struct trp_root { trp_right_set(a_base, a_field, (a_node), \ trp_left_get(a_base, a_field, (r_node))); \ trp_left_set(a_base, a_field, (r_node), (a_node)); \ - } while(0) + } while (0) #define trpn_rotate_right(a_base, a_field, a_node, r_node) \ do { \ @@ -98,7 +98,7 @@ struct trp_root { trp_left_set(a_base, a_field, (a_node), \ trp_right_get(a_base, a_field, (r_node))); \ trp_right_set(a_base, a_field, (r_node), (a_node)); \ - } while(0) + } while (0) #define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \ a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \ @@ -136,7 +136,7 @@ a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \ { \ int cmp; \ uint32_t ret = treap->trp_root; \ - while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) { \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ if (cmp < 0) { \ ret = trp_left_get(a_base, a_field, ret); \ } else { \ @@ -149,7 +149,7 @@ a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) { \ int cmp; \ uint32_t ret = treap->trp_root; \ - while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) { \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ if (cmp < 0) { \ if (!~trp_left_get(a_base, a_field, ret)) \ break; \ @@ -163,7 +163,7 @@ a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \ { \ if (cur_node == ~0) { \ - return (ins_node); \ + return ins_node; \ } else { \ uint32_t ret; \ int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \ @@ -185,7 +185,7 @@ a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t i else \ ret = cur_node; \ } \ - return (ret); \ + return ret; \ } \ } \ a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ @@ -204,27 +204,27 @@ 
a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t r uint32_t right = trp_right_get(a_base, a_field, cur_node); \ if (left == ~0) { \ if (right == ~0) \ - return (~0); \ + return ~0; \ } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \ trpn_rotate_right(a_base, a_field, cur_node, ret); \ right = a_pre##remove_recurse(cur_node, rem_node); \ trp_right_set(a_base, a_field, ret, right); \ - return (ret); \ + return ret; \ } \ trpn_rotate_left(a_base, a_field, cur_node, ret); \ left = a_pre##remove_recurse(cur_node, rem_node); \ trp_left_set(a_base, a_field, ret, left); \ - return (ret); \ + return ret; \ } else if (cmp < 0) { \ uint32_t left = a_pre##remove_recurse( \ trp_left_get(a_base, a_field, cur_node), rem_node); \ trp_left_set(a_base, a_field, cur_node, left); \ - return (cur_node); \ + return cur_node; \ } else { \ uint32_t right = a_pre##remove_recurse( \ trp_right_get(a_base, a_field, cur_node), rem_node); \ trp_right_set(a_base, a_field, cur_node, right); \ - return (cur_node); \ + return cur_node; \ } \ } \ a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \ -- cgit v0.10.2-6-g49f6 From 78457bc0ccc1af8b9eb776a0b17986ebd50442bc Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 13 Aug 2010 18:59:40 -0500 Subject: compat: add strtok_r() Windows does not have strtok_r (and while it does have an identical strtok_s, it is not obvious how to use it). Grab an implementation from glibc. The svn-fe tool uses strtok_r to parse paths. Acked-by: Johannes Sixt Helped-by: Jakub Narebski Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/Makefile b/Makefile index 3452700..fbc6581 100644 --- a/Makefile +++ b/Makefile @@ -68,6 +68,8 @@ all:: # # Define NO_MKSTEMPS if you don't have mkstemps in the C library. # +# Define NO_STRTOK_R if you don't have strtok_r in the C library. +# # Define NO_LIBGEN_H if you don't have libgen.h.
# # Define NEEDS_LIBGEN if your libgen needs -lgen when linking @@ -1041,6 +1043,7 @@ ifeq ($(uname_S),Windows) NO_UNSETENV = YesPlease NO_STRCASESTR = YesPlease NO_STRLCPY = YesPlease + NO_STRTOK_R = YesPlease NO_MEMMEM = YesPlease # NEEDS_LIBICONV = YesPlease NO_ICONV = YesPlease @@ -1095,6 +1098,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) NO_UNSETENV = YesPlease NO_STRCASESTR = YesPlease NO_STRLCPY = YesPlease + NO_STRTOK_R = YesPlease NO_MEMMEM = YesPlease NEEDS_LIBICONV = YesPlease OLD_ICONV = YesPlease @@ -1325,6 +1329,10 @@ endif ifdef NO_STRTOULL COMPAT_CFLAGS += -DNO_STRTOULL endif +ifdef NO_STRTOK_R + COMPAT_CFLAGS += -DNO_STRTOK_R + COMPAT_OBJS += compat/strtok_r.o +endif ifdef NO_SETENV COMPAT_CFLAGS += -DNO_SETENV COMPAT_OBJS += compat/setenv.o diff --git a/compat/strtok_r.c b/compat/strtok_r.c new file mode 100644 index 0000000..7b5d568 --- /dev/null +++ b/compat/strtok_r.c @@ -0,0 +1,61 @@ +/* Reentrant string tokenizer. Generic version. + Copyright (C) 1991,1996-1999,2001,2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "../git-compat-util.h" + +/* Parse S into tokens separated by characters in DELIM. 
+ If S is NULL, the saved pointer in SAVE_PTR is used as + the next starting point. For example: + char s[] = "-abc-=-def"; + char *sp; + x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def" + x = strtok_r(NULL, "-=", &sp); // x = "def", sp = NULL + x = strtok_r(NULL, "=", &sp); // x = NULL + // s = "abc\0-def\0" +*/ +char * +gitstrtok_r (char *s, const char *delim, char **save_ptr) +{ + char *token; + + if (s == NULL) + s = *save_ptr; + + /* Scan leading delimiters. */ + s += strspn (s, delim); + if (*s == '\0') + { + *save_ptr = s; + return NULL; + } + + /* Find the end of the token. */ + token = s; + s = strpbrk (token, delim); + if (s == NULL) + /* This token finishes the string. */ + *save_ptr = token + strlen (token); + else + { + /* Terminate the token and make *SAVE_PTR point past it. */ + *s = '\0'; + *save_ptr = s + 1; + } + return token; +} diff --git a/config.mak.in b/config.mak.in index b4e65c3..4ffd774 100644 --- a/config.mak.in +++ b/config.mak.in @@ -46,6 +46,7 @@ NO_IPV6=@NO_IPV6@ NO_C99_FORMAT=@NO_C99_FORMAT@ NO_HSTRERROR=@NO_HSTRERROR@ NO_STRCASESTR=@NO_STRCASESTR@ +NO_STRTOK_R=@NO_STRTOK_R@ NO_MEMMEM=@NO_MEMMEM@ NO_STRLCPY=@NO_STRLCPY@ NO_UINTMAX_T=@NO_UINTMAX_T@ diff --git a/configure.ac b/configure.ac index 5601e8b..708e7b8 100644 --- a/configure.ac +++ b/configure.ac @@ -783,6 +783,12 @@ GIT_CHECK_FUNC(strcasestr, [NO_STRCASESTR=YesPlease]) AC_SUBST(NO_STRCASESTR) # +# Define NO_STRTOK_R if you don't have strtok_r +GIT_CHECK_FUNC(strtok_r, +[NO_STRTOK_R=], +[NO_STRTOK_R=YesPlease]) +AC_SUBST(NO_STRTOK_R) +# # Define NO_MEMMEM if you don't have memmem. 
GIT_CHECK_FUNC(memmem, [NO_MEMMEM=], diff --git a/git-compat-util.h b/git-compat-util.h index 02a73ee..28d6b00 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -312,6 +312,11 @@ extern size_t gitstrlcpy(char *, const char *, size_t); extern uintmax_t gitstrtoumax(const char *, char **, int); #endif +#ifdef NO_STRTOK_R +#define strtok_r gitstrtok_r +extern char *gitstrtok_r(char *s, const char *delim, char **save_ptr); +#endif + #ifdef NO_HSTRERROR #define hstrerror githstrerror extern const char *githstrerror(int herror); -- cgit v0.10.2-6-g49f6 From 68b4cfbc91583b43e96d38b8d7efc8e6690589ad Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 13 Aug 2010 19:01:34 -0500 Subject: vcs-svn: Rename dirent pool to build on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dirent is #define’d to mingw_dirent in compat/mingw.h, with the result that obj_pool_gen(dirent, struct repo_dirent, 4096) creates functions with names like mingw_dirent_alloc and references to dirent_alloc go unresolved. Rename the functions to dent_* to avoid this problem. 
Reported-by: Johannes Sixt Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index c3d7ee7..e94d91d 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -30,7 +30,7 @@ struct repo_commit { /* Memory pools for commit, dir and dirent */ obj_pool_gen(commit, struct repo_commit, 4096) obj_pool_gen(dir, struct repo_dir, 4096) -obj_pool_gen(dirent, struct repo_dirent, 4096) +obj_pool_gen(dent, struct repo_dirent, 4096) static uint32_t active_commit; static uint32_t mark; @@ -38,7 +38,7 @@ static uint32_t mark; static int repo_dirent_name_cmp(const void *a, const void *b); /* Treap for directory entries */ -trp_gen(static, dirent_, struct repo_dirent, children, dirent, repo_dirent_name_cmp); +trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp); uint32_t next_blob_mark(void) { @@ -52,27 +52,27 @@ static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit) static struct repo_dirent *repo_first_dirent(struct repo_dir *dir) { - return dirent_first(&dir->entries); + return dent_first(&dir->entries); } static int repo_dirent_name_cmp(const void *a, const void *b) { - const struct repo_dirent *dirent1 = a, *dirent2 = b; - uint32_t a_offset = dirent1->name_offset; - uint32_t b_offset = dirent2->name_offset; + const struct repo_dirent *dent1 = a, *dent2 = b; + uint32_t a_offset = dent1->name_offset; + uint32_t b_offset = dent2->name_offset; return (a_offset > b_offset) - (a_offset < b_offset); } -static int repo_dirent_is_dir(struct repo_dirent *dirent) +static int repo_dirent_is_dir(struct repo_dirent *dent) { - return dirent != NULL && dirent->mode == REPO_MODE_DIR; + return dent != NULL && dent->mode == REPO_MODE_DIR; } -static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dirent) +static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dent) { - if (!repo_dirent_is_dir(dirent)) + if (!repo_dirent_is_dir(dent)) return NULL; - return 
dir_pointer(dirent->content_offset); + return dir_pointer(dent->content_offset); } static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) @@ -90,19 +90,19 @@ static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path) { uint32_t name = 0; - struct repo_dirent *key = dirent_pointer(dirent_alloc(1)); + struct repo_dirent *key = dent_pointer(dent_alloc(1)); struct repo_dir *dir = NULL; - struct repo_dirent *dirent = NULL; + struct repo_dirent *dent = NULL; dir = repo_commit_root_dir(commit_pointer(revision)); while (~(name = *path++)) { key->name_offset = name; - dirent = dirent_search(&dir->entries, key); - if (dirent == NULL || !repo_dirent_is_dir(dirent)) + dent = dent_search(&dir->entries, key); + if (dent == NULL || !repo_dirent_is_dir(dent)) break; - dir = repo_dir_from_dirent(dirent); + dir = repo_dir_from_dirent(dent); } - dirent_free(1); - return dirent; + dent_free(1); + return dent; } static void repo_write_dirent(uint32_t *path, uint32_t mode, @@ -111,7 +111,7 @@ static void repo_write_dirent(uint32_t *path, uint32_t mode, uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; struct repo_dir *dir; struct repo_dirent *key; - struct repo_dirent *dirent = NULL; + struct repo_dirent *dent = NULL; revision = active_commit; dir = repo_commit_root_dir(commit_pointer(revision)); dir = repo_clone_dir(dir); @@ -119,52 +119,52 @@ static void repo_write_dirent(uint32_t *path, uint32_t mode, while (~(name = *path++)) { parent_dir_o = dir_offset(dir); - key = dirent_pointer(dirent_alloc(1)); + key = dent_pointer(dent_alloc(1)); key->name_offset = name; - dirent = dirent_search(&dir->entries, key); - if (dirent == NULL) - dirent = key; + dent = dent_search(&dir->entries, key); + if (dent == NULL) + dent = key; else - dirent_free(1); + dent_free(1); - if (dirent == key) { - dirent->mode = REPO_MODE_DIR; - dirent->content_offset = 0; - dirent_insert(&dir->entries, dirent); + 
if (dent == key) { + dent->mode = REPO_MODE_DIR; + dent->content_offset = 0; + dent_insert(&dir->entries, dent); } - if (dirent_offset(dirent) < dirent_pool.committed) { - dir_o = repo_dirent_is_dir(dirent) ? - dirent->content_offset : ~0; - dirent_remove(&dir->entries, dirent); - dirent = dirent_pointer(dirent_alloc(1)); - dirent->name_offset = name; - dirent->mode = REPO_MODE_DIR; - dirent->content_offset = dir_o; - dirent_insert(&dir->entries, dirent); + if (dent_offset(dent) < dent_pool.committed) { + dir_o = repo_dirent_is_dir(dent) ? + dent->content_offset : ~0; + dent_remove(&dir->entries, dent); + dent = dent_pointer(dent_alloc(1)); + dent->name_offset = name; + dent->mode = REPO_MODE_DIR; + dent->content_offset = dir_o; + dent_insert(&dir->entries, dent); } - dir = repo_dir_from_dirent(dirent); + dir = repo_dir_from_dirent(dent); dir = repo_clone_dir(dir); - dirent->content_offset = dir_offset(dir); + dent->content_offset = dir_offset(dir); } - if (dirent == NULL) + if (dent == NULL) return; - dirent->mode = mode; - dirent->content_offset = content_offset; + dent->mode = mode; + dent->content_offset = content_offset; if (del && ~parent_dir_o) - dirent_remove(&dir_pointer(parent_dir_o)->entries, dirent); + dent_remove(&dir_pointer(parent_dir_o)->entries, dent); } uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) { uint32_t mode = 0, content_offset = 0; - struct repo_dirent *src_dirent; - src_dirent = repo_read_dirent(revision, src); - if (src_dirent != NULL) { - mode = src_dirent->mode; - content_offset = src_dirent->content_offset; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(revision, src); + if (src_dent != NULL) { + mode = src_dent->mode; + content_offset = src_dent->content_offset; repo_write_dirent(dst, mode, content_offset, 0); } return mode; @@ -178,10 +178,10 @@ void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) { uint32_t mode = 0; - struct 
repo_dirent *src_dirent; - src_dirent = repo_read_dirent(active_commit, path); - if (src_dirent != NULL) { - mode = src_dirent->mode; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(active_commit, path); + if (src_dent != NULL) { + mode = src_dent->mode; repo_write_dirent(path, mode, blob_mark, 0); } return mode; @@ -189,10 +189,10 @@ uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) { - struct repo_dirent *src_dirent; - src_dirent = repo_read_dirent(active_commit, path); - if (src_dirent != NULL && blob_mark == 0) - blob_mark = src_dirent->content_offset; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(active_commit, path); + if (src_dent != NULL && blob_mark == 0) + blob_mark = src_dent->content_offset; repo_write_dirent(path, mode, blob_mark, 0); } @@ -203,13 +203,13 @@ void repo_delete(uint32_t *path) static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir); -static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dirent) +static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dent) { - if (repo_dirent_is_dir(dirent)) - repo_git_add_r(depth, path, repo_dir_from_dirent(dirent)); + if (repo_dirent_is_dir(dent)) + repo_git_add_r(depth, path, repo_dir_from_dirent(dent)); else fast_export_modify(depth, path, - dirent->mode, dirent->content_offset); + dent->mode, dent->content_offset); } static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) @@ -218,7 +218,7 @@ static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) while (de) { path[depth] = de->name_offset; repo_git_add(depth + 1, path, de); - de = dirent_next(&dir->entries, de); + de = dent_next(&dir->entries, de); } } @@ -233,13 +233,13 @@ static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, if (de1->name_offset < de2->name_offset) { path[depth] = de1->name_offset; 
fast_export_delete(depth + 1, path); - de1 = dirent_next(&dir1->entries, de1); + de1 = dent_next(&dir1->entries, de1); continue; } if (de1->name_offset > de2->name_offset) { path[depth] = de2->name_offset; repo_git_add(depth + 1, path, de2); - de2 = dirent_next(&dir2->entries, de2); + de2 = dent_next(&dir2->entries, de2); continue; } path[depth] = de1->name_offset; @@ -257,18 +257,18 @@ static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, fast_export_delete(depth + 1, path); repo_git_add(depth + 1, path, de2); } - de1 = dirent_next(&dir1->entries, de1); - de2 = dirent_next(&dir2->entries, de2); + de1 = dent_next(&dir1->entries, de1); + de2 = dent_next(&dir2->entries, de2); } while (de1) { path[depth] = de1->name_offset; fast_export_delete(depth + 1, path); - de1 = dirent_next(&dir1->entries, de1); + de1 = dent_next(&dir1->entries, de1); } while (de2) { path[depth] = de2->name_offset; repo_git_add(depth + 1, path, de2); - de2 = dirent_next(&dir2->entries, de2); + de2 = dent_next(&dir2->entries, de2); } } @@ -286,7 +286,7 @@ void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, unsigned long timestamp) { fast_export_commit(revision, author, log, uuid, url, timestamp); - dirent_commit(); + dent_commit(); dir_commit(); active_commit = commit_alloc(1); commit_pointer(active_commit)->root_dir_offset = @@ -297,10 +297,10 @@ static void mark_init(void) { uint32_t i; mark = 0; - for (i = 0; i < dirent_pool.size; i++) - if (!repo_dirent_is_dir(dirent_pointer(i)) && - dirent_pointer(i)->content_offset > mark) - mark = dirent_pointer(i)->content_offset; + for (i = 0; i < dent_pool.size; i++) + if (!repo_dirent_is_dir(dent_pointer(i)) && + dent_pointer(i)->content_offset > mark) + mark = dent_pointer(i)->content_offset; mark++; } @@ -325,5 +325,5 @@ void repo_reset(void) pool_reset(); commit_reset(); dir_reset(); - dirent_reset(); + dent_reset(); } -- cgit v0.10.2-6-g49f6 From 
6117abae569e53485f7a90d2595b135c7beb3c96 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 13 Aug 2010 19:03:17 -0500 Subject: vcs-svn: Avoid %z in format string In the spirit of v1.6.4-rc0~124 (MinGW: Fix compiler warning in merge-recursive, 2009-05-23), use a 32-bit integer instead; the dump file parser does not support any better, anyway. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 3a6156f..256a052 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -48,8 +48,9 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, ~author ? pool_fetch(author) : "nobody", ~author ? pool_fetch(author) : "nobody", ~uuid ? pool_fetch(uuid) : "local", timestamp); - printf("data %zd\n%s%s\n", - strlen(log) + strlen(gitsvnline), log, gitsvnline); + printf("data %"PRIu32"\n%s%s\n", + (uint32_t) (strlen(log) + strlen(gitsvnline)), + log, gitsvnline); if (!first_commit_done) { if (revision > 1) printf("from refs/heads/master^0\n"); -- cgit v0.10.2-6-g49f6 From 24f1136894b6db0a21b7b89a77da2ae2909487e9 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 13 Aug 2010 19:04:50 -0500 Subject: t9010 (svn-fe): use Unix-style path in URI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since v1.6.3-rc0~101^2~14 (Tests on Windows: $(pwd) must return Windows-style paths, 2009-03-13), there is a subtle difference between $(pwd) and $PWD in tests: the former returns Windows-style paths as might be output by git and the latter Unix-style paths which msys programs tend to prefer. In file:// URIs, Unix-style paths are needed. Before: “svn export” declares it cannot find file://c:/apps/git/git/t/trash directory/simple-svco After: “svn export” successfully finds file:///c/apps/git/git/... 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh index bf9bbd6..dd8e78e 100644 --- a/t/t9010-svn-fe.sh +++ b/t/t9010-svn-fe.sh @@ -10,7 +10,7 @@ test_dump() { test_expect_success "$dump" ' svnadmin create "$label-svn" && svnadmin load "$label-svn" < "$TEST_DIRECTORY/$dump" && - svn_cmd export "file://$(pwd)/$label-svn" "$label-svnco" && + svn_cmd export "file://$PWD/$label-svn" "$label-svnco" && git init "$label-git" && test-svn-fe "$TEST_DIRECTORY/$dump" >"$label.fe" && ( -- cgit v0.10.2-6-g49f6 From 7e45e0569c6b659dbe4f7a95d59ee0c98f8b7cb6 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 13 Aug 2010 19:06:15 -0500 Subject: t9010 (svn-fe): avoid symlinks in test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The svn-fe test fails on Windows in the “svn export” step because of the lack of symlink support. With a less ambitious dump, it passes. Acked-by: Johannes Sixt Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh index dd8e78e..a713dfc 100644 --- a/t/t9010-svn-fe.sh +++ b/t/t9010-svn-fe.sh @@ -27,6 +27,6 @@ test_dump() { ' } -test_dump simple t9111/svnsync.dump +test_dump simple t9135/svn.dump test_done -- cgit v0.10.2-6-g49f6 From cd9a7b57a7118672441f9e9670a9fbb42249cf67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 19 Aug 2010 15:53:50 +0000 Subject: t/t9010-svn-fe.sh: add an +x bit to this test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh old mode 100644 new mode 100755 -- cgit v0.10.2-6-g49f6