diff options
-rw-r--r-- | Documentation/git-ls-files.txt | 9 | ||||
-rw-r--r-- | Documentation/git-ls-tree.txt | 9 | ||||
-rw-r--r-- | Makefile | 15 | ||||
-rw-r--r-- | count-delta.c | 72 | ||||
-rw-r--r-- | count-delta.h | 10 | ||||
-rw-r--r-- | diff.c | 286 | ||||
-rw-r--r-- | diffcore-break.c | 46 | ||||
-rw-r--r-- | diffcore-delta.c | 228 | ||||
-rw-r--r-- | diffcore-rename.c | 27 | ||||
-rw-r--r-- | diffcore.h | 7 | ||||
-rwxr-xr-x | git-push.sh | 6 | ||||
-rwxr-xr-x | git-send-email.perl | 86 | ||||
-rw-r--r-- | ls-files.c | 19 | ||||
-rw-r--r-- | ls-tree.c | 19 | ||||
-rw-r--r-- | rev-list.c | 11 | ||||
-rwxr-xr-x | t/t4001-diff-rename.sh | 2 | ||||
-rwxr-xr-x | t/t5000-tar-tree.sh | 3 | ||||
-rw-r--r-- | tar-tree.c | 385 | ||||
-rw-r--r-- | tar.h | 25 | ||||
-rw-r--r-- | xdiff/xdiff.h | 91 | ||||
-rw-r--r-- | xdiff/xdiffi.c | 469 | ||||
-rw-r--r-- | xdiff/xdiffi.h | 60 | ||||
-rw-r--r-- | xdiff/xemit.c | 141 | ||||
-rw-r--r-- | xdiff/xemit.h | 34 | ||||
-rw-r--r-- | xdiff/xinclude.h | 42 | ||||
-rw-r--r-- | xdiff/xmacros.h | 53 | ||||
-rw-r--r-- | xdiff/xprepare.c | 436 | ||||
-rw-r--r-- | xdiff/xprepare.h | 35 | ||||
-rw-r--r-- | xdiff/xtypes.h | 68 | ||||
-rw-r--r-- | xdiff/xutils.c | 277 | ||||
-rw-r--r-- | xdiff/xutils.h | 44 |
31 files changed, 2472 insertions, 543 deletions
diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt index 980c5c9..796d049 100644 --- a/Documentation/git-ls-files.txt +++ b/Documentation/git-ls-files.txt @@ -14,9 +14,9 @@ SYNOPSIS (-[c|d|o|i|s|u|k|m])\* [-x <pattern>|--exclude=<pattern>] [-X <file>|--exclude-from=<file>] - [--exclude-per-directory=<file>] + [--exclude-per-directory=<file>] [--error-unmatch] - [--full-name] [--] [<file>]\* + [--full-name] [--abbrev] [--] [<file>]\* DESCRIPTION ----------- @@ -101,6 +101,11 @@ OPTIONS option forces paths to be output relative to the project top directory. +--abbrev[=<n>]:: + Instead of showing the full 40-byte hexadecimal object + lines, show only handful hexdigits prefix. + Non default number of digits can be specified with --abbrev=<n>. + --:: Do not interpret any more arguments as options. diff --git a/Documentation/git-ls-tree.txt b/Documentation/git-ls-tree.txt index 5bf6d8b..018c401 100644 --- a/Documentation/git-ls-tree.txt +++ b/Documentation/git-ls-tree.txt @@ -8,7 +8,9 @@ git-ls-tree - Lists the contents of a tree object SYNOPSIS -------- -'git-ls-tree' [-d] [-r] [-t] [-z] [--name-only] [--name-status] <tree-ish> [paths...] +'git-ls-tree' [-d] [-r] [-t] [-z] + [--name-only] [--name-status] [--full-name] [--abbrev=[<n>]] + <tree-ish> [paths...] DESCRIPTION ----------- @@ -40,6 +42,11 @@ OPTIONS --name-status:: List only filenames (instead of the "long" output), one per line. +--abbrev[=<n>]:: + Instead of showing the full 40-byte hexadecimal object + lines, show only handful hexdigits prefix. + Non default number of digits can be specified with --abbrev=<n>. + paths:: When paths are given, show them (note that this isn't really raw pathnames, but rather a list of patterns to match). Otherwise @@ -188,9 +188,10 @@ PYMODULES = \ gitMergeCommon.py LIB_FILE=libgit.a +XDIFF_LIB=xdiff/lib.a LIB_H = \ - blob.h cache.h commit.h count-delta.h csum-file.h delta.h \ + blob.h cache.h commit.h csum-file.h delta.h \ diff.h object.h pack.h pkt-line.h quote.h refs.h \ run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h @@ -200,7 +201,7 @@ DIFF_OBJS = \ diffcore-delta.o LIB_OBJS = \ - blob.o commit.o connect.o count-delta.o csum-file.o \ + blob.o commit.o connect.o csum-file.o \ date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \ object.o pack-check.o patch-delta.o path.o pkt-line.o \ quote.o read-cache.o refs.o run-command.o \ @@ -209,7 +210,7 @@ LIB_OBJS = \ fetch-clone.o revision.o pager.o \ $(DIFF_OBJS) -LIBS = $(LIB_FILE) +LIBS = $(LIB_FILE) $(XDIFF_LIB) LIBS += -lz # @@ -544,12 +545,18 @@ init-db.o: init-db.c -DDEFAULT_GIT_TEMPLATE_DIR='"$(template_dir_SQ)"' $*.c $(LIB_OBJS): $(LIB_H) -$(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIB_H) +$(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIBS) $(DIFF_OBJS): diffcore.h $(LIB_FILE): $(LIB_OBJS) $(AR) rcs $@ $(LIB_OBJS) +XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o + +$(XDIFF_LIB): $(XDIFF_OBJS) + $(AR) rcs $@ $(XDIFF_OBJS) + + doc: $(MAKE) -C Documentation all diff --git a/count-delta.c b/count-delta.c deleted file mode 100644 index 058a2aa..0000000 --- a/count-delta.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2005 Junio C Hamano - * The delta-parsing part is almost straight copy of patch-delta.c - * which is (C) 2005 Nicolas Pitre <nico@cam.org>. - */ -#include <stdlib.h> -#include <string.h> -#include <limits.h> -#include "delta.h" -#include "count-delta.h" - -/* - * NOTE. We do not _interpret_ delta fully. As an approximation, we - * just count the number of bytes that are copied from the source, and - * the number of literal data bytes that are inserted. - * - * Number of bytes that are _not_ copied from the source is deletion, - * and number of inserted literal bytes are addition, so sum of them - * is the extent of damage. - */ -int count_delta(void *delta_buf, unsigned long delta_size, - unsigned long *src_copied, unsigned long *literal_added) -{ - unsigned long copied_from_source, added_literal; - const unsigned char *data, *top; - unsigned char cmd; - unsigned long src_size, dst_size, out; - - if (delta_size < DELTA_SIZE_MIN) - return -1; - - data = delta_buf; - top = delta_buf + delta_size; - - src_size = get_delta_hdr_size(&data); - dst_size = get_delta_hdr_size(&data); - - added_literal = copied_from_source = out = 0; - while (data < top) { - cmd = *data++; - if (cmd & 0x80) { - unsigned long cp_off = 0, cp_size = 0; - if (cmd & 0x01) cp_off = *data++; - if (cmd & 0x02) cp_off |= (*data++ << 8); - if (cmd & 0x04) cp_off |= (*data++ << 16); - if (cmd & 0x08) cp_off |= (*data++ << 24); - if (cmd & 0x10) cp_size = *data++; - if (cmd & 0x20) cp_size |= (*data++ << 8); - if (cmd & 0x40) cp_size |= (*data++ << 16); - if (cp_size == 0) cp_size = 0x10000; - - copied_from_source += cp_size; - out += cp_size; - } else { - /* write literal into dst */ - added_literal += cmd; - out += cmd; - data += cmd; - } - } - - /* sanity check */ - if (data != top || out != dst_size) - return -1; - - /* delete size is what was _not_ copied from source. - * edit size is that and literal additions. - */ - *src_copied = copied_from_source; - *literal_added = added_literal; - return 0; -} diff --git a/count-delta.h b/count-delta.h deleted file mode 100644 index 7359629..0000000 --- a/count-delta.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Copyright (C) 2005 Junio C Hamano - */ -#ifndef COUNT_DELTA_H -#define COUNT_DELTA_H - -int count_delta(void *, unsigned long, - unsigned long *src_copied, unsigned long *literal_added); - -#endif @@ -8,8 +8,7 @@ #include "quote.h" #include "diff.h" #include "diffcore.h" - -static const char *diff_opts = "-pu"; +#include "xdiff/xdiff.h" static int use_size_cache; @@ -69,25 +68,10 @@ static const char *external_diff(void) { static const char *external_diff_cmd = NULL; static int done_preparing = 0; - const char *env_diff_opts; if (done_preparing) return external_diff_cmd; - - /* - * Default values above are meant to match the - * Linux kernel development style. Examples of - * alternative styles you can specify via environment - * variables are: - * - * GIT_DIFF_OPTS="-c"; - */ external_diff_cmd = getenv("GIT_EXTERNAL_DIFF"); - - /* In case external diff fails... */ - env_diff_opts = getenv("GIT_DIFF_OPTS"); - if (env_diff_opts) diff_opts = env_diff_opts; - done_preparing = 1; return external_diff_cmd; } @@ -101,13 +85,12 @@ static struct diff_tempfile { char tmp_path[TEMPFILE_PATH_LEN]; } diff_temp[2]; -static int count_lines(const char *filename) +static int count_lines(const char *data, int size) { - FILE *in; int count, ch, completely_empty = 1, nl_just_seen = 0; - in = fopen(filename, "r"); count = 0; - while ((ch = fgetc(in)) != EOF) + while (0 < size--) { + ch = *data++; if (ch == '\n') { count++; nl_just_seen = 1; @@ -117,7 +100,7 @@ static int count_lines(const char *filename) nl_just_seen = 0; completely_empty = 0; } - fclose(in); + } if (completely_empty) return 0; if (!nl_just_seen) @@ -140,12 +123,11 @@ static void print_line_count(int count) } } -static void copy_file(int prefix, const char *filename) +static void copy_file(int prefix, const char *data, int size) { - FILE *in; int ch, nl_just_seen = 1; - in = fopen(filename, "r"); - while ((ch = fgetc(in)) != EOF) { + while (0 < size--) { + ch = *data++; if (nl_just_seen) putchar(prefix); putchar(ch); @@ -154,92 +136,106 @@ static void copy_file(int prefix, const char *filename) else nl_just_seen = 0; } - fclose(in); if (!nl_just_seen) printf("\n\\ No newline at end of file\n"); } static void emit_rewrite_diff(const char *name_a, const char *name_b, - struct diff_tempfile *temp) + struct diff_filespec *one, + struct diff_filespec *two) { /* Use temp[i].name as input, name_a and name_b as labels */ int lc_a, lc_b; - lc_a = count_lines(temp[0].name); - lc_b = count_lines(temp[1].name); + lc_a = count_lines(one->data, one->size); + lc_b = count_lines(two->data, two->size); printf("--- %s\n+++ %s\n@@ -", name_a, name_b); print_line_count(lc_a); printf(" +"); print_line_count(lc_b); printf(" @@\n"); if (lc_a) - copy_file('-', temp[0].name); + copy_file('-', one->data, one->size); if (lc_b) - copy_file('+', temp[1].name); + copy_file('+', two->data, two->size); } -static const char *builtin_diff(const char *name_a, - const char *name_b, - struct diff_tempfile *temp, - const char *xfrm_msg, - int complete_rewrite, - const char **args) +static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one) { - int i, next_at, cmd_size; - const char *const diff_cmd = "diff -L%s -L%s"; - const char *const diff_arg = "-- %s %s||:"; /* "||:" is to return 0 */ - const char *input_name_sq[2]; - const char *label_path[2]; - char *cmd; - - /* diff_cmd and diff_arg have 4 %s in total which makes - * the sum of these strings 8 bytes larger than required. - * we use 2 spaces around diff-opts, and we need to count - * terminating NUL; we used to subtract 5 here, but we do not - * care about small leaks in this subprocess that is about - * to exec "diff" anymore. - */ - cmd_size = (strlen(diff_cmd) + strlen(diff_opts) + strlen(diff_arg) - + 128); - - for (i = 0; i < 2; i++) { - input_name_sq[i] = sq_quote(temp[i].name); - if (!strcmp(temp[i].name, "/dev/null")) - label_path[i] = "/dev/null"; - else if (!i) - label_path[i] = sq_quote(quote_two("a/", name_a)); - else - label_path[i] = sq_quote(quote_two("b/", name_b)); - cmd_size += (strlen(label_path[i]) + strlen(input_name_sq[i])); + if (!DIFF_FILE_VALID(one)) { + mf->ptr = ""; /* does not matter */ + mf->size = 0; + return 0; + } + else if (diff_populate_filespec(one, 0)) + return -1; + mf->ptr = one->data; + mf->size = one->size; + return 0; +} + +struct emit_callback { + const char **label_path; +}; + +static int fn_out(void *priv, mmbuffer_t *mb, int nbuf) +{ + int i; + struct emit_callback *ecbdata = priv; + + if (ecbdata->label_path[0]) { + printf("--- %s\n", ecbdata->label_path[0]); + printf("+++ %s\n", ecbdata->label_path[1]); + ecbdata->label_path[0] = ecbdata->label_path[1] = NULL; } + for (i = 0; i < nbuf; i++) + if (!fwrite(mb[i].ptr, mb[i].size, 1, stdout)) + return -1; + return 0; +} + +#define FIRST_FEW_BYTES 8000 +static int mmfile_is_binary(mmfile_t *mf) +{ + long sz = mf->size; + if (FIRST_FEW_BYTES < sz) + sz = FIRST_FEW_BYTES; + if (memchr(mf->ptr, 0, sz)) + return 1; + return 0; +} - cmd = xmalloc(cmd_size); - - next_at = 0; - next_at += snprintf(cmd+next_at, cmd_size-next_at, - diff_cmd, label_path[0], label_path[1]); - next_at += snprintf(cmd+next_at, cmd_size-next_at, - " %s ", diff_opts); - next_at += snprintf(cmd+next_at, cmd_size-next_at, - diff_arg, input_name_sq[0], input_name_sq[1]); - - printf("diff --git %s %s\n", - quote_two("a/", name_a), quote_two("b/", name_b)); - if (label_path[0][0] == '/') { - /* dev/null */ - printf("new file mode %s\n", temp[1].mode); +static void builtin_diff(const char *name_a, + const char *name_b, + struct diff_filespec *one, + struct diff_filespec *two, + const char *xfrm_msg, + int complete_rewrite) +{ + mmfile_t mf1, mf2; + const char *lbl[2]; + char *a_one, *b_two; + + a_one = quote_two("a/", name_a); + b_two = quote_two("b/", name_b); + lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null"; + lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null"; + printf("diff --git %s %s\n", a_one, b_two); + if (lbl[0][0] == '/') { + /* /dev/null */ + printf("new file mode %06o\n", two->mode); if (xfrm_msg && xfrm_msg[0]) puts(xfrm_msg); } - else if (label_path[1][0] == '/') { - printf("deleted file mode %s\n", temp[0].mode); + else if (lbl[1][0] == '/') { + printf("deleted file mode %06o\n", one->mode); if (xfrm_msg && xfrm_msg[0]) puts(xfrm_msg); } else { - if (strcmp(temp[0].mode, temp[1].mode)) { - printf("old mode %s\n", temp[0].mode); - printf("new mode %s\n", temp[1].mode); + if (one->mode != two->mode) { + printf("old mode %06o\n", one->mode); + printf("new mode %06o\n", two->mode); } if (xfrm_msg && xfrm_msg[0]) puts(xfrm_msg); @@ -247,20 +243,45 @@ static const char *builtin_diff(const char *name_a, * we do not run diff between different kind * of objects. */ - if (strncmp(temp[0].mode, temp[1].mode, 3)) - return NULL; + if ((one->mode ^ two->mode) & S_IFMT) + goto free_ab_and_return; if (complete_rewrite) { - emit_rewrite_diff(name_a, name_b, temp); - return NULL; + emit_rewrite_diff(name_a, name_b, one, two); + goto free_ab_and_return; } } - /* This is disgusting */ - *args++ = "sh"; - *args++ = "-c"; - *args++ = cmd; - *args = NULL; - return "/bin/sh"; + if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) + die("unable to read files to diff"); + + if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) + printf("Binary files %s and %s differ\n", lbl[0], lbl[1]); + else { + /* Crazy xdl interfaces.. */ + const char *diffopts = getenv("GIT_DIFF_OPTS"); + xpparam_t xpp; + xdemitconf_t xecfg; + xdemitcb_t ecb; + struct emit_callback ecbdata; + + ecbdata.label_path = lbl; + xpp.flags = XDF_NEED_MINIMAL; + xecfg.ctxlen = 3; + if (!diffopts) + ; + else if (!strncmp(diffopts, "--unified=", 10)) + xecfg.ctxlen = strtoul(diffopts + 10, NULL, 10); + else if (!strncmp(diffopts, "-u", 2)) + xecfg.ctxlen = strtoul(diffopts + 2, NULL, 10); + ecb.outf = fn_out; + ecb.priv = &ecbdata; + xdl_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); + } + + free_ab_and_return: + free(a_one); + free(b_two); + return; } struct diff_filespec *alloc_filespec(const char *path) @@ -463,6 +484,8 @@ void diff_free_filespec_data(struct diff_filespec *s) munmap(s->data, s->size); s->should_free = s->should_munmap = 0; s->data = NULL; + free(s->cnt_data); + s->cnt_data = NULL; } static void prep_temp_blob(struct diff_tempfile *temp, @@ -618,6 +641,7 @@ static void run_external_diff(const char *pgm, int retval; static int atexit_asked = 0; const char *othername; + const char **arg = &spawn_arg[0]; othername = (other? other : name); if (one && two) { @@ -632,36 +656,25 @@ static void run_external_diff(const char *pgm, signal(SIGINT, remove_tempfile_on_signal); } - if (pgm) { - const char **arg = &spawn_arg[0]; - if (one && two) { - *arg++ = pgm; - *arg++ = name; - *arg++ = temp[0].name; - *arg++ = temp[0].hex; - *arg++ = temp[0].mode; - *arg++ = temp[1].name; - *arg++ = temp[1].hex; - *arg++ = temp[1].mode; - if (other) { - *arg++ = other; - *arg++ = xfrm_msg; - } - } else { - *arg++ = pgm; - *arg++ = name; + if (one && two) { + *arg++ = pgm; + *arg++ = name; + *arg++ = temp[0].name; + *arg++ = temp[0].hex; + *arg++ = temp[0].mode; + *arg++ = temp[1].name; + *arg++ = temp[1].hex; + *arg++ = temp[1].mode; + if (other) { + *arg++ = other; + *arg++ = xfrm_msg; } - *arg = NULL; } else { - if (one && two) { - pgm = builtin_diff(name, othername, temp, xfrm_msg, complete_rewrite, spawn_arg); - } else - printf("* Unmerged path %s\n", name); + *arg++ = pgm; + *arg++ = name; } - - retval = 0; - if (pgm) - retval = spawn_prog(pgm, spawn_arg); + *arg = NULL; + retval = spawn_prog(pgm, spawn_arg); remove_tempfile(); if (retval) { fprintf(stderr, "external diff died, stopping at %s.\n", name); @@ -669,6 +682,26 @@ static void run_external_diff(const char *pgm, } } +static void run_diff_cmd(const char *pgm, + const char *name, + const char *other, + struct diff_filespec *one, + struct diff_filespec *two, + const char *xfrm_msg, + int complete_rewrite) +{ + if (pgm) { + run_external_diff(pgm, name, other, one, two, xfrm_msg, + complete_rewrite); + return; + } + if (one && two) + builtin_diff(name, other ? other : name, + one, two, xfrm_msg, complete_rewrite); + else + printf("* Unmerged path %s\n", name); +} + static void diff_fill_sha1_info(struct diff_filespec *one) { if (DIFF_FILE_VALID(one)) { @@ -698,8 +731,7 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o) if (DIFF_PAIR_UNMERGED(p)) { /* unmerged */ - run_external_diff(pgm, p->one->path, NULL, NULL, NULL, NULL, - 0); + run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, 0); return; } @@ -771,15 +803,15 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o) * needs to be split into deletion and creation. */ struct diff_filespec *null = alloc_filespec(two->path); - run_external_diff(NULL, name, other, one, null, xfrm_msg, 0); + run_diff_cmd(NULL, name, other, one, null, xfrm_msg, 0); free(null); null = alloc_filespec(one->path); - run_external_diff(NULL, name, other, null, two, xfrm_msg, 0); + run_diff_cmd(NULL, name, other, null, two, xfrm_msg, 0); free(null); } else - run_external_diff(pgm, name, other, one, two, xfrm_msg, - complete_rewrite); + run_diff_cmd(pgm, name, other, one, two, xfrm_msg, + complete_rewrite); free(name_munged); free(other_munged); diff --git a/diffcore-break.c b/diffcore-break.c index 0fc2b86..ed0e14c 100644 --- a/diffcore-break.c +++ b/diffcore-break.c @@ -45,8 +45,8 @@ static int should_break(struct diff_filespec *src, * The value we return is 1 if we want the pair to be broken, * or 0 if we do not. */ - unsigned long delta_size, base_size, src_copied, literal_added; - int to_break = 0; + unsigned long delta_size, base_size, src_copied, literal_added, + src_removed; *merge_score_p = 0; /* assume no deletion --- "do not break" * is the default. @@ -68,37 +68,45 @@ static int should_break(struct diff_filespec *src, if (diffcore_count_changes(src->data, src->size, dst->data, dst->size, + NULL, NULL, 0, &src_copied, &literal_added)) return 0; + /* sanity */ + if (src->size < src_copied) + src_copied = src->size; + if (dst->size < literal_added + src_copied) { + if (src_copied < dst->size) + literal_added = dst->size - src_copied; + else + literal_added = 0; + } + src_removed = src->size - src_copied; + /* Compute merge-score, which is "how much is removed * from the source material". The clean-up stage will * merge the surviving pair together if the score is * less than the minimum, after rename/copy runs. */ - if (src->size <= src_copied) - ; /* all copied, nothing removed */ - else { - delta_size = src->size - src_copied; - *merge_score_p = delta_size * MAX_SCORE / src->size; - } - + *merge_score_p = src_removed * MAX_SCORE / src->size; + /* Extent of damage, which counts both inserts and * deletes. */ - if (src->size + literal_added <= src_copied) - delta_size = 0; /* avoid wrapping around */ - else - delta_size = (src->size - src_copied) + literal_added; - - /* We break if the edit exceeds the minimum. - * i.e. (break_score / MAX_SCORE < delta_size / base_size) + delta_size = src_removed + literal_added; + if (delta_size * MAX_SCORE / base_size < break_score) + return 0; + + /* If you removed a lot without adding new material, that is + * not really a rewrite. */ - if (break_score * base_size < delta_size * MAX_SCORE) - to_break = 1; + if ((src->size * break_score < src_removed * MAX_SCORE) && + (literal_added * 20 < src_removed) && + (literal_added * 20 < src_copied)) + return 0; - return to_break; + return 1; } void diffcore_break(int break_score) diff --git a/diffcore-delta.c b/diffcore-delta.c index 1e6a691..7338a40 100644 --- a/diffcore-delta.c +++ b/diffcore-delta.c @@ -1,43 +1,213 @@ #include "cache.h" #include "diff.h" #include "diffcore.h" -#include "delta.h" -#include "count-delta.h" - -static int diffcore_count_changes_1(void *src, unsigned long src_size, - void *dst, unsigned long dst_size, - unsigned long delta_limit, - unsigned long *src_copied, - unsigned long *literal_added) + +/* + * Idea here is very simple. + * + * We have total of (sz-N+1) N-byte overlapping sequences in buf whose + * size is sz. If the same N-byte sequence appears in both source and + * destination, we say the byte that starts that sequence is shared + * between them (i.e. copied from source to destination). + * + * For each possible N-byte sequence, if the source buffer has more + * instances of it than the destination buffer, that means the + * difference are the number of bytes not copied from source to + * destination. If the counts are the same, everything was copied + * from source to destination. If the destination has more, + * everything was copied, and destination added more. + * + * We are doing an approximation so we do not really have to waste + * memory by actually storing the sequence. We just hash them into + * somewhere around 2^16 hashbuckets and count the occurrences. + * + * The length of the sequence is arbitrarily set to 8 for now. + */ + +/* Wild guess at the initial hash size */ +#define INITIAL_HASH_SIZE 9 + +/* We leave more room in smaller hash but do not let it + * grow to have unused hole too much. + */ +#define INITIAL_FREE(sz_log2) ((1<<(sz_log2))*(sz_log2-3)/(sz_log2)) + +/* A prime rather carefully chosen between 2^16..2^17, so that + * HASHBASE < INITIAL_FREE(17). We want to keep the maximum hashtable + * size under the current 2<<17 maximum, which can hold this many + * different values before overflowing to hashtable of size 2<<18. + */ +#define HASHBASE 107927 + +struct spanhash { + unsigned int hashval; + unsigned int cnt; +}; +struct spanhash_top { + int alloc_log2; + int free; + struct spanhash data[FLEX_ARRAY]; +}; + +static struct spanhash *spanhash_find(struct spanhash_top *top, + unsigned int hashval) { - void *delta; - unsigned long delta_size; - - delta = diff_delta(src, src_size, - dst, dst_size, - &delta_size, delta_limit); - if (!delta) - /* If delta_limit is exceeded, we have too much differences */ - return -1; - - /* Estimate the edit size by interpreting delta. */ - if (count_delta(delta, delta_size, src_copied, literal_added)) { - free(delta); - return -1; + int sz = 1 << top->alloc_log2; + int bucket = hashval & (sz - 1); + while (1) { + struct spanhash *h = &(top->data[bucket++]); + if (!h->cnt) + return NULL; + if (h->hashval == hashval) + return h; + if (sz <= bucket) + bucket = 0; } - free(delta); - return 0; +} + +static struct spanhash_top *spanhash_rehash(struct spanhash_top *orig) +{ + struct spanhash_top *new; + int i; + int osz = 1 << orig->alloc_log2; + int sz = osz << 1; + + new = xmalloc(sizeof(*orig) + sizeof(struct spanhash) * sz); + new->alloc_log2 = orig->alloc_log2 + 1; + new->free = INITIAL_FREE(new->alloc_log2); + memset(new->data, 0, sizeof(struct spanhash) * sz); + for (i = 0; i < osz; i++) { + struct spanhash *o = &(orig->data[i]); + int bucket; + if (!o->cnt) + continue; + bucket = o->hashval & (sz - 1); + while (1) { + struct spanhash *h = &(new->data[bucket++]); + if (!h->cnt) { + h->hashval = o->hashval; + h->cnt = o->cnt; + new->free--; + break; + } + if (sz <= bucket) + bucket = 0; + } + } + free(orig); + return new; +} + +static struct spanhash_top *add_spanhash(struct spanhash_top *top, + unsigned int hashval, int cnt) +{ + int bucket, lim; + struct spanhash *h; + + lim = (1 << top->alloc_log2); + bucket = hashval & (lim - 1); + while (1) { + h = &(top->data[bucket++]); + if (!h->cnt) { + h->hashval = hashval; + h->cnt = cnt; + top->free--; + if (top->free < 0) + return spanhash_rehash(top); + return top; + } + if (h->hashval == hashval) { + h->cnt += cnt; + return top; + } + if (lim <= bucket) + bucket = 0; + } +} + +static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz) +{ + int i, n; + unsigned int accum1, accum2, hashval; + struct spanhash_top *hash; + + i = INITIAL_HASH_SIZE; + hash = xmalloc(sizeof(*hash) + sizeof(struct spanhash) * (1<<i)); + hash->alloc_log2 = i; + hash->free = INITIAL_FREE(i); + memset(hash->data, 0, sizeof(struct spanhash) * (1<<i)); + + n = 0; + accum1 = accum2 = 0; + while (sz) { + unsigned int c = *buf++; + unsigned int old_1 = accum1; + sz--; + accum1 = (accum1 << 7) ^ (accum2 >> 25); + accum2 = (accum2 << 7) ^ (old_1 >> 25); + accum1 += c; + if (++n < 64 && c != '\n') + continue; + hashval = (accum1 + accum2 * 0x61) % HASHBASE; + hash = add_spanhash(hash, hashval, n); + n = 0; + accum1 = accum2 = 0; + } + return hash; } int diffcore_count_changes(void *src, unsigned long src_size, void *dst, unsigned long dst_size, + void **src_count_p, + void **dst_count_p, unsigned long delta_limit, unsigned long *src_copied, unsigned long *literal_added) { - return diffcore_count_changes_1(src, src_size, - dst, dst_size, - delta_limit, - src_copied, - literal_added); + int i, ssz; + struct spanhash_top *src_count, *dst_count; + unsigned long sc, la; + + src_count = dst_count = NULL; + if (src_count_p) + src_count = *src_count_p; + if (!src_count) { + src_count = hash_chars(src, src_size); + if (src_count_p) + *src_count_p = src_count; + } + if (dst_count_p) + dst_count = *dst_count_p; + if (!dst_count) { + dst_count = hash_chars(dst, dst_size); + if (dst_count_p) + *dst_count_p = dst_count; + } + sc = la = 0; + + ssz = 1 << src_count->alloc_log2; + for (i = 0; i < ssz; i++) { + struct spanhash *s = &(src_count->data[i]); + struct spanhash *d; + unsigned dst_cnt, src_cnt; + if (!s->cnt) + continue; + src_cnt = s->cnt; + d = spanhash_find(dst_count, s->hashval); + dst_cnt = d ? d->cnt : 0; + if (src_cnt < dst_cnt) { + la += dst_cnt - src_cnt; + sc += src_cnt; + } + else + sc += dst_cnt; + } + + if (!src_count_p) + free(src_count); + if (!dst_count_p) + free(dst_count); + *src_copied = sc; + *literal_added = la; + return 0; } diff --git a/diffcore-rename.c b/diffcore-rename.c index 55cf1c3..e992698 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -133,7 +133,7 @@ static int estimate_similarity(struct diff_filespec *src, * match than anything else; the destination does not even * call into this function in that case. */ - unsigned long delta_size, base_size, src_copied, literal_added; + unsigned long max_size, delta_size, base_size, src_copied, literal_added; unsigned long delta_limit; int score; @@ -144,9 +144,9 @@ static int estimate_similarity(struct diff_filespec *src, if (!S_ISREG(src->mode) || !S_ISREG(dst->mode)) return 0; - delta_size = ((src->size < dst->size) ? - (dst->size - src->size) : (src->size - dst->size)); + max_size = ((src->size > dst->size) ? src->size : dst->size); base_size = ((src->size < dst->size) ? src->size : dst->size); + delta_size = max_size - base_size; /* We would not consider edits that change the file size so * drastically. delta_size must be smaller than @@ -166,23 +166,18 @@ static int estimate_similarity(struct diff_filespec *src, delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE; if (diffcore_count_changes(src->data, src->size, dst->data, dst->size, + &src->cnt_data, &dst->cnt_data, delta_limit, &src_copied, &literal_added)) return 0; - /* Extent of damage */ - if (src->size + literal_added < src_copied) - delta_size = 0; - else - delta_size = (src->size - src_copied) + literal_added; - - /* - * Now we will give some score to it. 100% edit gets 0 points - * and 0% edit gets MAX_SCORE points. + /* How similar are they? + * what percentage of material in dst are from source? */ - score = MAX_SCORE - (MAX_SCORE * delta_size / base_size); - if (score < 0) return 0; - if (MAX_SCORE < score) return MAX_SCORE; + if (!dst->size) + score = 0; /* should not happen */ + else + score = src_copied * MAX_SCORE / max_size; return score; } @@ -310,6 +305,8 @@ void diffcore_rename(struct diff_options *options) m->score = estimate_similarity(one, two, minimum_score); } + /* We do not need the text anymore */ + diff_free_filespec_data(two); dst_cnt++; } /* cost matrix sorted by most to least similar pair */ @@ -17,8 +17,8 @@ */ #define MAX_SCORE 60000.0 #define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */ -#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%)*/ -#define DEFAULT_MERGE_SCORE 48000 /* maximum for break-merge to happen (80%)*/ +#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%) */ +#define DEFAULT_MERGE_SCORE 36000 /* maximum for break-merge to happen 60%) */ #define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */ @@ -26,6 +26,7 @@ struct diff_filespec { unsigned char sha1[20]; char *path; void *data; + void *cnt_data; unsigned long size; int xfrm_flags; /* for use by the xfrm */ unsigned short mode; /* file mode */ @@ -103,6 +104,8 @@ void diff_debug_queue(const char *, struct diff_queue_struct *); extern int diffcore_count_changes(void *src, unsigned long src_size, void *dst, unsigned long dst_size, + void **src_count_p, + void **dst_count_p, unsigned long delta_limit, unsigned long *src_copied, unsigned long *literal_added); diff --git a/git-push.sh b/git-push.sh index 73dcf06..f10cadb 100755 --- a/git-push.sh +++ b/git-push.sh @@ -8,7 +8,7 @@ USAGE='[--all] [--tags] [--force] <repository> [<refspec>...]' has_all= has_force= has_exec= -has_thin= +has_thin=--thin remote= do_tags= @@ -24,7 +24,9 @@ do --exec=*) has_exec="$1" ;; --thin) - has_thin="$1" ;; + ;; # noop + --no-thin) + has_thin= ;; -*) usage ;; *) diff --git a/git-send-email.perl b/git-send-email.perl index b220d11..ecfa347 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -19,10 +19,16 @@ use strict; use warnings; use Term::ReadLine; -use Mail::Sendmail qw(sendmail %mailcfg); use Getopt::Long; use Data::Dumper; -use Email::Valid; +use Net::SMTP; + +# most mail servers generate the Date: header, but not all... +$ENV{LC_ALL} = 'C'; +use POSIX qw/strftime/; + +my $have_email_valid = eval { require Email::Valid; 1 }; +my $smtp; sub unique_email_list(@); sub cleanup_compose_files(); @@ -31,7 +37,7 @@ sub cleanup_compose_files(); my $compose_filename = ".msg.$$"; # Variables we fill in automatically, or via prompting: -my (@to,@cc,@initial_cc,$initial_reply_to,$initial_subject,@files,$from,$compose); +my (@to,@cc,@initial_cc,$initial_reply_to,$initial_subject,@files,$from,$compose,$time); # Behavior modification variables my ($chain_reply_to, $smtp_server, $quiet, $suppress_from, $no_signed_off_cc) = (1, "localhost", 0, 0, 0); @@ -244,6 +250,16 @@ EOT # Variables we set as part of the loop over files our ($message_id, $cc, %mail, $subject, $reply_to, $message); +sub extract_valid_address { + my $address = shift; + if ($have_email_valid) { + return Email::Valid->address($address); + } else { + # less robust/correct than the monster regexp in Email::Valid, + # but still does a 99% job, and one less dependency + return ($address =~ /([^\"<>\s]+@[^<>\s]+)/); + } +} # Usually don't need to change anything below here. @@ -253,13 +269,12 @@ our ($message_id, $cc, %mail, $subject, $reply_to, $message); # 1 second since the last time we were called. # We'll setup a template for the message id, using the "from" address: -my $message_id_from = Email::Valid->address($from); +my $message_id_from = extract_valid_address($from); my $message_id_template = "<%s-git-send-email-$message_id_from>"; sub make_message_id { - my $date = `date "+\%s"`; - chomp($date); + my $date = time; my $pseudo_rand = int (rand(4200)); $message_id = sprintf $message_id_template, "$date$pseudo_rand"; #print "new message id = $message_id\n"; # Was useful for debugging @@ -268,38 +283,49 @@ sub make_message_id $cc = ""; +$time = time - scalar $#files; sub send_message { - my $to = join (", ", unique_email_list(@to)); - - %mail = ( To => $to, - From => $from, - CC => $cc, - Subject => $subject, - Message => $message, - 'Reply-to' => $from, - 'In-Reply-To' => $reply_to, - 'Message-ID' => $message_id, - 'X-Mailer' => "git-send-email", - ); - - $mail{smtp} = $smtp_server; - $mailcfg{mime} = 0; - - #print Data::Dumper->Dump([\%mail],[qw(*mail)]); - - sendmail(%mail) or die $Mail::Sendmail::error; + my @recipients = unique_email_list(@to); + my $to = join (",\n\t", @recipients); + @recipients = unique_email_list(@recipients,@cc); + my $date = strftime('%a, %d %b %Y %H:%M:%S %z', localtime($time++)); + + my $header = "From: $from +To: $to +Cc: $cc +Subject: $subject +Reply-To: $from +Date: $date +Message-Id: $message_id +X-Mailer: git-send-email @@GIT_VERSION@@ +"; + $header .= "In-Reply-To: $reply_to\n" if $reply_to; + + $smtp ||= Net::SMTP->new( $smtp_server ); + $smtp->mail( $from ) or die $smtp->message; + $smtp->to( @recipients ) or die $smtp->message; + $smtp->data or die $smtp->message; + $smtp->datasend("$header\n$message") or die $smtp->message; + $smtp->dataend() or die $smtp->message; + $smtp->ok or die "Failed to send $subject\n".$smtp->message; if ($quiet) { printf "Sent %s\n", $subject; } else { - print "OK. Log says:\n", $Mail::Sendmail::log; - print "\n\n" + print "OK. Log says: +Date: $date +Server: $smtp_server Port: 25 +From: $from +Subject: $subject +Cc: $cc +To: $to + +Result: ", $smtp->code, ' ', ($smtp->message =~ /\n([^\n]+\n)$/s), "\n"; } } - $reply_to = $initial_reply_to; make_message_id(); $subject = $initial_subject; @@ -390,14 +416,14 @@ sub cleanup_compose_files() { } - +$smtp->quit if $smtp; sub unique_email_list(@) { my %seen; my @emails; foreach my $entry (@_) { - my $clean = Email::Valid->address($entry); + my $clean = extract_valid_address($entry); next if $seen{$clean}++; push @emails, $entry; } @@ -11,6 +11,7 @@ #include "cache.h" #include "quote.h" +static int abbrev = 0; static int show_deleted = 0; static int show_cached = 0; static int show_others = 0; @@ -502,7 +503,8 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce) printf("%s%06o %s %d\t", tag, ntohl(ce->ce_mode), - sha1_to_hex(ce->sha1), + abbrev ? find_unique_abbrev(ce->sha1,abbrev) + : sha1_to_hex(ce->sha1), ce_stage(ce)); write_name_quoted("", 0, ce->name + offset, line_terminator, stdout); @@ -643,7 +645,8 @@ static void verify_pathspec(void) static const char ls_files_usage[] = "git-ls-files [-z] [-t] [-v] (--[cached|deleted|others|stage|unmerged|killed|modified])* " "[ --ignored ] [--exclude=<pattern>] [--exclude-from=<file>] " - "[ --exclude-per-directory=<filename> ] [--full-name] [--] [<file>]*"; + "[ --exclude-per-directory=<filename> ] [--full-name] [--abbrev] " + "[--] [<file>]*"; int main(int argc, const char **argv) { @@ -754,6 +757,18 @@ int main(int argc, const char **argv) error_unmatch = 1; continue; } + if (!strncmp(arg, "--abbrev=", 9)) { + abbrev = strtoul(arg+9, NULL, 10); + if (abbrev && abbrev < MINIMUM_ABBREV) + abbrev = MINIMUM_ABBREV; + else if (abbrev > 40) + abbrev = 40; + continue; + } + if (!strcmp(arg, "--abbrev")) { + abbrev = DEFAULT_ABBREV; + continue; + } if (*arg == '-') usage(ls_files_usage); break; @@ -13,13 +13,14 @@ static int line_termination = '\n'; #define LS_TREE_ONLY 2 #define LS_SHOW_TREES 4 #define LS_NAME_ONLY 8 +static int abbrev = 0; static int ls_options = 0; const char **pathspec; static int chomp_prefix = 0; static const char *prefix; static const char ls_tree_usage[] = - "git-ls-tree [-d] [-r] [-t] [-z] [--name-only] [--name-status] [--full-name] <tree-ish> [path...]"; + "git-ls-tree [-d] [-r] [-t] [-z] [--name-only] [--name-status] [--full-name] [--abbrev[=<n>]] <tree-ish> [path...]"; static int show_recursive(const char *base, int baselen, const char *pathname) { @@ -73,7 +74,9 @@ static int show_tree(unsigned char *sha1, const char *base, int baselen, return 0; if (!(ls_options & LS_NAME_ONLY)) - printf("%06o %s %s\t", mode, type, sha1_to_hex(sha1)); + printf("%06o %s %s\t", mode, type, + abbrev ? find_unique_abbrev(sha1,abbrev) + : sha1_to_hex(sha1)); write_name_quoted(base + chomp_prefix, baselen - chomp_prefix, pathname, line_termination, stdout); @@ -114,6 +117,18 @@ int main(int argc, const char **argv) chomp_prefix = 0; break; } + if (!strncmp(argv[1]+2, "abbrev=",7)) { + abbrev = strtoul(argv[1]+9, NULL, 10); + if (abbrev && abbrev < MINIMUM_ABBREV) + abbrev = MINIMUM_ABBREV; + else if (abbrev > 40) + abbrev = 40; + break; + } + if (!strcmp(argv[1]+2, "abbrev")) { + abbrev = DEFAULT_ABBREV; + break; + } /* otherwise fallthru */ default: usage(ls_tree_usage); @@ -40,13 +40,18 @@ static int bisect_list = 0; static int verbose_header = 0; static int abbrev = DEFAULT_ABBREV; static int show_parents = 0; +static int show_timestamp = 0; static int hdr_termination = 0; static const char *commit_prefix = ""; static enum cmit_fmt commit_format = CMIT_FMT_RAW; static void show_commit(struct commit *commit) { - printf("%s%s", commit_prefix, sha1_to_hex(commit->object.sha1)); + if (show_timestamp) + printf("%lu ", commit->date); + if (commit_prefix[0]) + fputs(commit_prefix, stdout); + fputs(sha1_to_hex(commit->object.sha1), stdout); if (show_parents) { struct commit_list *parents = commit->parents; while (parents) { @@ -335,6 +340,10 @@ int main(int argc, const char **argv) show_parents = 1; continue; } + if (!strcmp(arg, "--timestamp")) { + show_timestamp = 1; + continue; + } if (!strcmp(arg, "--bisect")) { bisect_list = 1; continue; diff --git a/t/t4001-diff-rename.sh b/t/t4001-diff-rename.sh index 2e3c20d..08c1131 100755 --- a/t/t4001-diff-rename.sh +++ b/t/t4001-diff-rename.sh @@ -49,7 +49,7 @@ rename from path0 rename to path1 --- a/path0 +++ b/path1 -@@ -8,7 +8,7 @@ Line 7 +@@ -8,7 +8,7 @@ Line 8 Line 9 Line 10 diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index adc5e93..278eb66 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -34,6 +34,9 @@ test_expect_success \ mkdir a/bin && cp /bin/sh a/bin && ln -s a a/l1 && + (p=long_path_to_a_file && cd a && + for depth in 1 2 3 4 5; do mkdir $p && cd $p; done && + echo text >file_with_long_path) && (cd a && find .) | sort >a.lst' test_expect_success \ @@ -1,37 +1,23 @@ /* - * Copyright (c) 2005 Rene Scharfe + * Copyright (c) 2005, 2006 Rene Scharfe */ #include <time.h> #include "cache.h" #include "diff.h" #include "commit.h" +#include "strbuf.h" +#include "tar.h" #define RECORDSIZE (512) #define BLOCKSIZE (RECORDSIZE * 20) -#define TYPEFLAG_AUTO '\0' -#define TYPEFLAG_REG '0' -#define TYPEFLAG_LNK '2' -#define TYPEFLAG_DIR '5' -#define TYPEFLAG_GLOBAL_HEADER 'g' -#define TYPEFLAG_EXT_HEADER 'x' - -#define EXT_HEADER_PATH 1 -#define EXT_HEADER_LINKPATH 2 - static const char tar_tree_usage[] = "git-tar-tree <key> [basedir]"; static char block[BLOCKSIZE]; static unsigned long offset; -static const char *basedir; static time_t archive_time; -struct path_prefix { - struct path_prefix *prev; - const char *name; -}; - /* tries hard to write, either succeeds or dies in the attempt */ static void reliable_write(void *buf, unsigned long size) { @@ -119,230 +105,170 @@ static void write_blocked(void *buf, unsigned long size) write_if_needed(); } -static void append_string(char **p, const char *s) -{ - unsigned int len = strlen(s); - memcpy(*p, s, len); - *p += len; -} - -static void append_char(char **p, char c) -{ - **p = c; - *p += 1; -} - -static void append_path_prefix(char **buffer, struct path_prefix *prefix) +static void strbuf_append_string(struct strbuf *sb, const char *s) { - if (!prefix) - return; - append_path_prefix(buffer, prefix->prev); - append_string(buffer, prefix->name); - append_char(buffer, '/'); -} - -static unsigned int path_prefix_len(struct path_prefix *prefix) -{ - if (!prefix) - return 0; - return path_prefix_len(prefix->prev) + strlen(prefix->name) + 1; -} - -static void append_path(char **p, int is_dir, const char *basepath, - struct path_prefix *prefix, const char *path) -{ - if (basepath) { - append_string(p, basepath); - append_char(p, '/'); + int slen = strlen(s); + int total = sb->len + slen; + if (total > sb->alloc) { + sb->buf = xrealloc(sb->buf, total); + sb->alloc = total; } - append_path_prefix(p, prefix); - append_string(p, path); - if (is_dir) - append_char(p, '/'); + memcpy(sb->buf + sb->len, s, slen); + sb->len = total; } -static unsigned int path_len(int is_dir, const char *basepath, - struct path_prefix *prefix, const char *path) -{ - unsigned int len = 0; - if (basepath) - len += strlen(basepath) + 1; - len += path_prefix_len(prefix) + strlen(path); - if (is_dir) - len++; - return len; -} - -static void append_extended_header_prefix(char **p, unsigned int size, - const char *keyword) +/* + * pax extended header records have the format "%u %s=%s\n". %u contains + * the size of the whole string (including the %u), the first %s is the + * keyword, the second one is the value. This function constructs such a + * string and appends it to a struct strbuf. + */ +static void strbuf_append_ext_header(struct strbuf *sb, const char *keyword, + const char *value, unsigned int valuelen) { - int len = sprintf(*p, "%u %s=", size, keyword); - *p += len; -} + char *p; + int len, total, tmp; -static unsigned int extended_header_len(const char *keyword, - unsigned int valuelen) -{ /* "%u %s=%s\n" */ - unsigned int len = 1 + 1 + strlen(keyword) + 1 + valuelen + 1; - if (len > 9) - len++; - if (len > 99) + len = 1 + 1 + strlen(keyword) + 1 + valuelen + 1; + for (tmp = len; tmp > 9; tmp /= 10) len++; - return len; -} -static void append_extended_header(char **p, const char *keyword, - const char *value, unsigned int len) -{ - unsigned int size = extended_header_len(keyword, len); - append_extended_header_prefix(p, size, keyword); - memcpy(*p, value, len); - *p += len; - append_char(p, '\n'); -} + total = sb->len + len; + if (total > sb->alloc) { + sb->buf = xrealloc(sb->buf, total); + sb->alloc = total; + } -static void write_header(const unsigned char *, char, const char *, struct path_prefix *, - const char *, unsigned int, void *, unsigned long); + p = sb->buf; + p += sprintf(p, "%u %s=", len, keyword); + memcpy(p, value, valuelen); + p += valuelen; + *p = '\n'; + sb->len = total; +} -/* stores a pax extended header directly in the block buffer */ -static void write_extended_header(const char *headerfilename, int is_dir, - unsigned int flags, const char *basepath, - struct path_prefix *prefix, - const char *path, unsigned int namelen, - void *content, unsigned int contentsize) +static unsigned int ustar_header_chksum(const struct ustar_header *header) { - char *buffer, *p; - unsigned int pathlen, size, linkpathlen = 0; - - size = pathlen = extended_header_len("path", namelen); - if (flags & EXT_HEADER_LINKPATH) { - linkpathlen = extended_header_len("linkpath", contentsize); - size += linkpathlen; - } - write_header(NULL, TYPEFLAG_EXT_HEADER, NULL, NULL, headerfilename, - 0100600, NULL, size); - - buffer = p = malloc(size); - if (!buffer) - die("git-tar-tree: %s", strerror(errno)); - append_extended_header_prefix(&p, pathlen, "path"); - append_path(&p, is_dir, basepath, prefix, path); - append_char(&p, '\n'); - if (flags & EXT_HEADER_LINKPATH) - append_extended_header(&p, "linkpath", content, contentsize); - write_blocked(buffer, size); - free(buffer); + char *p = (char *)header; + unsigned int chksum = 0; + while (p < header->chksum) + chksum += *p++; + chksum += sizeof(header->chksum) * ' '; + p += sizeof(header->chksum); + while (p < (char *)header + sizeof(struct ustar_header)) + chksum += *p++; + return chksum; } -static void write_global_extended_header(const unsigned char *sha1) +static int get_path_prefix(const struct strbuf *path, int maxlen) { - char *p; - unsigned int size; - - size = extended_header_len("comment", 40); - write_header(NULL, TYPEFLAG_GLOBAL_HEADER, NULL, NULL, - "pax_global_header", 0100600, NULL, size); - - p = get_record(); - append_extended_header(&p, "comment", sha1_to_hex(sha1), 40); - write_if_needed(); + int i = path->len; + if (i > maxlen) + i = maxlen; + while (i > 0 && path->buf[i] != '/') + i--; + return i; } -/* stores a ustar header directly in the block buffer */ -static void write_header(const unsigned char *sha1, char typeflag, const char *basepath, - struct path_prefix *prefix, const char *path, - unsigned int mode, void *buffer, unsigned long size) +static void write_entry(const unsigned char *sha1, struct strbuf *path, + unsigned int mode, void *buffer, unsigned long size) { - unsigned int namelen; - char *header = NULL; - unsigned int checksum = 0; - int i; - unsigned int ext_header = 0; - - if (typeflag == TYPEFLAG_AUTO) { - if (S_ISDIR(mode)) - typeflag = TYPEFLAG_DIR; - else if (S_ISLNK(mode)) - typeflag = TYPEFLAG_LNK; - else - typeflag = TYPEFLAG_REG; - } - - namelen = path_len(S_ISDIR(mode), basepath, prefix, path); - if (namelen > 100) - ext_header |= EXT_HEADER_PATH; - if (typeflag == TYPEFLAG_LNK && size > 100) - ext_header |= EXT_HEADER_LINKPATH; - - /* the extended header must be written before the normal one */ - if (ext_header) { - char headerfilename[51]; - sprintf(headerfilename, "%s.paxheader", sha1_to_hex(sha1)); - write_extended_header(headerfilename, S_ISDIR(mode), - ext_header, basepath, prefix, path, - namelen, buffer, size); - } - - header = get_record(); - - if (ext_header) { - sprintf(header, "%s.data", sha1_to_hex(sha1)); + struct ustar_header header; + struct strbuf ext_header; + + memset(&header, 0, sizeof(header)); + ext_header.buf = NULL; + ext_header.len = ext_header.alloc = 0; + + if (!sha1) { + *header.typeflag = TYPEFLAG_GLOBAL_HEADER; + mode = 0100666; + strcpy(header.name, "pax_global_header"); + } else if (!path) { + *header.typeflag = TYPEFLAG_EXT_HEADER; + mode = 0100666; + sprintf(header.name, "%s.paxheader", sha1_to_hex(sha1)); } else { - char *p = header; - append_path(&p, S_ISDIR(mode), basepath, prefix, path); + if (S_ISDIR(mode)) { + *header.typeflag = TYPEFLAG_DIR; + mode |= 0777; + } else if (S_ISLNK(mode)) { + *header.typeflag = TYPEFLAG_LNK; + mode |= 0777; + } else if (S_ISREG(mode)) { + *header.typeflag = TYPEFLAG_REG; + mode |= (mode & 0100) ? 0777 : 0666; + } else { + error("unsupported file mode: 0%o (SHA1: %s)", + mode, sha1_to_hex(sha1)); + return; + } + if (path->len > sizeof(header.name)) { + int plen = get_path_prefix(path, sizeof(header.prefix)); + int rest = path->len - plen - 1; + if (plen > 0 && rest <= sizeof(header.name)) { + memcpy(header.prefix, path->buf, plen); + memcpy(header.name, path->buf + plen + 1, rest); + } else { + sprintf(header.name, "%s.data", + sha1_to_hex(sha1)); + strbuf_append_ext_header(&ext_header, "path", + path->buf, path->len); + } + } else + memcpy(header.name, path->buf, path->len); } - if (typeflag == TYPEFLAG_LNK) { - if (ext_header & EXT_HEADER_LINKPATH) { - sprintf(&header[157], "see %s.paxheader", + if (S_ISLNK(mode) && buffer) { + if (size > sizeof(header.linkname)) { + sprintf(header.linkname, "see %s.paxheader", sha1_to_hex(sha1)); - } else { - if (buffer) - strncpy(&header[157], buffer, size); - } + strbuf_append_ext_header(&ext_header, "linkpath", + buffer, size); + } else + memcpy(header.linkname, buffer, size); } - if (S_ISDIR(mode)) - mode |= 0777; - else if (S_ISREG(mode)) - mode |= (mode & 0100) ? 0777 : 0666; - else if (S_ISLNK(mode)) - mode |= 0777; - sprintf(&header[100], "%07o", mode & 07777); + sprintf(header.mode, "%07o", mode & 07777); + sprintf(header.size, "%011lo", S_ISREG(mode) ? size : 0); + sprintf(header.mtime, "%011lo", archive_time); /* XXX: should we provide more meaningful info here? */ - sprintf(&header[108], "%07o", 0); /* uid */ - sprintf(&header[116], "%07o", 0); /* gid */ - strncpy(&header[265], "git", 31); /* uname */ - strncpy(&header[297], "git", 31); /* gname */ - - if (S_ISDIR(mode) || S_ISLNK(mode)) - size = 0; - sprintf(&header[124], "%011lo", size); - sprintf(&header[136], "%011lo", archive_time); + sprintf(header.uid, "%07o", 0); + sprintf(header.gid, "%07o", 0); + strncpy(header.uname, "git", 31); + strncpy(header.gname, "git", 31); + sprintf(header.devmajor, "%07o", 0); + sprintf(header.devminor, "%07o", 0); - header[156] = typeflag; + memcpy(header.magic, "ustar", 6); + memcpy(header.version, "00", 2); - memcpy(&header[257], "ustar", 6); - memcpy(&header[263], "00", 2); + sprintf(header.chksum, "%07o", ustar_header_chksum(&header)); - sprintf(&header[329], "%07o", 0); /* devmajor */ - sprintf(&header[337], "%07o", 0); /* devminor */ - - memset(&header[148], ' ', 8); - for (i = 0; i < RECORDSIZE; i++) - checksum += header[i]; - sprintf(&header[148], "%07o", checksum & 0x1fffff); + if (ext_header.len > 0) { + write_entry(sha1, NULL, 0, ext_header.buf, ext_header.len); + free(ext_header.buf); + } + write_blocked(&header, sizeof(header)); + if (S_ISREG(mode) && buffer && size > 0) + write_blocked(buffer, size); +} - write_if_needed(); +static void write_global_extended_header(const unsigned char *sha1) +{ + struct strbuf ext_header; + ext_header.buf = NULL; + ext_header.len = ext_header.alloc = 0; + strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40); + write_entry(NULL, NULL, 0, ext_header.buf, ext_header.len); + free(ext_header.buf); } -static void traverse_tree(struct tree_desc *tree, - struct path_prefix *prefix) +static void traverse_tree(struct tree_desc *tree, struct strbuf *path) { - struct path_prefix this_prefix; - this_prefix.prev = prefix; + int pathlen = path->len; while (tree->size) { const char *name; @@ -358,16 +284,19 @@ static void traverse_tree(struct tree_desc *tree, eltbuf = read_sha1_file(sha1, elttype, &eltsize); if (!eltbuf) die("cannot read %s", sha1_to_hex(sha1)); - write_header(sha1, TYPEFLAG_AUTO, basedir, - prefix, name, mode, eltbuf, eltsize); + + path->len = pathlen; + strbuf_append_string(path, name); + if (S_ISDIR(mode)) + strbuf_append_string(path, "/"); + + write_entry(sha1, path, mode, eltbuf, eltsize); + if (S_ISDIR(mode)) { struct tree_desc subtree; subtree.buf = eltbuf; subtree.size = eltsize; - this_prefix.name = name; - traverse_tree(&subtree, &this_prefix); - } else if (!S_ISLNK(mode)) { - write_blocked(eltbuf, eltsize); + traverse_tree(&subtree, path); } free(eltbuf); } @@ -375,16 +304,22 @@ static void traverse_tree(struct tree_desc *tree, int main(int argc, char **argv) { - unsigned char sha1[20]; + unsigned char sha1[20], tree_sha1[20]; struct commit *commit; struct tree_desc tree; + struct strbuf current_path; + + current_path.buf = xmalloc(PATH_MAX); + current_path.alloc = PATH_MAX; + current_path.len = current_path.eof = 0; setup_git_directory(); git_config(git_default_config); switch (argc) { case 3: - basedir = argv[2]; + strbuf_append_string(¤t_path, argv[2]); + strbuf_append_string(¤t_path, "/"); /* FALLTHROUGH */ case 2: if (get_sha1(argv[1], sha1) < 0) @@ -398,17 +333,19 @@ int main(int argc, char **argv) if (commit) { write_global_extended_header(commit->object.sha1); archive_time = commit->date; - } - tree.buf = read_object_with_reference(sha1, "tree", &tree.size, NULL); + } else + archive_time = time(NULL); + + tree.buf = read_object_with_reference(sha1, "tree", &tree.size, + tree_sha1); if (!tree.buf) die("not a reference to a tag, commit or tree object: %s", sha1_to_hex(sha1)); - if (!archive_time) - archive_time = time(NULL); - if (basedir) - write_header((unsigned char *)"0", TYPEFLAG_DIR, NULL, NULL, - basedir, 040777, NULL, 0); - traverse_tree(&tree, NULL); + + if (current_path.len > 0) + write_entry(tree_sha1, ¤t_path, 040777, NULL, 0); + traverse_tree(&tree, ¤t_path); write_trailer(); + free(current_path.buf); return 0; } @@ -0,0 +1,25 @@ +#define TYPEFLAG_AUTO '\0' +#define TYPEFLAG_REG '0' +#define TYPEFLAG_LNK '2' +#define TYPEFLAG_DIR '5' +#define TYPEFLAG_GLOBAL_HEADER 'g' +#define TYPEFLAG_EXT_HEADER 'x' + +struct ustar_header { + char name[100]; /* 0 */ + char mode[8]; /* 100 */ + char uid[8]; /* 108 */ + char gid[8]; /* 116 */ + char size[12]; /* 124 */ + char mtime[12]; /* 136 */ + char chksum[8]; /* 148 */ + char typeflag[1]; /* 156 */ + char linkname[100]; /* 157 */ + char magic[6]; /* 257 */ + char version[2]; /* 263 */ + char uname[32]; /* 265 */ + char gname[32]; /* 297 */ + char devmajor[8]; /* 329 */ + char devminor[8]; /* 337 */ + char prefix[155]; /* 345 */ +}; diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h new file mode 100644 index 0000000..71cb939 --- /dev/null +++ b/xdiff/xdiff.h @@ -0,0 +1,91 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XDIFF_H) +#define XDIFF_H + +#ifdef __cplusplus +extern "C" { +#endif /* #ifdef __cplusplus */ + + +#define XDF_NEED_MINIMAL (1 << 1) + +#define XDL_PATCH_NORMAL '-' +#define XDL_PATCH_REVERSE '+' +#define XDL_PATCH_MODEMASK ((1 << 8) - 1) +#define XDL_PATCH_IGNOREBSPACE (1 << 8) + +#define XDL_MMB_READONLY (1 << 0) + +#define XDL_MMF_ATOMIC (1 << 0) + +#define XDL_BDOP_INS 1 +#define XDL_BDOP_CPY 2 +#define XDL_BDOP_INSB 3 + + +typedef struct s_mmfile { + char *ptr; + long size; +} mmfile_t; + +typedef struct s_mmbuffer { + char *ptr; + long size; +} mmbuffer_t; + +typedef struct s_xpparam { + unsigned long flags; +} xpparam_t; + +typedef struct s_xdemitcb { + void *priv; + int (*outf)(void *, mmbuffer_t *, int); +} xdemitcb_t; + +typedef struct s_xdemitconf { + long ctxlen; +} xdemitconf_t; + +typedef struct s_bdiffparam { + long bsize; +} bdiffparam_t; + + +#define xdl_malloc(x) malloc(x) +#define xdl_free(ptr) free(ptr) +#define xdl_realloc(ptr,x) realloc(ptr,x) + +void *xdl_mmfile_first(mmfile_t *mmf, long *size); +void *xdl_mmfile_next(mmfile_t *mmf, long *size); +long xdl_mmfile_size(mmfile_t *mmf); + +int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdemitconf_t const *xecfg, xdemitcb_t *ecb); + +#ifdef __cplusplus +} +#endif /* #ifdef __cplusplus */ + +#endif /* #if !defined(XDIFF_H) */ + diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c new file mode 100644 index 0000000..8ea0483 --- /dev/null +++ b/xdiff/xdiffi.c @@ -0,0 +1,469 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#include "xinclude.h" + + + +#define XDL_MAX_COST_MIN 256 +#define XDL_HEUR_MIN_COST 256 +#define XDL_LINE_MAX (long)((1UL << (8 * sizeof(long) - 1)) - 1) +#define XDL_SNAKE_CNT 20 +#define XDL_K_HEUR 4 + + + +typedef struct s_xdpsplit { + long i1, i2; + int min_lo, min_hi; +} xdpsplit_t; + + + + +static long xdl_split(unsigned long const *ha1, long off1, long lim1, + unsigned long const *ha2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl, + xdalgoenv_t *xenv); +static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2); + + + + +/* + * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers. + * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both + * the forward diagonal starting from (off1, off2) and the backward diagonal + * starting from (lim1, lim2). If the K values on the same diagonal crosses + * returns the furthest point of reach. We might end up having to expensive + * cases using this algorithm is full, so a little bit of heuristic is needed + * to cut the search and to return a suboptimal point. + */ +static long xdl_split(unsigned long const *ha1, long off1, long lim1, + unsigned long const *ha2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl, + xdalgoenv_t *xenv) { + long dmin = off1 - lim2, dmax = lim1 - off2; + long fmid = off1 - off2, bmid = lim1 - lim2; + long odd = (fmid - bmid) & 1; + long fmin = fmid, fmax = fmid; + long bmin = bmid, bmax = bmid; + long ec, d, i1, i2, prev1, best, dd, v, k; + + /* + * Set initial diagonal values for both forward and backward path. + */ + kvdf[fmid] = off1; + kvdb[bmid] = lim1; + + for (ec = 1;; ec++) { + int got_snake = 0; + + /* + * We need to extent the diagonal "domain" by one. If the next + * values exits the box boundaries we need to change it in the + * opposite direction because (max - min) must be a power of two. + * Also we initialize the extenal K value to -1 so that we can + * avoid extra conditions check inside the core loop. + */ + if (fmin > dmin) + kvdf[--fmin - 1] = -1; + else + ++fmin; + if (fmax < dmax) + kvdf[++fmax + 1] = -1; + else + --fmax; + + for (d = fmax; d >= fmin; d -= 2) { + if (kvdf[d - 1] >= kvdf[d + 1]) + i1 = kvdf[d - 1] + 1; + else + i1 = kvdf[d + 1]; + prev1 = i1; + i2 = i1 - d; + for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++); + if (i1 - prev1 > xenv->snake_cnt) + got_snake = 1; + kvdf[d] = i1; + if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) { + spl->i1 = i1; + spl->i2 = i2; + spl->min_lo = spl->min_hi = 1; + return ec; + } + } + + /* + * We need to extent the diagonal "domain" by one. If the next + * values exits the box boundaries we need to change it in the + * opposite direction because (max - min) must be a power of two. + * Also we initialize the extenal K value to -1 so that we can + * avoid extra conditions check inside the core loop. + */ + if (bmin > dmin) + kvdb[--bmin - 1] = XDL_LINE_MAX; + else + ++bmin; + if (bmax < dmax) + kvdb[++bmax + 1] = XDL_LINE_MAX; + else + --bmax; + + for (d = bmax; d >= bmin; d -= 2) { + if (kvdb[d - 1] < kvdb[d + 1]) + i1 = kvdb[d - 1]; + else + i1 = kvdb[d + 1] - 1; + prev1 = i1; + i2 = i1 - d; + for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--); + if (prev1 - i1 > xenv->snake_cnt) + got_snake = 1; + kvdb[d] = i1; + if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) { + spl->i1 = i1; + spl->i2 = i2; + spl->min_lo = spl->min_hi = 1; + return ec; + } + } + + if (need_min) + continue; + + /* + * If the edit cost is above the heuristic trigger and if + * we got a good snake, we sample current diagonals to see + * if some of the, have reached an "interesting" path. Our + * measure is a function of the distance from the diagonal + * corner (i1 + i2) penalized with the distance from the + * mid diagonal itself. If this value is above the current + * edit cost times a magic factor (XDL_K_HEUR) we consider + * it interesting. + */ + if (got_snake && ec > xenv->heur_min) { + for (best = 0, d = fmax; d >= fmin; d -= 2) { + dd = d > fmid ? d - fmid: fmid - d; + i1 = kvdf[d]; + i2 = i1 - d; + v = (i1 - off1) + (i2 - off2) - dd; + + if (v > XDL_K_HEUR * ec && v > best && + off1 + xenv->snake_cnt <= i1 && i1 < lim1 && + off2 + xenv->snake_cnt <= i2 && i2 < lim2) { + for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++) + if (k == xenv->snake_cnt) { + best = v; + spl->i1 = i1; + spl->i2 = i2; + break; + } + } + } + if (best > 0) { + spl->min_lo = 1; + spl->min_hi = 0; + return ec; + } + + for (best = 0, d = bmax; d >= bmin; d -= 2) { + dd = d > bmid ? d - bmid: bmid - d; + i1 = kvdb[d]; + i2 = i1 - d; + v = (lim1 - i1) + (lim2 - i2) - dd; + + if (v > XDL_K_HEUR * ec && v > best && + off1 < i1 && i1 <= lim1 - xenv->snake_cnt && + off2 < i2 && i2 <= lim2 - xenv->snake_cnt) { + for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++) + if (k == xenv->snake_cnt - 1) { + best = v; + spl->i1 = i1; + spl->i2 = i2; + break; + } + } + } + if (best > 0) { + spl->min_lo = 0; + spl->min_hi = 1; + return ec; + } + } + + /* + * Enough is enough. We spent too much time here and now we collect + * the furthest reaching path using the (i1 + i2) measure. + */ + if (ec >= xenv->mxcost) { + long fbest, fbest1, bbest, bbest1; + + fbest = -1; + for (d = fmax; d >= fmin; d -= 2) { + i1 = XDL_MIN(kvdf[d], lim1); + i2 = i1 - d; + if (lim2 < i2) + i1 = lim2 + d, i2 = lim2; + if (fbest < i1 + i2) { + fbest = i1 + i2; + fbest1 = i1; + } + } + + bbest = XDL_LINE_MAX; + for (d = bmax; d >= bmin; d -= 2) { + i1 = XDL_MAX(off1, kvdb[d]); + i2 = i1 - d; + if (i2 < off2) + i1 = off2 + d, i2 = off2; + if (i1 + i2 < bbest) { + bbest = i1 + i2; + bbest1 = i1; + } + } + + if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) { + spl->i1 = fbest1; + spl->i2 = fbest - fbest1; + spl->min_lo = 1; + spl->min_hi = 0; + } else { + spl->i1 = bbest1; + spl->i2 = bbest - bbest1; + spl->min_lo = 0; + spl->min_hi = 1; + } + return ec; + } + } + + return -1; +} + + +/* + * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling + * the box splitting function. Note that the real job (marking changed lines) + * is done in the two boundary reaching checks. + */ +int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, + diffdata_t *dd2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) { + unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha; + + /* + * Shrink the box by walking through each diagonal snake (SW and NE). + */ + for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++); + for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--); + + /* + * If one dimension is empty, then all records on the other one must + * be obviously changed. + */ + if (off1 == lim1) { + char *rchg2 = dd2->rchg; + long *rindex2 = dd2->rindex; + + for (; off2 < lim2; off2++) + rchg2[rindex2[off2]] = 1; + } else if (off2 == lim2) { + char *rchg1 = dd1->rchg; + long *rindex1 = dd1->rindex; + + for (; off1 < lim1; off1++) + rchg1[rindex1[off1]] = 1; + } else { + long ec; + xdpsplit_t spl; + + /* + * Divide ... + */ + if ((ec = xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb, + need_min, &spl, xenv)) < 0) { + + return -1; + } + + /* + * ... et Impera. + */ + if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2, + kvdf, kvdb, spl.min_lo, xenv) < 0 || + xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2, + kvdf, kvdb, spl.min_hi, xenv) < 0) { + + return -1; + } + } + + return 0; +} + + +int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *xe) { + long ndiags; + long *kvd, *kvdf, *kvdb; + xdalgoenv_t xenv; + diffdata_t dd1, dd2; + + if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) { + + return -1; + } + + /* + * Allocate and setup K vectors to be used by the differential algorithm. + * One is to store the forward path and one to store the backward path. + */ + ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3; + if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) { + + xdl_free_env(xe); + return -1; + } + kvdf = kvd; + kvdb = kvdf + ndiags; + kvdf += xe->xdf2.nreff + 1; + kvdb += xe->xdf2.nreff + 1; + + /* + * Classical integer square root approximation using shifts. + */ + xenv.mxcost = 1; + for (; ndiags; ndiags >>= 2) + xenv.mxcost <<= 1; + if (xenv.mxcost < XDL_MAX_COST_MIN) + xenv.mxcost = XDL_MAX_COST_MIN; + xenv.snake_cnt = XDL_SNAKE_CNT; + xenv.heur_min = XDL_HEUR_MIN_COST; + + dd1.nrec = xe->xdf1.nreff; + dd1.ha = xe->xdf1.ha; + dd1.rchg = xe->xdf1.rchg; + dd1.rindex = xe->xdf1.rindex; + dd2.nrec = xe->xdf2.nreff; + dd2.ha = xe->xdf2.ha; + dd2.rchg = xe->xdf2.rchg; + dd2.rindex = xe->xdf2.rindex; + + if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec, + kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) { + + xdl_free(kvd); + xdl_free_env(xe); + return -1; + } + + xdl_free(kvd); + + return 0; +} + + +static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) { + xdchange_t *xch; + + if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t)))) + return NULL; + + xch->next = xscr; + xch->i1 = i1; + xch->i2 = i2; + xch->chg1 = chg1; + xch->chg2 = chg2; + + return xch; +} + + +int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) { + xdchange_t *cscr = NULL, *xch; + char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg; + long i1, i2, l1, l2; + + /* + * Trivial. Collects "groups" of changes and creates an edit script. + */ + for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--) + if (rchg1[i1 - 1] || rchg2[i2 - 1]) { + for (l1 = i1; rchg1[i1 - 1]; i1--); + for (l2 = i2; rchg2[i2 - 1]; i2--); + + if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) { + xdl_free_script(cscr); + return -1; + } + cscr = xch; + } + + *xscr = cscr; + + return 0; +} + + +void xdl_free_script(xdchange_t *xscr) { + xdchange_t *xch; + + while ((xch = xscr) != NULL) { + xscr = xscr->next; + xdl_free(xch); + } +} + + +int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdemitconf_t const *xecfg, xdemitcb_t *ecb) { + xdchange_t *xscr; + xdfenv_t xe; + + if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) { + + return -1; + } + + if (xdl_build_script(&xe, &xscr) < 0) { + + xdl_free_env(&xe); + return -1; + } + + if (xscr) { + if (xdl_emit_diff(&xe, xscr, ecb, xecfg) < 0) { + + xdl_free_script(xscr); + xdl_free_env(&xe); + return -1; + } + + xdl_free_script(xscr); + } + + xdl_free_env(&xe); + + return 0; +} + diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h new file mode 100644 index 0000000..dd8f3c9 --- /dev/null +++ b/xdiff/xdiffi.h @@ -0,0 +1,60 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XDIFFI_H) +#define XDIFFI_H + + +typedef struct s_diffdata { + long nrec; + unsigned long const *ha; + long *rindex; + char *rchg; +} diffdata_t; + +typedef struct s_xdalgoenv { + long mxcost; + long snake_cnt; + long heur_min; +} xdalgoenv_t; + +typedef struct s_xdchange { + struct s_xdchange *next; + long i1, i2; + long chg1, chg2; +} xdchange_t; + + + +int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, + diffdata_t *dd2, long off2, long lim2, + long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv); +int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *xe); +int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr); +void xdl_free_script(xdchange_t *xscr); +int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, + xdemitconf_t const *xecfg); + + +#endif /* #if !defined(XDIFFI_H) */ + diff --git a/xdiff/xemit.c b/xdiff/xemit.c new file mode 100644 index 0000000..2e5d54c --- /dev/null +++ b/xdiff/xemit.c @@ -0,0 +1,141 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#include "xinclude.h" + + + + +static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec); +static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb); +static xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg); + + + + +static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) { + + *rec = xdf->recs[ri]->ptr; + + return xdf->recs[ri]->size; +} + + +static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) { + long size, psize = strlen(pre); + char const *rec; + + size = xdl_get_rec(xdf, ri, &rec); + if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) { + + return -1; + } + + return 0; +} + + +/* + * Starting at the passed change atom, find the latest change atom to be included + * inside the differential hunk according to the specified configuration. + */ +static xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg) { + xdchange_t *xch, *xchp; + + for (xchp = xscr, xch = xscr->next; xch; xchp = xch, xch = xch->next) + if (xch->i1 - (xchp->i1 + xchp->chg1) > 2 * xecfg->ctxlen) + break; + + return xchp; +} + + +int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, + xdemitconf_t const *xecfg) { + long s1, s2, e1, e2, lctx; + xdchange_t *xch, *xche; + + for (xch = xche = xscr; xch; xch = xche->next) { + xche = xdl_get_hunk(xch, xecfg); + + s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0); + s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0); + + lctx = xecfg->ctxlen; + lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1)); + lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2)); + + e1 = xche->i1 + xche->chg1 + lctx; + e2 = xche->i2 + xche->chg2 + lctx; + + /* + * Emit current hunk header. + */ + if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2, ecb) < 0) + return -1; + + /* + * Emit pre-context. + */ + for (; s1 < xch->i1; s1++) + if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0) + return -1; + + for (s1 = xch->i1, s2 = xch->i2;; xch = xch->next) { + /* + * Merge previous with current change atom. + */ + for (; s1 < xch->i1 && s2 < xch->i2; s1++, s2++) + if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0) + return -1; + + /* + * Removes lines from the first file. + */ + for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++) + if (xdl_emit_record(&xe->xdf1, s1, "-", ecb) < 0) + return -1; + + /* + * Adds lines from the second file. + */ + for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++) + if (xdl_emit_record(&xe->xdf2, s2, "+", ecb) < 0) + return -1; + + if (xch == xche) + break; + s1 = xch->i1 + xch->chg1; + s2 = xch->i2 + xch->chg2; + } + + /* + * Emit post-context. + */ + for (s1 = xche->i1 + xche->chg1; s1 < e1; s1++) + if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0) + return -1; + } + + return 0; +} + diff --git a/xdiff/xemit.h b/xdiff/xemit.h new file mode 100644 index 0000000..e629417 --- /dev/null +++ b/xdiff/xemit.h @@ -0,0 +1,34 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XEMIT_H) +#define XEMIT_H + + + +int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, + xdemitconf_t const *xecfg); + + + +#endif /* #if !defined(XEMIT_H) */ + diff --git a/xdiff/xinclude.h b/xdiff/xinclude.h new file mode 100644 index 0000000..9490fc5 --- /dev/null +++ b/xdiff/xinclude.h @@ -0,0 +1,42 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XINCLUDE_H) +#define XINCLUDE_H + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <limits.h> + +#include "xmacros.h" +#include "xdiff.h" +#include "xtypes.h" +#include "xutils.h" +#include "xprepare.h" +#include "xdiffi.h" +#include "xemit.h" + + +#endif /* #if !defined(XINCLUDE_H) */ + diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h new file mode 100644 index 0000000..4c2fde8 --- /dev/null +++ b/xdiff/xmacros.h @@ -0,0 +1,53 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XMACROS_H) +#define XMACROS_H + + +#define GR_PRIME 0x9e370001UL + + +#define XDL_MIN(a, b) ((a) < (b) ? (a): (b)) +#define XDL_MAX(a, b) ((a) > (b) ? (a): (b)) +#define XDL_ABS(v) ((v) >= 0 ? (v): -(v)) +#define XDL_ISDIGIT(c) ((c) >= '0' && (c) <= '9') +#define XDL_HASHLONG(v, b) (((unsigned long)(v) * GR_PRIME) >> ((CHAR_BIT * sizeof(unsigned long)) - (b))) +#define XDL_PTRFREE(p) do { if (p) { xdl_free(p); (p) = NULL; } } while (0) +#define XDL_LE32_PUT(p, v) \ +do { \ + unsigned char *__p = (unsigned char *) (p); \ + *__p++ = (unsigned char) (v); \ + *__p++ = (unsigned char) ((v) >> 8); \ + *__p++ = (unsigned char) ((v) >> 16); \ + *__p = (unsigned char) ((v) >> 24); \ +} while (0) +#define XDL_LE32_GET(p, v) \ +do { \ + unsigned char const *__p = (unsigned char const *) (p); \ + (v) = (unsigned long) __p[0] | ((unsigned long) __p[1]) << 8 | \ + ((unsigned long) __p[2]) << 16 | ((unsigned long) __p[3]) << 24; \ +} while (0) + + +#endif /* #if !defined(XMACROS_H) */ + diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c new file mode 100644 index 0000000..27a0879 --- /dev/null +++ b/xdiff/xprepare.c @@ -0,0 +1,436 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#include "xinclude.h" + + + +#define XDL_KPDIS_RUN 4 + + + +typedef struct s_xdlclass { + struct s_xdlclass *next; + unsigned long ha; + char const *line; + long size; + long idx; +} xdlclass_t; + +typedef struct s_xdlclassifier { + unsigned int hbits; + long hsize; + xdlclass_t **rchash; + chastore_t ncha; + long count; +} xdlclassifier_t; + + + + +static int xdl_init_classifier(xdlclassifier_t *cf, long size); +static void xdl_free_classifier(xdlclassifier_t *cf); +static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits, + xrecord_t *rec); +static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, + xdlclassifier_t *cf, xdfile_t *xdf); +static void xdl_free_ctx(xdfile_t *xdf); +static int xdl_clean_mmatch(char const *dis, long i, long s, long e); +static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2); +static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); +static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2); + + + + +static int xdl_init_classifier(xdlclassifier_t *cf, long size) { + long i; + + cf->hbits = xdl_hashbits((unsigned int) size); + cf->hsize = 1 << cf->hbits; + + if (xdl_cha_init(&cf->ncha, sizeof(xdlclass_t), size / 4 + 1) < 0) { + + return -1; + } + if (!(cf->rchash = (xdlclass_t **) xdl_malloc(cf->hsize * sizeof(xdlclass_t *)))) { + + xdl_cha_free(&cf->ncha); + return -1; + } + for (i = 0; i < cf->hsize; i++) + cf->rchash[i] = NULL; + + cf->count = 0; + + return 0; +} + + +static void xdl_free_classifier(xdlclassifier_t *cf) { + + xdl_free(cf->rchash); + xdl_cha_free(&cf->ncha); +} + + +static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits, + xrecord_t *rec) { + long hi; + char const *line; + xdlclass_t *rcrec; + + line = rec->ptr; + hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); + for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) + if (rcrec->ha == rec->ha && rcrec->size == rec->size && + !memcmp(line, rcrec->line, rec->size)) + break; + + if (!rcrec) { + if (!(rcrec = xdl_cha_alloc(&cf->ncha))) { + + return -1; + } + rcrec->idx = cf->count++; + rcrec->line = line; + rcrec->size = rec->size; + rcrec->ha = rec->ha; + rcrec->next = cf->rchash[hi]; + cf->rchash[hi] = rcrec; + } + + rec->ha = (unsigned long) rcrec->idx; + + hi = (long) XDL_HASHLONG(rec->ha, hbits); + rec->next = rhash[hi]; + rhash[hi] = rec; + + return 0; +} + + +static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, + xdlclassifier_t *cf, xdfile_t *xdf) { + unsigned int hbits; + long i, nrec, hsize, bsize; + unsigned long hav; + char const *blk, *cur, *top, *prev; + xrecord_t *crec; + xrecord_t **recs, **rrecs; + xrecord_t **rhash; + unsigned long *ha; + char *rchg; + long *rindex; + + if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) { + + return -1; + } + if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) { + + xdl_cha_free(&xdf->rcha); + return -1; + } + + hbits = xdl_hashbits((unsigned int) narec); + hsize = 1 << hbits; + if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) { + + xdl_free(recs); + xdl_cha_free(&xdf->rcha); + return -1; + } + for (i = 0; i < hsize; i++) + rhash[i] = NULL; + + nrec = 0; + if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { + for (top = blk + bsize;;) { + if (cur >= top) { + if (!(cur = blk = xdl_mmfile_next(mf, &bsize))) + break; + top = blk + bsize; + } + prev = cur; + hav = xdl_hash_record(&cur, top); + if (nrec >= narec) { + narec *= 2; + if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) { + + xdl_free(rhash); + xdl_free(recs); + xdl_cha_free(&xdf->rcha); + return -1; + } + recs = rrecs; + } + if (!(crec = xdl_cha_alloc(&xdf->rcha))) { + + xdl_free(rhash); + xdl_free(recs); + xdl_cha_free(&xdf->rcha); + return -1; + } + crec->ptr = prev; + crec->size = (long) (cur - prev); + crec->ha = hav; + recs[nrec++] = crec; + + if (xdl_classify_record(cf, rhash, hbits, crec) < 0) { + + xdl_free(rhash); + xdl_free(recs); + xdl_cha_free(&xdf->rcha); + return -1; + } + } + } + + if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) { + + xdl_free(rhash); + xdl_free(recs); + xdl_cha_free(&xdf->rcha); + return -1; + } + memset(rchg, 0, (nrec + 2) * sizeof(char)); + + if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) { + + xdl_free(rchg); + xdl_free(rhash); + xdl_free(recs); + xdl_cha_free(&xdf->rcha); + return -1; + } + if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) { + + xdl_free(rindex); + xdl_free(rchg); + xdl_free(rhash); + xdl_free(recs); + xdl_cha_free(&xdf->rcha); + return -1; + } + + xdf->nrec = nrec; + xdf->recs = recs; + xdf->hbits = hbits; + xdf->rhash = rhash; + xdf->rchg = rchg + 1; + xdf->rindex = rindex; + xdf->nreff = 0; + xdf->ha = ha; + xdf->dstart = 0; + xdf->dend = nrec - 1; + + return 0; +} + + +static void xdl_free_ctx(xdfile_t *xdf) { + + xdl_free(xdf->rhash); + xdl_free(xdf->rindex); + xdl_free(xdf->rchg - 1); + xdl_free(xdf->ha); + xdl_free(xdf->recs); + xdl_cha_free(&xdf->rcha); +} + + +int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *xe) { + long enl1, enl2; + xdlclassifier_t cf; + + enl1 = xdl_guess_lines(mf1) + 1; + enl2 = xdl_guess_lines(mf2) + 1; + + if (xdl_init_classifier(&cf, enl1 + enl2 + 1) < 0) { + + return -1; + } + + if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) { + + xdl_free_classifier(&cf); + return -1; + } + if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) { + + xdl_free_ctx(&xe->xdf1); + xdl_free_classifier(&cf); + return -1; + } + + xdl_free_classifier(&cf); + + if (xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) { + + xdl_free_ctx(&xe->xdf2); + xdl_free_ctx(&xe->xdf1); + return -1; + } + + return 0; +} + + +void xdl_free_env(xdfenv_t *xe) { + + xdl_free_ctx(&xe->xdf2); + xdl_free_ctx(&xe->xdf1); +} + + +static int xdl_clean_mmatch(char const *dis, long i, long s, long e) { + long r, rdis, rpdis; + + for (r = 1, rdis = 0, rpdis = 1; (i - r) >= s; r++) { + if (!dis[i - r]) + rdis++; + else if (dis[i - r] == 2) + rpdis++; + else + break; + } + for (r = 1; (i + r) <= e; r++) { + if (!dis[i + r]) + rdis++; + else if (dis[i + r] == 2) + rpdis++; + else + break; + } + + return rpdis * XDL_KPDIS_RUN < (rpdis + rdis); +} + + +/* + * Try to reduce the problem complexity, discard records that have no + * matches on the other file. Also, lines that have multiple matches + * might be potentially discarded if they happear in a run of discardable. + */ +static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2) { + long i, rhi, nreff; + unsigned long hav; + xrecord_t **recs; + xrecord_t *rec; + char *dis, *dis1, *dis2; + + if (!(dis = (char *) xdl_malloc((xdf1->nrec + xdf2->nrec + 2) * sizeof(char)))) { + + return -1; + } + memset(dis, 0, (xdf1->nrec + xdf2->nrec + 2) * sizeof(char)); + dis1 = dis; + dis2 = dis1 + xdf1->nrec + 1; + + for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { + hav = (*recs)->ha; + rhi = (long) XDL_HASHLONG(hav, xdf2->hbits); + for (rec = xdf2->rhash[rhi]; rec; rec = rec->next) + if (rec->ha == hav && ++dis1[i] == 2) + break; + } + + for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { + hav = (*recs)->ha; + rhi = (long) XDL_HASHLONG(hav, xdf1->hbits); + for (rec = xdf1->rhash[rhi]; rec; rec = rec->next) + if (rec->ha == hav && ++dis2[i] == 2) + break; + } + + for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; + i <= xdf1->dend; i++, recs++) { + if (dis1[i] == 1 || + (dis1[i] == 2 && !xdl_clean_mmatch(dis1, i, xdf1->dstart, xdf1->dend))) { + xdf1->rindex[nreff] = i; + xdf1->ha[nreff] = (*recs)->ha; + nreff++; + } else + xdf1->rchg[i] = 1; + } + xdf1->nreff = nreff; + + for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; + i <= xdf2->dend; i++, recs++) { + if (dis2[i] == 1 || + (dis2[i] == 2 && !xdl_clean_mmatch(dis2, i, xdf2->dstart, xdf2->dend))) { + xdf2->rindex[nreff] = i; + xdf2->ha[nreff] = (*recs)->ha; + nreff++; + } else + xdf2->rchg[i] = 1; + } + xdf2->nreff = nreff; + + xdl_free(dis); + + return 0; +} + + +/* + * Early trim initial and terminal matching records. + */ +static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { + long i, lim; + xrecord_t **recs1, **recs2; + + recs1 = xdf1->recs; + recs2 = xdf2->recs; + for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; + i++, recs1++, recs2++) + if ((*recs1)->ha != (*recs2)->ha) + break; + + xdf1->dstart = xdf2->dstart = i; + + recs1 = xdf1->recs + xdf1->nrec - 1; + recs2 = xdf2->recs + xdf2->nrec - 1; + for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--) + if ((*recs1)->ha != (*recs2)->ha) + break; + + xdf1->dend = xdf1->nrec - i - 1; + xdf2->dend = xdf2->nrec - i - 1; + + return 0; +} + + +static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2) { + + if (xdl_trim_ends(xdf1, xdf2) < 0 || + xdl_cleanup_records(xdf1, xdf2) < 0) { + + return -1; + } + + return 0; +} + diff --git a/xdiff/xprepare.h b/xdiff/xprepare.h new file mode 100644 index 0000000..344c569 --- /dev/null +++ b/xdiff/xprepare.h @@ -0,0 +1,35 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XPREPARE_H) +#define XPREPARE_H + + + +int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *xe); +void xdl_free_env(xdfenv_t *xe); + + + +#endif /* #if !defined(XPREPARE_H) */ + diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h new file mode 100644 index 0000000..3593a66 --- /dev/null +++ b/xdiff/xtypes.h @@ -0,0 +1,68 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XTYPES_H) +#define XTYPES_H + + + +typedef struct s_chanode { + struct s_chanode *next; + long icurr; +} chanode_t; + +typedef struct s_chastore { + chanode_t *head, *tail; + long isize, nsize; + chanode_t *ancur; + chanode_t *sncur; + long scurr; +} chastore_t; + +typedef struct s_xrecord { + struct s_xrecord *next; + char const *ptr; + long size; + unsigned long ha; +} xrecord_t; + +typedef struct s_xdfile { + chastore_t rcha; + long nrec; + unsigned int hbits; + xrecord_t **rhash; + long dstart, dend; + xrecord_t **recs; + char *rchg; + long *rindex; + long nreff; + unsigned long *ha; +} xdfile_t; + +typedef struct s_xdfenv { + xdfile_t xdf1, xdf2; +} xdfenv_t; + + + +#endif /* #if !defined(XTYPES_H) */ + diff --git a/xdiff/xutils.c b/xdiff/xutils.c new file mode 100644 index 0000000..8221806 --- /dev/null +++ b/xdiff/xutils.c @@ -0,0 +1,277 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#include "xinclude.h" + + + +#define XDL_GUESS_NLINES 256 + + + + +int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize, + xdemitcb_t *ecb) { + mmbuffer_t mb[3]; + int i; + + mb[0].ptr = (char *) pre; + mb[0].size = psize; + mb[1].ptr = (char *) rec; + mb[1].size = size; + i = 2; + + if (!size || rec[size-1] != '\n') { + mb[2].ptr = "\n\\ No newline at end of file\n"; + mb[2].size = strlen(mb[2].ptr); + i = 3; + } + + if (ecb->outf(ecb->priv, mb, i) < 0) { + + return -1; + } + + return 0; +} + +void *xdl_mmfile_first(mmfile_t *mmf, long *size) +{ + *size = mmf->size; + return mmf->ptr; +} + + +void *xdl_mmfile_next(mmfile_t *mmf, long *size) +{ + return NULL; +} + + +long xdl_mmfile_size(mmfile_t *mmf) +{ + return mmf->size; +} + + +int xdl_cha_init(chastore_t *cha, long isize, long icount) { + + cha->head = cha->tail = NULL; + cha->isize = isize; + cha->nsize = icount * isize; + cha->ancur = cha->sncur = NULL; + cha->scurr = 0; + + return 0; +} + + +void xdl_cha_free(chastore_t *cha) { + chanode_t *cur, *tmp; + + for (cur = cha->head; (tmp = cur) != NULL;) { + cur = cur->next; + xdl_free(tmp); + } +} + + +void *xdl_cha_alloc(chastore_t *cha) { + chanode_t *ancur; + void *data; + + if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) { + if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) { + + return NULL; + } + ancur->icurr = 0; + ancur->next = NULL; + if (cha->tail) + cha->tail->next = ancur; + if (!cha->head) + cha->head = ancur; + cha->tail = ancur; + cha->ancur = ancur; + } + + data = (char *) ancur + sizeof(chanode_t) + ancur->icurr; + ancur->icurr += cha->isize; + + return data; +} + + +void *xdl_cha_first(chastore_t *cha) { + chanode_t *sncur; + + if (!(cha->sncur = sncur = cha->head)) + return NULL; + + cha->scurr = 0; + + return (char *) sncur + sizeof(chanode_t) + cha->scurr; +} + + +void *xdl_cha_next(chastore_t *cha) { + chanode_t *sncur; + + if (!(sncur = cha->sncur)) + return NULL; + cha->scurr += cha->isize; + if (cha->scurr == sncur->icurr) { + if (!(sncur = cha->sncur = sncur->next)) + return NULL; + cha->scurr = 0; + } + + return (char *) sncur + sizeof(chanode_t) + cha->scurr; +} + + +long xdl_guess_lines(mmfile_t *mf) { + long nl = 0, size, tsize = 0; + char const *data, *cur, *top; + + if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) { + for (top = data + size; nl < XDL_GUESS_NLINES;) { + if (cur >= top) { + tsize += (long) (cur - data); + if (!(cur = data = xdl_mmfile_next(mf, &size))) + break; + top = data + size; + } + nl++; + if (!(cur = memchr(cur, '\n', top - cur))) + cur = top; + else + cur++; + } + tsize += (long) (cur - data); + } + + if (nl && tsize) + nl = xdl_mmfile_size(mf) / (tsize / nl); + + return nl + 1; +} + + +unsigned long xdl_hash_record(char const **data, char const *top) { + unsigned long ha = 5381; + char const *ptr = *data; + + for (; ptr < top && *ptr != '\n'; ptr++) { + ha += (ha << 5); + ha ^= (unsigned long) *ptr; + } + *data = ptr < top ? ptr + 1: ptr; + + return ha; +} + + +unsigned int xdl_hashbits(unsigned int size) { + unsigned int val = 1, bits = 0; + + for (; val < size && bits < CHAR_BIT * sizeof(unsigned int); val <<= 1, bits++); + return bits ? bits: 1; +} + + +int xdl_num_out(char *out, long val) { + char *ptr, *str = out; + char buf[32]; + + ptr = buf + sizeof(buf) - 1; + *ptr = '\0'; + if (val < 0) { + *--ptr = '-'; + val = -val; + } + for (; val && ptr > buf; val /= 10) + *--ptr = "0123456789"[val % 10]; + if (*ptr) + for (; *ptr; ptr++, str++) + *str = *ptr; + else + *str++ = '0'; + *str = '\0'; + + return str - out; +} + + +long xdl_atol(char const *str, char const **next) { + long val, base; + char const *top; + + for (top = str; XDL_ISDIGIT(*top); top++); + if (next) + *next = top; + for (val = 0, base = 1, top--; top >= str; top--, base *= 10) + val += base * (long)(*top - '0'); + return val; +} + + +int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, xdemitcb_t *ecb) { + int nb = 0; + mmbuffer_t mb; + char buf[128]; + + memcpy(buf, "@@ -", 4); + nb += 4; + + nb += xdl_num_out(buf + nb, c1 ? s1: 0); + + if (c1 != 1) { + memcpy(buf + nb, ",", 1); + nb += 1; + + nb += xdl_num_out(buf + nb, c1); + } + + memcpy(buf + nb, " +", 2); + nb += 2; + + nb += xdl_num_out(buf + nb, c2 ? s2: 0); + + if (c2 != 1) { + memcpy(buf + nb, ",", 1); + nb += 1; + + nb += xdl_num_out(buf + nb, c2); + } + + memcpy(buf + nb, " @@\n", 4); + nb += 4; + + mb.ptr = buf; + mb.size = nb; + if (ecb->outf(ecb->priv, &mb, 1) < 0) + return -1; + + return 0; +} + diff --git a/xdiff/xutils.h b/xdiff/xutils.h new file mode 100644 index 0000000..428a4bb --- /dev/null +++ b/xdiff/xutils.h @@ -0,0 +1,44 @@ +/* + * LibXDiff by Davide Libenzi ( File Differential Library ) + * Copyright (C) 2003 Davide Libenzi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Davide Libenzi <davidel@xmailserver.org> + * + */ + +#if !defined(XUTILS_H) +#define XUTILS_H + + +int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize, + xdemitcb_t *ecb); +int xdl_cha_init(chastore_t *cha, long isize, long icount); +void xdl_cha_free(chastore_t *cha); +void *xdl_cha_alloc(chastore_t *cha); +void *xdl_cha_first(chastore_t *cha); +void *xdl_cha_next(chastore_t *cha); +long xdl_guess_lines(mmfile_t *mf); +unsigned long xdl_hash_record(char const **data, char const *top); +unsigned int xdl_hashbits(unsigned int size); +int xdl_num_out(char *out, long val); +long xdl_atol(char const *str, char const **next); +int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, xdemitcb_t *ecb); + + + +#endif /* #if !defined(XUTILS_H) */ + |