summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/git-ls-files.txt9
-rw-r--r--Documentation/git-ls-tree.txt9
-rw-r--r--Makefile15
-rw-r--r--count-delta.c72
-rw-r--r--count-delta.h10
-rw-r--r--diff.c286
-rw-r--r--diffcore-break.c46
-rw-r--r--diffcore-delta.c228
-rw-r--r--diffcore-rename.c27
-rw-r--r--diffcore.h7
-rwxr-xr-xgit-push.sh6
-rwxr-xr-xgit-send-email.perl86
-rw-r--r--ls-files.c19
-rw-r--r--ls-tree.c19
-rw-r--r--rev-list.c11
-rwxr-xr-xt/t4001-diff-rename.sh2
-rwxr-xr-xt/t5000-tar-tree.sh3
-rw-r--r--tar-tree.c385
-rw-r--r--tar.h25
-rw-r--r--xdiff/xdiff.h91
-rw-r--r--xdiff/xdiffi.c469
-rw-r--r--xdiff/xdiffi.h60
-rw-r--r--xdiff/xemit.c141
-rw-r--r--xdiff/xemit.h34
-rw-r--r--xdiff/xinclude.h42
-rw-r--r--xdiff/xmacros.h53
-rw-r--r--xdiff/xprepare.c436
-rw-r--r--xdiff/xprepare.h35
-rw-r--r--xdiff/xtypes.h68
-rw-r--r--xdiff/xutils.c277
-rw-r--r--xdiff/xutils.h44
31 files changed, 2472 insertions, 543 deletions
diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index 980c5c9..796d049 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -14,9 +14,9 @@ SYNOPSIS
(-[c|d|o|i|s|u|k|m])\*
[-x <pattern>|--exclude=<pattern>]
[-X <file>|--exclude-from=<file>]
- [--exclude-per-directory=<file>]
+ [--exclude-per-directory=<file>]
[--error-unmatch]
- [--full-name] [--] [<file>]\*
+ [--full-name] [--abbrev] [--] [<file>]\*
DESCRIPTION
-----------
@@ -101,6 +101,11 @@ OPTIONS
option forces paths to be output relative to the project
top directory.
+--abbrev[=<n>]::
+ Instead of showing the full 40-byte hexadecimal object
+ lines, show only handful hexdigits prefix.
+ Non default number of digits can be specified with --abbrev=<n>.
+
--::
Do not interpret any more arguments as options.
diff --git a/Documentation/git-ls-tree.txt b/Documentation/git-ls-tree.txt
index 5bf6d8b..018c401 100644
--- a/Documentation/git-ls-tree.txt
+++ b/Documentation/git-ls-tree.txt
@@ -8,7 +8,9 @@ git-ls-tree - Lists the contents of a tree object
SYNOPSIS
--------
-'git-ls-tree' [-d] [-r] [-t] [-z] [--name-only] [--name-status] <tree-ish> [paths...]
+'git-ls-tree' [-d] [-r] [-t] [-z]
+ [--name-only] [--name-status] [--full-name] [--abbrev=[<n>]]
+ <tree-ish> [paths...]
DESCRIPTION
-----------
@@ -40,6 +42,11 @@ OPTIONS
--name-status::
List only filenames (instead of the "long" output), one per line.
+--abbrev[=<n>]::
+ Instead of showing the full 40-byte hexadecimal object
+ lines, show only handful hexdigits prefix.
+ Non default number of digits can be specified with --abbrev=<n>.
+
paths::
When paths are given, show them (note that this isn't really raw
pathnames, but rather a list of patterns to match). Otherwise
diff --git a/Makefile b/Makefile
index 8d45378..663a803 100644
--- a/Makefile
+++ b/Makefile
@@ -188,9 +188,10 @@ PYMODULES = \
gitMergeCommon.py
LIB_FILE=libgit.a
+XDIFF_LIB=xdiff/lib.a
LIB_H = \
- blob.h cache.h commit.h count-delta.h csum-file.h delta.h \
+ blob.h cache.h commit.h csum-file.h delta.h \
diff.h object.h pack.h pkt-line.h quote.h refs.h \
run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h
@@ -200,7 +201,7 @@ DIFF_OBJS = \
diffcore-delta.o
LIB_OBJS = \
- blob.o commit.o connect.o count-delta.o csum-file.o \
+ blob.o commit.o connect.o csum-file.o \
date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \
object.o pack-check.o patch-delta.o path.o pkt-line.o \
quote.o read-cache.o refs.o run-command.o \
@@ -209,7 +210,7 @@ LIB_OBJS = \
fetch-clone.o revision.o pager.o \
$(DIFF_OBJS)
-LIBS = $(LIB_FILE)
+LIBS = $(LIB_FILE) $(XDIFF_LIB)
LIBS += -lz
#
@@ -544,12 +545,18 @@ init-db.o: init-db.c
-DDEFAULT_GIT_TEMPLATE_DIR='"$(template_dir_SQ)"' $*.c
$(LIB_OBJS): $(LIB_H)
-$(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIB_H)
+$(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIBS)
$(DIFF_OBJS): diffcore.h
$(LIB_FILE): $(LIB_OBJS)
$(AR) rcs $@ $(LIB_OBJS)
+XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o
+
+$(XDIFF_LIB): $(XDIFF_OBJS)
+ $(AR) rcs $@ $(XDIFF_OBJS)
+
+
doc:
$(MAKE) -C Documentation all
diff --git a/count-delta.c b/count-delta.c
deleted file mode 100644
index 058a2aa..0000000
--- a/count-delta.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (C) 2005 Junio C Hamano
- * The delta-parsing part is almost straight copy of patch-delta.c
- * which is (C) 2005 Nicolas Pitre <nico@cam.org>.
- */
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include "delta.h"
-#include "count-delta.h"
-
-/*
- * NOTE. We do not _interpret_ delta fully. As an approximation, we
- * just count the number of bytes that are copied from the source, and
- * the number of literal data bytes that are inserted.
- *
- * Number of bytes that are _not_ copied from the source is deletion,
- * and number of inserted literal bytes are addition, so sum of them
- * is the extent of damage.
- */
-int count_delta(void *delta_buf, unsigned long delta_size,
- unsigned long *src_copied, unsigned long *literal_added)
-{
- unsigned long copied_from_source, added_literal;
- const unsigned char *data, *top;
- unsigned char cmd;
- unsigned long src_size, dst_size, out;
-
- if (delta_size < DELTA_SIZE_MIN)
- return -1;
-
- data = delta_buf;
- top = delta_buf + delta_size;
-
- src_size = get_delta_hdr_size(&data);
- dst_size = get_delta_hdr_size(&data);
-
- added_literal = copied_from_source = out = 0;
- while (data < top) {
- cmd = *data++;
- if (cmd & 0x80) {
- unsigned long cp_off = 0, cp_size = 0;
- if (cmd & 0x01) cp_off = *data++;
- if (cmd & 0x02) cp_off |= (*data++ << 8);
- if (cmd & 0x04) cp_off |= (*data++ << 16);
- if (cmd & 0x08) cp_off |= (*data++ << 24);
- if (cmd & 0x10) cp_size = *data++;
- if (cmd & 0x20) cp_size |= (*data++ << 8);
- if (cmd & 0x40) cp_size |= (*data++ << 16);
- if (cp_size == 0) cp_size = 0x10000;
-
- copied_from_source += cp_size;
- out += cp_size;
- } else {
- /* write literal into dst */
- added_literal += cmd;
- out += cmd;
- data += cmd;
- }
- }
-
- /* sanity check */
- if (data != top || out != dst_size)
- return -1;
-
- /* delete size is what was _not_ copied from source.
- * edit size is that and literal additions.
- */
- *src_copied = copied_from_source;
- *literal_added = added_literal;
- return 0;
-}
diff --git a/count-delta.h b/count-delta.h
deleted file mode 100644
index 7359629..0000000
--- a/count-delta.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/*
- * Copyright (C) 2005 Junio C Hamano
- */
-#ifndef COUNT_DELTA_H
-#define COUNT_DELTA_H
-
-int count_delta(void *, unsigned long,
- unsigned long *src_copied, unsigned long *literal_added);
-
-#endif
diff --git a/diff.c b/diff.c
index c0548ee..5eae094 100644
--- a/diff.c
+++ b/diff.c
@@ -8,8 +8,7 @@
#include "quote.h"
#include "diff.h"
#include "diffcore.h"
-
-static const char *diff_opts = "-pu";
+#include "xdiff/xdiff.h"
static int use_size_cache;
@@ -69,25 +68,10 @@ static const char *external_diff(void)
{
static const char *external_diff_cmd = NULL;
static int done_preparing = 0;
- const char *env_diff_opts;
if (done_preparing)
return external_diff_cmd;
-
- /*
- * Default values above are meant to match the
- * Linux kernel development style. Examples of
- * alternative styles you can specify via environment
- * variables are:
- *
- * GIT_DIFF_OPTS="-c";
- */
external_diff_cmd = getenv("GIT_EXTERNAL_DIFF");
-
- /* In case external diff fails... */
- env_diff_opts = getenv("GIT_DIFF_OPTS");
- if (env_diff_opts) diff_opts = env_diff_opts;
-
done_preparing = 1;
return external_diff_cmd;
}
@@ -101,13 +85,12 @@ static struct diff_tempfile {
char tmp_path[TEMPFILE_PATH_LEN];
} diff_temp[2];
-static int count_lines(const char *filename)
+static int count_lines(const char *data, int size)
{
- FILE *in;
int count, ch, completely_empty = 1, nl_just_seen = 0;
- in = fopen(filename, "r");
count = 0;
- while ((ch = fgetc(in)) != EOF)
+ while (0 < size--) {
+ ch = *data++;
if (ch == '\n') {
count++;
nl_just_seen = 1;
@@ -117,7 +100,7 @@ static int count_lines(const char *filename)
nl_just_seen = 0;
completely_empty = 0;
}
- fclose(in);
+ }
if (completely_empty)
return 0;
if (!nl_just_seen)
@@ -140,12 +123,11 @@ static void print_line_count(int count)
}
}
-static void copy_file(int prefix, const char *filename)
+static void copy_file(int prefix, const char *data, int size)
{
- FILE *in;
int ch, nl_just_seen = 1;
- in = fopen(filename, "r");
- while ((ch = fgetc(in)) != EOF) {
+ while (0 < size--) {
+ ch = *data++;
if (nl_just_seen)
putchar(prefix);
putchar(ch);
@@ -154,92 +136,106 @@ static void copy_file(int prefix, const char *filename)
else
nl_just_seen = 0;
}
- fclose(in);
if (!nl_just_seen)
printf("\n\\ No newline at end of file\n");
}
static void emit_rewrite_diff(const char *name_a,
const char *name_b,
- struct diff_tempfile *temp)
+ struct diff_filespec *one,
+ struct diff_filespec *two)
{
/* Use temp[i].name as input, name_a and name_b as labels */
int lc_a, lc_b;
- lc_a = count_lines(temp[0].name);
- lc_b = count_lines(temp[1].name);
+ lc_a = count_lines(one->data, one->size);
+ lc_b = count_lines(two->data, two->size);
printf("--- %s\n+++ %s\n@@ -", name_a, name_b);
print_line_count(lc_a);
printf(" +");
print_line_count(lc_b);
printf(" @@\n");
if (lc_a)
- copy_file('-', temp[0].name);
+ copy_file('-', one->data, one->size);
if (lc_b)
- copy_file('+', temp[1].name);
+ copy_file('+', two->data, two->size);
}
-static const char *builtin_diff(const char *name_a,
- const char *name_b,
- struct diff_tempfile *temp,
- const char *xfrm_msg,
- int complete_rewrite,
- const char **args)
+static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
{
- int i, next_at, cmd_size;
- const char *const diff_cmd = "diff -L%s -L%s";
- const char *const diff_arg = "-- %s %s||:"; /* "||:" is to return 0 */
- const char *input_name_sq[2];
- const char *label_path[2];
- char *cmd;
-
- /* diff_cmd and diff_arg have 4 %s in total which makes
- * the sum of these strings 8 bytes larger than required.
- * we use 2 spaces around diff-opts, and we need to count
- * terminating NUL; we used to subtract 5 here, but we do not
- * care about small leaks in this subprocess that is about
- * to exec "diff" anymore.
- */
- cmd_size = (strlen(diff_cmd) + strlen(diff_opts) + strlen(diff_arg)
- + 128);
-
- for (i = 0; i < 2; i++) {
- input_name_sq[i] = sq_quote(temp[i].name);
- if (!strcmp(temp[i].name, "/dev/null"))
- label_path[i] = "/dev/null";
- else if (!i)
- label_path[i] = sq_quote(quote_two("a/", name_a));
- else
- label_path[i] = sq_quote(quote_two("b/", name_b));
- cmd_size += (strlen(label_path[i]) + strlen(input_name_sq[i]));
+ if (!DIFF_FILE_VALID(one)) {
+ mf->ptr = ""; /* does not matter */
+ mf->size = 0;
+ return 0;
+ }
+ else if (diff_populate_filespec(one, 0))
+ return -1;
+ mf->ptr = one->data;
+ mf->size = one->size;
+ return 0;
+}
+
+struct emit_callback {
+ const char **label_path;
+};
+
+static int fn_out(void *priv, mmbuffer_t *mb, int nbuf)
+{
+ int i;
+ struct emit_callback *ecbdata = priv;
+
+ if (ecbdata->label_path[0]) {
+ printf("--- %s\n", ecbdata->label_path[0]);
+ printf("+++ %s\n", ecbdata->label_path[1]);
+ ecbdata->label_path[0] = ecbdata->label_path[1] = NULL;
}
+ for (i = 0; i < nbuf; i++)
+ if (!fwrite(mb[i].ptr, mb[i].size, 1, stdout))
+ return -1;
+ return 0;
+}
+
+#define FIRST_FEW_BYTES 8000
+static int mmfile_is_binary(mmfile_t *mf)
+{
+ long sz = mf->size;
+ if (FIRST_FEW_BYTES < sz)
+ sz = FIRST_FEW_BYTES;
+ if (memchr(mf->ptr, 0, sz))
+ return 1;
+ return 0;
+}
- cmd = xmalloc(cmd_size);
-
- next_at = 0;
- next_at += snprintf(cmd+next_at, cmd_size-next_at,
- diff_cmd, label_path[0], label_path[1]);
- next_at += snprintf(cmd+next_at, cmd_size-next_at,
- " %s ", diff_opts);
- next_at += snprintf(cmd+next_at, cmd_size-next_at,
- diff_arg, input_name_sq[0], input_name_sq[1]);
-
- printf("diff --git %s %s\n",
- quote_two("a/", name_a), quote_two("b/", name_b));
- if (label_path[0][0] == '/') {
- /* dev/null */
- printf("new file mode %s\n", temp[1].mode);
+static void builtin_diff(const char *name_a,
+ const char *name_b,
+ struct diff_filespec *one,
+ struct diff_filespec *two,
+ const char *xfrm_msg,
+ int complete_rewrite)
+{
+ mmfile_t mf1, mf2;
+ const char *lbl[2];
+ char *a_one, *b_two;
+
+ a_one = quote_two("a/", name_a);
+ b_two = quote_two("b/", name_b);
+ lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null";
+ lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null";
+ printf("diff --git %s %s\n", a_one, b_two);
+ if (lbl[0][0] == '/') {
+ /* /dev/null */
+ printf("new file mode %06o\n", two->mode);
if (xfrm_msg && xfrm_msg[0])
puts(xfrm_msg);
}
- else if (label_path[1][0] == '/') {
- printf("deleted file mode %s\n", temp[0].mode);
+ else if (lbl[1][0] == '/') {
+ printf("deleted file mode %06o\n", one->mode);
if (xfrm_msg && xfrm_msg[0])
puts(xfrm_msg);
}
else {
- if (strcmp(temp[0].mode, temp[1].mode)) {
- printf("old mode %s\n", temp[0].mode);
- printf("new mode %s\n", temp[1].mode);
+ if (one->mode != two->mode) {
+ printf("old mode %06o\n", one->mode);
+ printf("new mode %06o\n", two->mode);
}
if (xfrm_msg && xfrm_msg[0])
puts(xfrm_msg);
@@ -247,20 +243,45 @@ static const char *builtin_diff(const char *name_a,
* we do not run diff between different kind
* of objects.
*/
- if (strncmp(temp[0].mode, temp[1].mode, 3))
- return NULL;
+ if ((one->mode ^ two->mode) & S_IFMT)
+ goto free_ab_and_return;
if (complete_rewrite) {
- emit_rewrite_diff(name_a, name_b, temp);
- return NULL;
+ emit_rewrite_diff(name_a, name_b, one, two);
+ goto free_ab_and_return;
}
}
- /* This is disgusting */
- *args++ = "sh";
- *args++ = "-c";
- *args++ = cmd;
- *args = NULL;
- return "/bin/sh";
+ if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
+ die("unable to read files to diff");
+
+ if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2))
+ printf("Binary files %s and %s differ\n", lbl[0], lbl[1]);
+ else {
+ /* Crazy xdl interfaces.. */
+ const char *diffopts = getenv("GIT_DIFF_OPTS");
+ xpparam_t xpp;
+ xdemitconf_t xecfg;
+ xdemitcb_t ecb;
+ struct emit_callback ecbdata;
+
+ ecbdata.label_path = lbl;
+ xpp.flags = XDF_NEED_MINIMAL;
+ xecfg.ctxlen = 3;
+ if (!diffopts)
+ ;
+ else if (!strncmp(diffopts, "--unified=", 10))
+ xecfg.ctxlen = strtoul(diffopts + 10, NULL, 10);
+ else if (!strncmp(diffopts, "-u", 2))
+ xecfg.ctxlen = strtoul(diffopts + 2, NULL, 10);
+ ecb.outf = fn_out;
+ ecb.priv = &ecbdata;
+ xdl_diff(&mf1, &mf2, &xpp, &xecfg, &ecb);
+ }
+
+ free_ab_and_return:
+ free(a_one);
+ free(b_two);
+ return;
}
struct diff_filespec *alloc_filespec(const char *path)
@@ -463,6 +484,8 @@ void diff_free_filespec_data(struct diff_filespec *s)
munmap(s->data, s->size);
s->should_free = s->should_munmap = 0;
s->data = NULL;
+ free(s->cnt_data);
+ s->cnt_data = NULL;
}
static void prep_temp_blob(struct diff_tempfile *temp,
@@ -618,6 +641,7 @@ static void run_external_diff(const char *pgm,
int retval;
static int atexit_asked = 0;
const char *othername;
+ const char **arg = &spawn_arg[0];
othername = (other? other : name);
if (one && two) {
@@ -632,36 +656,25 @@ static void run_external_diff(const char *pgm,
signal(SIGINT, remove_tempfile_on_signal);
}
- if (pgm) {
- const char **arg = &spawn_arg[0];
- if (one && two) {
- *arg++ = pgm;
- *arg++ = name;
- *arg++ = temp[0].name;
- *arg++ = temp[0].hex;
- *arg++ = temp[0].mode;
- *arg++ = temp[1].name;
- *arg++ = temp[1].hex;
- *arg++ = temp[1].mode;
- if (other) {
- *arg++ = other;
- *arg++ = xfrm_msg;
- }
- } else {
- *arg++ = pgm;
- *arg++ = name;
+ if (one && two) {
+ *arg++ = pgm;
+ *arg++ = name;
+ *arg++ = temp[0].name;
+ *arg++ = temp[0].hex;
+ *arg++ = temp[0].mode;
+ *arg++ = temp[1].name;
+ *arg++ = temp[1].hex;
+ *arg++ = temp[1].mode;
+ if (other) {
+ *arg++ = other;
+ *arg++ = xfrm_msg;
}
- *arg = NULL;
} else {
- if (one && two) {
- pgm = builtin_diff(name, othername, temp, xfrm_msg, complete_rewrite, spawn_arg);
- } else
- printf("* Unmerged path %s\n", name);
+ *arg++ = pgm;
+ *arg++ = name;
}
-
- retval = 0;
- if (pgm)
- retval = spawn_prog(pgm, spawn_arg);
+ *arg = NULL;
+ retval = spawn_prog(pgm, spawn_arg);
remove_tempfile();
if (retval) {
fprintf(stderr, "external diff died, stopping at %s.\n", name);
@@ -669,6 +682,26 @@ static void run_external_diff(const char *pgm,
}
}
+static void run_diff_cmd(const char *pgm,
+ const char *name,
+ const char *other,
+ struct diff_filespec *one,
+ struct diff_filespec *two,
+ const char *xfrm_msg,
+ int complete_rewrite)
+{
+ if (pgm) {
+ run_external_diff(pgm, name, other, one, two, xfrm_msg,
+ complete_rewrite);
+ return;
+ }
+ if (one && two)
+ builtin_diff(name, other ? other : name,
+ one, two, xfrm_msg, complete_rewrite);
+ else
+ printf("* Unmerged path %s\n", name);
+}
+
static void diff_fill_sha1_info(struct diff_filespec *one)
{
if (DIFF_FILE_VALID(one)) {
@@ -698,8 +731,7 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
if (DIFF_PAIR_UNMERGED(p)) {
/* unmerged */
- run_external_diff(pgm, p->one->path, NULL, NULL, NULL, NULL,
- 0);
+ run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, 0);
return;
}
@@ -771,15 +803,15 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
* needs to be split into deletion and creation.
*/
struct diff_filespec *null = alloc_filespec(two->path);
- run_external_diff(NULL, name, other, one, null, xfrm_msg, 0);
+ run_diff_cmd(NULL, name, other, one, null, xfrm_msg, 0);
free(null);
null = alloc_filespec(one->path);
- run_external_diff(NULL, name, other, null, two, xfrm_msg, 0);
+ run_diff_cmd(NULL, name, other, null, two, xfrm_msg, 0);
free(null);
}
else
- run_external_diff(pgm, name, other, one, two, xfrm_msg,
- complete_rewrite);
+ run_diff_cmd(pgm, name, other, one, two, xfrm_msg,
+ complete_rewrite);
free(name_munged);
free(other_munged);
diff --git a/diffcore-break.c b/diffcore-break.c
index 0fc2b86..ed0e14c 100644
--- a/diffcore-break.c
+++ b/diffcore-break.c
@@ -45,8 +45,8 @@ static int should_break(struct diff_filespec *src,
* The value we return is 1 if we want the pair to be broken,
* or 0 if we do not.
*/
- unsigned long delta_size, base_size, src_copied, literal_added;
- int to_break = 0;
+ unsigned long delta_size, base_size, src_copied, literal_added,
+ src_removed;
*merge_score_p = 0; /* assume no deletion --- "do not break"
* is the default.
@@ -68,37 +68,45 @@ static int should_break(struct diff_filespec *src,
if (diffcore_count_changes(src->data, src->size,
dst->data, dst->size,
+ NULL, NULL,
0,
&src_copied, &literal_added))
return 0;
+ /* sanity */
+ if (src->size < src_copied)
+ src_copied = src->size;
+ if (dst->size < literal_added + src_copied) {
+ if (src_copied < dst->size)
+ literal_added = dst->size - src_copied;
+ else
+ literal_added = 0;
+ }
+ src_removed = src->size - src_copied;
+
/* Compute merge-score, which is "how much is removed
* from the source material". The clean-up stage will
* merge the surviving pair together if the score is
* less than the minimum, after rename/copy runs.
*/
- if (src->size <= src_copied)
- ; /* all copied, nothing removed */
- else {
- delta_size = src->size - src_copied;
- *merge_score_p = delta_size * MAX_SCORE / src->size;
- }
-
+ *merge_score_p = src_removed * MAX_SCORE / src->size;
+
/* Extent of damage, which counts both inserts and
* deletes.
*/
- if (src->size + literal_added <= src_copied)
- delta_size = 0; /* avoid wrapping around */
- else
- delta_size = (src->size - src_copied) + literal_added;
-
- /* We break if the edit exceeds the minimum.
- * i.e. (break_score / MAX_SCORE < delta_size / base_size)
+ delta_size = src_removed + literal_added;
+ if (delta_size * MAX_SCORE / base_size < break_score)
+ return 0;
+
+ /* If you removed a lot without adding new material, that is
+ * not really a rewrite.
*/
- if (break_score * base_size < delta_size * MAX_SCORE)
- to_break = 1;
+ if ((src->size * break_score < src_removed * MAX_SCORE) &&
+ (literal_added * 20 < src_removed) &&
+ (literal_added * 20 < src_copied))
+ return 0;
- return to_break;
+ return 1;
}
void diffcore_break(int break_score)
diff --git a/diffcore-delta.c b/diffcore-delta.c
index 1e6a691..7338a40 100644
--- a/diffcore-delta.c
+++ b/diffcore-delta.c
@@ -1,43 +1,213 @@
#include "cache.h"
#include "diff.h"
#include "diffcore.h"
-#include "delta.h"
-#include "count-delta.h"
-
-static int diffcore_count_changes_1(void *src, unsigned long src_size,
- void *dst, unsigned long dst_size,
- unsigned long delta_limit,
- unsigned long *src_copied,
- unsigned long *literal_added)
+
+/*
+ * Idea here is very simple.
+ *
+ * We have total of (sz-N+1) N-byte overlapping sequences in buf whose
+ * size is sz. If the same N-byte sequence appears in both source and
+ * destination, we say the byte that starts that sequence is shared
+ * between them (i.e. copied from source to destination).
+ *
+ * For each possible N-byte sequence, if the source buffer has more
+ * instances of it than the destination buffer, that means the
+ * difference are the number of bytes not copied from source to
+ * destination. If the counts are the same, everything was copied
+ * from source to destination. If the destination has more,
+ * everything was copied, and destination added more.
+ *
+ * We are doing an approximation so we do not really have to waste
+ * memory by actually storing the sequence. We just hash them into
+ * somewhere around 2^16 hashbuckets and count the occurrences.
+ *
+ * The length of the sequence is arbitrarily set to 8 for now.
+ */
+
+/* Wild guess at the initial hash size */
+#define INITIAL_HASH_SIZE 9
+
+/* We leave more room in smaller hash but do not let it
+ * grow to have unused hole too much.
+ */
+#define INITIAL_FREE(sz_log2) ((1<<(sz_log2))*(sz_log2-3)/(sz_log2))
+
+/* A prime rather carefully chosen between 2^16..2^17, so that
+ * HASHBASE < INITIAL_FREE(17). We want to keep the maximum hashtable
+ * size under the current 2<<17 maximum, which can hold this many
+ * different values before overflowing to hashtable of size 2<<18.
+ */
+#define HASHBASE 107927
+
+struct spanhash {
+ unsigned int hashval;
+ unsigned int cnt;
+};
+struct spanhash_top {
+ int alloc_log2;
+ int free;
+ struct spanhash data[FLEX_ARRAY];
+};
+
+static struct spanhash *spanhash_find(struct spanhash_top *top,
+ unsigned int hashval)
{
- void *delta;
- unsigned long delta_size;
-
- delta = diff_delta(src, src_size,
- dst, dst_size,
- &delta_size, delta_limit);
- if (!delta)
- /* If delta_limit is exceeded, we have too much differences */
- return -1;
-
- /* Estimate the edit size by interpreting delta. */
- if (count_delta(delta, delta_size, src_copied, literal_added)) {
- free(delta);
- return -1;
+ int sz = 1 << top->alloc_log2;
+ int bucket = hashval & (sz - 1);
+ while (1) {
+ struct spanhash *h = &(top->data[bucket++]);
+ if (!h->cnt)
+ return NULL;
+ if (h->hashval == hashval)
+ return h;
+ if (sz <= bucket)
+ bucket = 0;
}
- free(delta);
- return 0;
+}
+
+static struct spanhash_top *spanhash_rehash(struct spanhash_top *orig)
+{
+ struct spanhash_top *new;
+ int i;
+ int osz = 1 << orig->alloc_log2;
+ int sz = osz << 1;
+
+ new = xmalloc(sizeof(*orig) + sizeof(struct spanhash) * sz);
+ new->alloc_log2 = orig->alloc_log2 + 1;
+ new->free = INITIAL_FREE(new->alloc_log2);
+ memset(new->data, 0, sizeof(struct spanhash) * sz);
+ for (i = 0; i < osz; i++) {
+ struct spanhash *o = &(orig->data[i]);
+ int bucket;
+ if (!o->cnt)
+ continue;
+ bucket = o->hashval & (sz - 1);
+ while (1) {
+ struct spanhash *h = &(new->data[bucket++]);
+ if (!h->cnt) {
+ h->hashval = o->hashval;
+ h->cnt = o->cnt;
+ new->free--;
+ break;
+ }
+ if (sz <= bucket)
+ bucket = 0;
+ }
+ }
+ free(orig);
+ return new;
+}
+
+static struct spanhash_top *add_spanhash(struct spanhash_top *top,
+ unsigned int hashval, int cnt)
+{
+ int bucket, lim;
+ struct spanhash *h;
+
+ lim = (1 << top->alloc_log2);
+ bucket = hashval & (lim - 1);
+ while (1) {
+ h = &(top->data[bucket++]);
+ if (!h->cnt) {
+ h->hashval = hashval;
+ h->cnt = cnt;
+ top->free--;
+ if (top->free < 0)
+ return spanhash_rehash(top);
+ return top;
+ }
+ if (h->hashval == hashval) {
+ h->cnt += cnt;
+ return top;
+ }
+ if (lim <= bucket)
+ bucket = 0;
+ }
+}
+
+static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz)
+{
+ int i, n;
+ unsigned int accum1, accum2, hashval;
+ struct spanhash_top *hash;
+
+ i = INITIAL_HASH_SIZE;
+ hash = xmalloc(sizeof(*hash) + sizeof(struct spanhash) * (1<<i));
+ hash->alloc_log2 = i;
+ hash->free = INITIAL_FREE(i);
+ memset(hash->data, 0, sizeof(struct spanhash) * (1<<i));
+
+ n = 0;
+ accum1 = accum2 = 0;
+ while (sz) {
+ unsigned int c = *buf++;
+ unsigned int old_1 = accum1;
+ sz--;
+ accum1 = (accum1 << 7) ^ (accum2 >> 25);
+ accum2 = (accum2 << 7) ^ (old_1 >> 25);
+ accum1 += c;
+ if (++n < 64 && c != '\n')
+ continue;
+ hashval = (accum1 + accum2 * 0x61) % HASHBASE;
+ hash = add_spanhash(hash, hashval, n);
+ n = 0;
+ accum1 = accum2 = 0;
+ }
+ return hash;
}
int diffcore_count_changes(void *src, unsigned long src_size,
void *dst, unsigned long dst_size,
+ void **src_count_p,
+ void **dst_count_p,
unsigned long delta_limit,
unsigned long *src_copied,
unsigned long *literal_added)
{
- return diffcore_count_changes_1(src, src_size,
- dst, dst_size,
- delta_limit,
- src_copied,
- literal_added);
+ int i, ssz;
+ struct spanhash_top *src_count, *dst_count;
+ unsigned long sc, la;
+
+ src_count = dst_count = NULL;
+ if (src_count_p)
+ src_count = *src_count_p;
+ if (!src_count) {
+ src_count = hash_chars(src, src_size);
+ if (src_count_p)
+ *src_count_p = src_count;
+ }
+ if (dst_count_p)
+ dst_count = *dst_count_p;
+ if (!dst_count) {
+ dst_count = hash_chars(dst, dst_size);
+ if (dst_count_p)
+ *dst_count_p = dst_count;
+ }
+ sc = la = 0;
+
+ ssz = 1 << src_count->alloc_log2;
+ for (i = 0; i < ssz; i++) {
+ struct spanhash *s = &(src_count->data[i]);
+ struct spanhash *d;
+ unsigned dst_cnt, src_cnt;
+ if (!s->cnt)
+ continue;
+ src_cnt = s->cnt;
+ d = spanhash_find(dst_count, s->hashval);
+ dst_cnt = d ? d->cnt : 0;
+ if (src_cnt < dst_cnt) {
+ la += dst_cnt - src_cnt;
+ sc += src_cnt;
+ }
+ else
+ sc += dst_cnt;
+ }
+
+ if (!src_count_p)
+ free(src_count);
+ if (!dst_count_p)
+ free(dst_count);
+ *src_copied = sc;
+ *literal_added = la;
+ return 0;
}
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 55cf1c3..e992698 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -133,7 +133,7 @@ static int estimate_similarity(struct diff_filespec *src,
* match than anything else; the destination does not even
* call into this function in that case.
*/
- unsigned long delta_size, base_size, src_copied, literal_added;
+ unsigned long max_size, delta_size, base_size, src_copied, literal_added;
unsigned long delta_limit;
int score;
@@ -144,9 +144,9 @@ static int estimate_similarity(struct diff_filespec *src,
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
return 0;
- delta_size = ((src->size < dst->size) ?
- (dst->size - src->size) : (src->size - dst->size));
+ max_size = ((src->size > dst->size) ? src->size : dst->size);
base_size = ((src->size < dst->size) ? src->size : dst->size);
+ delta_size = max_size - base_size;
/* We would not consider edits that change the file size so
* drastically. delta_size must be smaller than
@@ -166,23 +166,18 @@ static int estimate_similarity(struct diff_filespec *src,
delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
if (diffcore_count_changes(src->data, src->size,
dst->data, dst->size,
+ &src->cnt_data, &dst->cnt_data,
delta_limit,
&src_copied, &literal_added))
return 0;
- /* Extent of damage */
- if (src->size + literal_added < src_copied)
- delta_size = 0;
- else
- delta_size = (src->size - src_copied) + literal_added;
-
- /*
- * Now we will give some score to it. 100% edit gets 0 points
- * and 0% edit gets MAX_SCORE points.
+ /* How similar are they?
+ * what percentage of material in dst are from source?
*/
- score = MAX_SCORE - (MAX_SCORE * delta_size / base_size);
- if (score < 0) return 0;
- if (MAX_SCORE < score) return MAX_SCORE;
+ if (!dst->size)
+ score = 0; /* should not happen */
+ else
+ score = src_copied * MAX_SCORE / max_size;
return score;
}
@@ -310,6 +305,8 @@ void diffcore_rename(struct diff_options *options)
m->score = estimate_similarity(one, two,
minimum_score);
}
+ /* We do not need the text anymore */
+ diff_free_filespec_data(two);
dst_cnt++;
}
/* cost matrix sorted by most to least similar pair */
diff --git a/diffcore.h b/diffcore.h
index dba4f17..73c7842 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -17,8 +17,8 @@
*/
#define MAX_SCORE 60000.0
#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
-#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%)*/
-#define DEFAULT_MERGE_SCORE 48000 /* maximum for break-merge to happen (80%)*/
+#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%) */
+#define DEFAULT_MERGE_SCORE 36000 /* maximum for break-merge to happen 60%) */
#define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */
@@ -26,6 +26,7 @@ struct diff_filespec {
unsigned char sha1[20];
char *path;
void *data;
+ void *cnt_data;
unsigned long size;
int xfrm_flags; /* for use by the xfrm */
unsigned short mode; /* file mode */
@@ -103,6 +104,8 @@ void diff_debug_queue(const char *, struct diff_queue_struct *);
extern int diffcore_count_changes(void *src, unsigned long src_size,
void *dst, unsigned long dst_size,
+ void **src_count_p,
+ void **dst_count_p,
unsigned long delta_limit,
unsigned long *src_copied,
unsigned long *literal_added);
diff --git a/git-push.sh b/git-push.sh
index 73dcf06..f10cadb 100755
--- a/git-push.sh
+++ b/git-push.sh
@@ -8,7 +8,7 @@ USAGE='[--all] [--tags] [--force] <repository> [<refspec>...]'
has_all=
has_force=
has_exec=
-has_thin=
+has_thin=--thin
remote=
do_tags=
@@ -24,7 +24,9 @@ do
--exec=*)
has_exec="$1" ;;
--thin)
- has_thin="$1" ;;
+ ;; # noop
+ --no-thin)
+ has_thin= ;;
-*)
usage ;;
*)
diff --git a/git-send-email.perl b/git-send-email.perl
index b220d11..ecfa347 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -19,10 +19,16 @@
use strict;
use warnings;
use Term::ReadLine;
-use Mail::Sendmail qw(sendmail %mailcfg);
use Getopt::Long;
use Data::Dumper;
-use Email::Valid;
+use Net::SMTP;
+
+# most mail servers generate the Date: header, but not all...
+$ENV{LC_ALL} = 'C';
+use POSIX qw/strftime/;
+
+my $have_email_valid = eval { require Email::Valid; 1 };
+my $smtp;
sub unique_email_list(@);
sub cleanup_compose_files();
@@ -31,7 +37,7 @@ sub cleanup_compose_files();
my $compose_filename = ".msg.$$";
# Variables we fill in automatically, or via prompting:
-my (@to,@cc,@initial_cc,$initial_reply_to,$initial_subject,@files,$from,$compose);
+my (@to,@cc,@initial_cc,$initial_reply_to,$initial_subject,@files,$from,$compose,$time);
# Behavior modification variables
my ($chain_reply_to, $smtp_server, $quiet, $suppress_from, $no_signed_off_cc) = (1, "localhost", 0, 0, 0);
@@ -244,6 +250,16 @@ EOT
# Variables we set as part of the loop over files
our ($message_id, $cc, %mail, $subject, $reply_to, $message);
+sub extract_valid_address {
+ my $address = shift;
+ if ($have_email_valid) {
+ return Email::Valid->address($address);
+ } else {
+ # less robust/correct than the monster regexp in Email::Valid,
+ # but still does a 99% job, and one less dependency
+ return ($address =~ /([^\"<>\s]+@[^<>\s]+)/);
+ }
+}
# Usually don't need to change anything below here.
@@ -253,13 +269,12 @@ our ($message_id, $cc, %mail, $subject, $reply_to, $message);
# 1 second since the last time we were called.
# We'll setup a template for the message id, using the "from" address:
-my $message_id_from = Email::Valid->address($from);
+my $message_id_from = extract_valid_address($from);
my $message_id_template = "<%s-git-send-email-$message_id_from>";
sub make_message_id
{
- my $date = `date "+\%s"`;
- chomp($date);
+ my $date = time;
my $pseudo_rand = int (rand(4200));
$message_id = sprintf $message_id_template, "$date$pseudo_rand";
#print "new message id = $message_id\n"; # Was useful for debugging
@@ -268,38 +283,49 @@ sub make_message_id
$cc = "";
+$time = time - scalar $#files;
sub send_message
{
- my $to = join (", ", unique_email_list(@to));
-
- %mail = ( To => $to,
- From => $from,
- CC => $cc,
- Subject => $subject,
- Message => $message,
- 'Reply-to' => $from,
- 'In-Reply-To' => $reply_to,
- 'Message-ID' => $message_id,
- 'X-Mailer' => "git-send-email",
- );
-
- $mail{smtp} = $smtp_server;
- $mailcfg{mime} = 0;
-
- #print Data::Dumper->Dump([\%mail],[qw(*mail)]);
-
- sendmail(%mail) or die $Mail::Sendmail::error;
+ my @recipients = unique_email_list(@to);
+ my $to = join (",\n\t", @recipients);
+ @recipients = unique_email_list(@recipients,@cc);
+ my $date = strftime('%a, %d %b %Y %H:%M:%S %z', localtime($time++));
+
+ my $header = "From: $from
+To: $to
+Cc: $cc
+Subject: $subject
+Reply-To: $from
+Date: $date
+Message-Id: $message_id
+X-Mailer: git-send-email @@GIT_VERSION@@
+";
+ $header .= "In-Reply-To: $reply_to\n" if $reply_to;
+
+ $smtp ||= Net::SMTP->new( $smtp_server );
+ $smtp->mail( $from ) or die $smtp->message;
+ $smtp->to( @recipients ) or die $smtp->message;
+ $smtp->data or die $smtp->message;
+ $smtp->datasend("$header\n$message") or die $smtp->message;
+ $smtp->dataend() or die $smtp->message;
+ $smtp->ok or die "Failed to send $subject\n".$smtp->message;
if ($quiet) {
printf "Sent %s\n", $subject;
} else {
- print "OK. Log says:\n", $Mail::Sendmail::log;
- print "\n\n"
+ print "OK. Log says:
+Date: $date
+Server: $smtp_server Port: 25
+From: $from
+Subject: $subject
+Cc: $cc
+To: $to
+
+Result: ", $smtp->code, ' ', ($smtp->message =~ /\n([^\n]+\n)$/s), "\n";
}
}
-
$reply_to = $initial_reply_to;
make_message_id();
$subject = $initial_subject;
@@ -390,14 +416,14 @@ sub cleanup_compose_files() {
}
-
+$smtp->quit if $smtp;
sub unique_email_list(@) {
my %seen;
my @emails;
foreach my $entry (@_) {
- my $clean = Email::Valid->address($entry);
+ my $clean = extract_valid_address($entry);
next if $seen{$clean}++;
push @emails, $entry;
}
diff --git a/ls-files.c b/ls-files.c
index 83b0a3b..4a4af1c 100644
--- a/ls-files.c
+++ b/ls-files.c
@@ -11,6 +11,7 @@
#include "cache.h"
#include "quote.h"
+static int abbrev = 0;
static int show_deleted = 0;
static int show_cached = 0;
static int show_others = 0;
@@ -502,7 +503,8 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce)
printf("%s%06o %s %d\t",
tag,
ntohl(ce->ce_mode),
- sha1_to_hex(ce->sha1),
+ abbrev ? find_unique_abbrev(ce->sha1,abbrev)
+ : sha1_to_hex(ce->sha1),
ce_stage(ce));
write_name_quoted("", 0, ce->name + offset,
line_terminator, stdout);
@@ -643,7 +645,8 @@ static void verify_pathspec(void)
static const char ls_files_usage[] =
"git-ls-files [-z] [-t] [-v] (--[cached|deleted|others|stage|unmerged|killed|modified])* "
"[ --ignored ] [--exclude=<pattern>] [--exclude-from=<file>] "
- "[ --exclude-per-directory=<filename> ] [--full-name] [--] [<file>]*";
+ "[ --exclude-per-directory=<filename> ] [--full-name] [--abbrev] "
+ "[--] [<file>]*";
int main(int argc, const char **argv)
{
@@ -754,6 +757,18 @@ int main(int argc, const char **argv)
error_unmatch = 1;
continue;
}
+ if (!strncmp(arg, "--abbrev=", 9)) {
+ abbrev = strtoul(arg+9, NULL, 10);
+ if (abbrev && abbrev < MINIMUM_ABBREV)
+ abbrev = MINIMUM_ABBREV;
+ else if (abbrev > 40)
+ abbrev = 40;
+ continue;
+ }
+ if (!strcmp(arg, "--abbrev")) {
+ abbrev = DEFAULT_ABBREV;
+ continue;
+ }
if (*arg == '-')
usage(ls_files_usage);
break;
diff --git a/ls-tree.c b/ls-tree.c
index 58663ff..26258c3 100644
--- a/ls-tree.c
+++ b/ls-tree.c
@@ -13,13 +13,14 @@ static int line_termination = '\n';
#define LS_TREE_ONLY 2
#define LS_SHOW_TREES 4
#define LS_NAME_ONLY 8
+static int abbrev = 0;
static int ls_options = 0;
const char **pathspec;
static int chomp_prefix = 0;
static const char *prefix;
static const char ls_tree_usage[] =
- "git-ls-tree [-d] [-r] [-t] [-z] [--name-only] [--name-status] [--full-name] <tree-ish> [path...]";
+ "git-ls-tree [-d] [-r] [-t] [-z] [--name-only] [--name-status] [--full-name] [--abbrev[=<n>]] <tree-ish> [path...]";
static int show_recursive(const char *base, int baselen, const char *pathname)
{
@@ -73,7 +74,9 @@ static int show_tree(unsigned char *sha1, const char *base, int baselen,
return 0;
if (!(ls_options & LS_NAME_ONLY))
- printf("%06o %s %s\t", mode, type, sha1_to_hex(sha1));
+ printf("%06o %s %s\t", mode, type,
+ abbrev ? find_unique_abbrev(sha1,abbrev)
+ : sha1_to_hex(sha1));
write_name_quoted(base + chomp_prefix, baselen - chomp_prefix,
pathname,
line_termination, stdout);
@@ -114,6 +117,18 @@ int main(int argc, const char **argv)
chomp_prefix = 0;
break;
}
+ if (!strncmp(argv[1]+2, "abbrev=",7)) {
+ abbrev = strtoul(argv[1]+9, NULL, 10);
+ if (abbrev && abbrev < MINIMUM_ABBREV)
+ abbrev = MINIMUM_ABBREV;
+ else if (abbrev > 40)
+ abbrev = 40;
+ break;
+ }
+ if (!strcmp(argv[1]+2, "abbrev")) {
+ abbrev = DEFAULT_ABBREV;
+ break;
+ }
/* otherwise fallthru */
default:
usage(ls_tree_usage);
diff --git a/rev-list.c b/rev-list.c
index 812d237..441c437 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -40,13 +40,18 @@ static int bisect_list = 0;
static int verbose_header = 0;
static int abbrev = DEFAULT_ABBREV;
static int show_parents = 0;
+static int show_timestamp = 0;
static int hdr_termination = 0;
static const char *commit_prefix = "";
static enum cmit_fmt commit_format = CMIT_FMT_RAW;
static void show_commit(struct commit *commit)
{
- printf("%s%s", commit_prefix, sha1_to_hex(commit->object.sha1));
+ if (show_timestamp)
+ printf("%lu ", commit->date);
+ if (commit_prefix[0])
+ fputs(commit_prefix, stdout);
+ fputs(sha1_to_hex(commit->object.sha1), stdout);
if (show_parents) {
struct commit_list *parents = commit->parents;
while (parents) {
@@ -335,6 +340,10 @@ int main(int argc, const char **argv)
show_parents = 1;
continue;
}
+ if (!strcmp(arg, "--timestamp")) {
+ show_timestamp = 1;
+ continue;
+ }
if (!strcmp(arg, "--bisect")) {
bisect_list = 1;
continue;
diff --git a/t/t4001-diff-rename.sh b/t/t4001-diff-rename.sh
index 2e3c20d..08c1131 100755
--- a/t/t4001-diff-rename.sh
+++ b/t/t4001-diff-rename.sh
@@ -49,7 +49,7 @@ rename from path0
rename to path1
--- a/path0
+++ b/path1
-@@ -8,7 +8,7 @@ Line 7
+@@ -8,7 +8,7 @@
Line 8
Line 9
Line 10
diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
index adc5e93..278eb66 100755
--- a/t/t5000-tar-tree.sh
+++ b/t/t5000-tar-tree.sh
@@ -34,6 +34,9 @@ test_expect_success \
mkdir a/bin &&
cp /bin/sh a/bin &&
ln -s a a/l1 &&
+ (p=long_path_to_a_file && cd a &&
+ for depth in 1 2 3 4 5; do mkdir $p && cd $p; done &&
+ echo text >file_with_long_path) &&
(cd a && find .) | sort >a.lst'
test_expect_success \
diff --git a/tar-tree.c b/tar-tree.c
index 92035f5..8d9e31c 100644
--- a/tar-tree.c
+++ b/tar-tree.c
@@ -1,37 +1,23 @@
/*
- * Copyright (c) 2005 Rene Scharfe
+ * Copyright (c) 2005, 2006 Rene Scharfe
*/
#include <time.h>
#include "cache.h"
#include "diff.h"
#include "commit.h"
+#include "strbuf.h"
+#include "tar.h"
#define RECORDSIZE (512)
#define BLOCKSIZE (RECORDSIZE * 20)
-#define TYPEFLAG_AUTO '\0'
-#define TYPEFLAG_REG '0'
-#define TYPEFLAG_LNK '2'
-#define TYPEFLAG_DIR '5'
-#define TYPEFLAG_GLOBAL_HEADER 'g'
-#define TYPEFLAG_EXT_HEADER 'x'
-
-#define EXT_HEADER_PATH 1
-#define EXT_HEADER_LINKPATH 2
-
static const char tar_tree_usage[] = "git-tar-tree <key> [basedir]";
static char block[BLOCKSIZE];
static unsigned long offset;
-static const char *basedir;
static time_t archive_time;
-struct path_prefix {
- struct path_prefix *prev;
- const char *name;
-};
-
/* tries hard to write, either succeeds or dies in the attempt */
static void reliable_write(void *buf, unsigned long size)
{
@@ -119,230 +105,170 @@ static void write_blocked(void *buf, unsigned long size)
write_if_needed();
}
-static void append_string(char **p, const char *s)
-{
- unsigned int len = strlen(s);
- memcpy(*p, s, len);
- *p += len;
-}
-
-static void append_char(char **p, char c)
-{
- **p = c;
- *p += 1;
-}
-
-static void append_path_prefix(char **buffer, struct path_prefix *prefix)
+static void strbuf_append_string(struct strbuf *sb, const char *s)
{
- if (!prefix)
- return;
- append_path_prefix(buffer, prefix->prev);
- append_string(buffer, prefix->name);
- append_char(buffer, '/');
-}
-
-static unsigned int path_prefix_len(struct path_prefix *prefix)
-{
- if (!prefix)
- return 0;
- return path_prefix_len(prefix->prev) + strlen(prefix->name) + 1;
-}
-
-static void append_path(char **p, int is_dir, const char *basepath,
- struct path_prefix *prefix, const char *path)
-{
- if (basepath) {
- append_string(p, basepath);
- append_char(p, '/');
+ int slen = strlen(s);
+ int total = sb->len + slen;
+ if (total > sb->alloc) {
+ sb->buf = xrealloc(sb->buf, total);
+ sb->alloc = total;
}
- append_path_prefix(p, prefix);
- append_string(p, path);
- if (is_dir)
- append_char(p, '/');
+ memcpy(sb->buf + sb->len, s, slen);
+ sb->len = total;
}
-static unsigned int path_len(int is_dir, const char *basepath,
- struct path_prefix *prefix, const char *path)
-{
- unsigned int len = 0;
- if (basepath)
- len += strlen(basepath) + 1;
- len += path_prefix_len(prefix) + strlen(path);
- if (is_dir)
- len++;
- return len;
-}
-
-static void append_extended_header_prefix(char **p, unsigned int size,
- const char *keyword)
+/*
+ * pax extended header records have the format "%u %s=%s\n". %u contains
+ * the size of the whole string (including the %u), the first %s is the
+ * keyword, the second one is the value. This function constructs such a
+ * string and appends it to a struct strbuf.
+ */
+static void strbuf_append_ext_header(struct strbuf *sb, const char *keyword,
+ const char *value, unsigned int valuelen)
{
- int len = sprintf(*p, "%u %s=", size, keyword);
- *p += len;
-}
+ char *p;
+ int len, total, tmp;
-static unsigned int extended_header_len(const char *keyword,
- unsigned int valuelen)
-{
/* "%u %s=%s\n" */
- unsigned int len = 1 + 1 + strlen(keyword) + 1 + valuelen + 1;
- if (len > 9)
- len++;
- if (len > 99)
+ len = 1 + 1 + strlen(keyword) + 1 + valuelen + 1;
+ for (tmp = len; tmp > 9; tmp /= 10)
len++;
- return len;
-}
-static void append_extended_header(char **p, const char *keyword,
- const char *value, unsigned int len)
-{
- unsigned int size = extended_header_len(keyword, len);
- append_extended_header_prefix(p, size, keyword);
- memcpy(*p, value, len);
- *p += len;
- append_char(p, '\n');
-}
+ total = sb->len + len;
+ if (total > sb->alloc) {
+ sb->buf = xrealloc(sb->buf, total);
+ sb->alloc = total;
+ }
-static void write_header(const unsigned char *, char, const char *, struct path_prefix *,
- const char *, unsigned int, void *, unsigned long);
+ p = sb->buf;
+ p += sprintf(p, "%u %s=", len, keyword);
+ memcpy(p, value, valuelen);
+ p += valuelen;
+ *p = '\n';
+ sb->len = total;
+}
-/* stores a pax extended header directly in the block buffer */
-static void write_extended_header(const char *headerfilename, int is_dir,
- unsigned int flags, const char *basepath,
- struct path_prefix *prefix,
- const char *path, unsigned int namelen,
- void *content, unsigned int contentsize)
+static unsigned int ustar_header_chksum(const struct ustar_header *header)
{
- char *buffer, *p;
- unsigned int pathlen, size, linkpathlen = 0;
-
- size = pathlen = extended_header_len("path", namelen);
- if (flags & EXT_HEADER_LINKPATH) {
- linkpathlen = extended_header_len("linkpath", contentsize);
- size += linkpathlen;
- }
- write_header(NULL, TYPEFLAG_EXT_HEADER, NULL, NULL, headerfilename,
- 0100600, NULL, size);
-
- buffer = p = malloc(size);
- if (!buffer)
- die("git-tar-tree: %s", strerror(errno));
- append_extended_header_prefix(&p, pathlen, "path");
- append_path(&p, is_dir, basepath, prefix, path);
- append_char(&p, '\n');
- if (flags & EXT_HEADER_LINKPATH)
- append_extended_header(&p, "linkpath", content, contentsize);
- write_blocked(buffer, size);
- free(buffer);
+ char *p = (char *)header;
+ unsigned int chksum = 0;
+ while (p < header->chksum)
+ chksum += *p++;
+ chksum += sizeof(header->chksum) * ' ';
+ p += sizeof(header->chksum);
+ while (p < (char *)header + sizeof(struct ustar_header))
+ chksum += *p++;
+ return chksum;
}
-static void write_global_extended_header(const unsigned char *sha1)
+static int get_path_prefix(const struct strbuf *path, int maxlen)
{
- char *p;
- unsigned int size;
-
- size = extended_header_len("comment", 40);
- write_header(NULL, TYPEFLAG_GLOBAL_HEADER, NULL, NULL,
- "pax_global_header", 0100600, NULL, size);
-
- p = get_record();
- append_extended_header(&p, "comment", sha1_to_hex(sha1), 40);
- write_if_needed();
+ int i = path->len;
+ if (i > maxlen)
+ i = maxlen;
+ while (i > 0 && path->buf[i] != '/')
+ i--;
+ return i;
}
-/* stores a ustar header directly in the block buffer */
-static void write_header(const unsigned char *sha1, char typeflag, const char *basepath,
- struct path_prefix *prefix, const char *path,
- unsigned int mode, void *buffer, unsigned long size)
+static void write_entry(const unsigned char *sha1, struct strbuf *path,
+ unsigned int mode, void *buffer, unsigned long size)
{
- unsigned int namelen;
- char *header = NULL;
- unsigned int checksum = 0;
- int i;
- unsigned int ext_header = 0;
-
- if (typeflag == TYPEFLAG_AUTO) {
- if (S_ISDIR(mode))
- typeflag = TYPEFLAG_DIR;
- else if (S_ISLNK(mode))
- typeflag = TYPEFLAG_LNK;
- else
- typeflag = TYPEFLAG_REG;
- }
-
- namelen = path_len(S_ISDIR(mode), basepath, prefix, path);
- if (namelen > 100)
- ext_header |= EXT_HEADER_PATH;
- if (typeflag == TYPEFLAG_LNK && size > 100)
- ext_header |= EXT_HEADER_LINKPATH;
-
- /* the extended header must be written before the normal one */
- if (ext_header) {
- char headerfilename[51];
- sprintf(headerfilename, "%s.paxheader", sha1_to_hex(sha1));
- write_extended_header(headerfilename, S_ISDIR(mode),
- ext_header, basepath, prefix, path,
- namelen, buffer, size);
- }
-
- header = get_record();
-
- if (ext_header) {
- sprintf(header, "%s.data", sha1_to_hex(sha1));
+ struct ustar_header header;
+ struct strbuf ext_header;
+
+ memset(&header, 0, sizeof(header));
+ ext_header.buf = NULL;
+ ext_header.len = ext_header.alloc = 0;
+
+ if (!sha1) {
+ *header.typeflag = TYPEFLAG_GLOBAL_HEADER;
+ mode = 0100666;
+ strcpy(header.name, "pax_global_header");
+ } else if (!path) {
+ *header.typeflag = TYPEFLAG_EXT_HEADER;
+ mode = 0100666;
+ sprintf(header.name, "%s.paxheader", sha1_to_hex(sha1));
} else {
- char *p = header;
- append_path(&p, S_ISDIR(mode), basepath, prefix, path);
+ if (S_ISDIR(mode)) {
+ *header.typeflag = TYPEFLAG_DIR;
+ mode |= 0777;
+ } else if (S_ISLNK(mode)) {
+ *header.typeflag = TYPEFLAG_LNK;
+ mode |= 0777;
+ } else if (S_ISREG(mode)) {
+ *header.typeflag = TYPEFLAG_REG;
+ mode |= (mode & 0100) ? 0777 : 0666;
+ } else {
+ error("unsupported file mode: 0%o (SHA1: %s)",
+ mode, sha1_to_hex(sha1));
+ return;
+ }
+ if (path->len > sizeof(header.name)) {
+ int plen = get_path_prefix(path, sizeof(header.prefix));
+ int rest = path->len - plen - 1;
+ if (plen > 0 && rest <= sizeof(header.name)) {
+ memcpy(header.prefix, path->buf, plen);
+ memcpy(header.name, path->buf + plen + 1, rest);
+ } else {
+ sprintf(header.name, "%s.data",
+ sha1_to_hex(sha1));
+ strbuf_append_ext_header(&ext_header, "path",
+ path->buf, path->len);
+ }
+ } else
+ memcpy(header.name, path->buf, path->len);
}
- if (typeflag == TYPEFLAG_LNK) {
- if (ext_header & EXT_HEADER_LINKPATH) {
- sprintf(&header[157], "see %s.paxheader",
+ if (S_ISLNK(mode) && buffer) {
+ if (size > sizeof(header.linkname)) {
+ sprintf(header.linkname, "see %s.paxheader",
sha1_to_hex(sha1));
- } else {
- if (buffer)
- strncpy(&header[157], buffer, size);
- }
+ strbuf_append_ext_header(&ext_header, "linkpath",
+ buffer, size);
+ } else
+ memcpy(header.linkname, buffer, size);
}
- if (S_ISDIR(mode))
- mode |= 0777;
- else if (S_ISREG(mode))
- mode |= (mode & 0100) ? 0777 : 0666;
- else if (S_ISLNK(mode))
- mode |= 0777;
- sprintf(&header[100], "%07o", mode & 07777);
+ sprintf(header.mode, "%07o", mode & 07777);
+ sprintf(header.size, "%011lo", S_ISREG(mode) ? size : 0);
+ sprintf(header.mtime, "%011lo", archive_time);
/* XXX: should we provide more meaningful info here? */
- sprintf(&header[108], "%07o", 0); /* uid */
- sprintf(&header[116], "%07o", 0); /* gid */
- strncpy(&header[265], "git", 31); /* uname */
- strncpy(&header[297], "git", 31); /* gname */
-
- if (S_ISDIR(mode) || S_ISLNK(mode))
- size = 0;
- sprintf(&header[124], "%011lo", size);
- sprintf(&header[136], "%011lo", archive_time);
+ sprintf(header.uid, "%07o", 0);
+ sprintf(header.gid, "%07o", 0);
+ strncpy(header.uname, "git", 31);
+ strncpy(header.gname, "git", 31);
+ sprintf(header.devmajor, "%07o", 0);
+ sprintf(header.devminor, "%07o", 0);
- header[156] = typeflag;
+ memcpy(header.magic, "ustar", 6);
+ memcpy(header.version, "00", 2);
- memcpy(&header[257], "ustar", 6);
- memcpy(&header[263], "00", 2);
+ sprintf(header.chksum, "%07o", ustar_header_chksum(&header));
- sprintf(&header[329], "%07o", 0); /* devmajor */
- sprintf(&header[337], "%07o", 0); /* devminor */
-
- memset(&header[148], ' ', 8);
- for (i = 0; i < RECORDSIZE; i++)
- checksum += header[i];
- sprintf(&header[148], "%07o", checksum & 0x1fffff);
+ if (ext_header.len > 0) {
+ write_entry(sha1, NULL, 0, ext_header.buf, ext_header.len);
+ free(ext_header.buf);
+ }
+ write_blocked(&header, sizeof(header));
+ if (S_ISREG(mode) && buffer && size > 0)
+ write_blocked(buffer, size);
+}
- write_if_needed();
+static void write_global_extended_header(const unsigned char *sha1)
+{
+ struct strbuf ext_header;
+ ext_header.buf = NULL;
+ ext_header.len = ext_header.alloc = 0;
+ strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40);
+ write_entry(NULL, NULL, 0, ext_header.buf, ext_header.len);
+ free(ext_header.buf);
}
-static void traverse_tree(struct tree_desc *tree,
- struct path_prefix *prefix)
+static void traverse_tree(struct tree_desc *tree, struct strbuf *path)
{
- struct path_prefix this_prefix;
- this_prefix.prev = prefix;
+ int pathlen = path->len;
while (tree->size) {
const char *name;
@@ -358,16 +284,19 @@ static void traverse_tree(struct tree_desc *tree,
eltbuf = read_sha1_file(sha1, elttype, &eltsize);
if (!eltbuf)
die("cannot read %s", sha1_to_hex(sha1));
- write_header(sha1, TYPEFLAG_AUTO, basedir,
- prefix, name, mode, eltbuf, eltsize);
+
+ path->len = pathlen;
+ strbuf_append_string(path, name);
+ if (S_ISDIR(mode))
+ strbuf_append_string(path, "/");
+
+ write_entry(sha1, path, mode, eltbuf, eltsize);
+
if (S_ISDIR(mode)) {
struct tree_desc subtree;
subtree.buf = eltbuf;
subtree.size = eltsize;
- this_prefix.name = name;
- traverse_tree(&subtree, &this_prefix);
- } else if (!S_ISLNK(mode)) {
- write_blocked(eltbuf, eltsize);
+ traverse_tree(&subtree, path);
}
free(eltbuf);
}
@@ -375,16 +304,22 @@ static void traverse_tree(struct tree_desc *tree,
int main(int argc, char **argv)
{
- unsigned char sha1[20];
+ unsigned char sha1[20], tree_sha1[20];
struct commit *commit;
struct tree_desc tree;
+ struct strbuf current_path;
+
+ current_path.buf = xmalloc(PATH_MAX);
+ current_path.alloc = PATH_MAX;
+ current_path.len = current_path.eof = 0;
setup_git_directory();
git_config(git_default_config);
switch (argc) {
case 3:
- basedir = argv[2];
+ strbuf_append_string(&current_path, argv[2]);
+ strbuf_append_string(&current_path, "/");
/* FALLTHROUGH */
case 2:
if (get_sha1(argv[1], sha1) < 0)
@@ -398,17 +333,19 @@ int main(int argc, char **argv)
if (commit) {
write_global_extended_header(commit->object.sha1);
archive_time = commit->date;
- }
- tree.buf = read_object_with_reference(sha1, "tree", &tree.size, NULL);
+ } else
+ archive_time = time(NULL);
+
+ tree.buf = read_object_with_reference(sha1, "tree", &tree.size,
+ tree_sha1);
if (!tree.buf)
die("not a reference to a tag, commit or tree object: %s",
sha1_to_hex(sha1));
- if (!archive_time)
- archive_time = time(NULL);
- if (basedir)
- write_header((unsigned char *)"0", TYPEFLAG_DIR, NULL, NULL,
- basedir, 040777, NULL, 0);
- traverse_tree(&tree, NULL);
+
+ if (current_path.len > 0)
+ write_entry(tree_sha1, &current_path, 040777, NULL, 0);
+ traverse_tree(&tree, &current_path);
write_trailer();
+ free(current_path.buf);
return 0;
}
diff --git a/tar.h b/tar.h
new file mode 100644
index 0000000..3467705
--- /dev/null
+++ b/tar.h
@@ -0,0 +1,25 @@
+#define TYPEFLAG_AUTO '\0'
+#define TYPEFLAG_REG '0'
+#define TYPEFLAG_LNK '2'
+#define TYPEFLAG_DIR '5'
+#define TYPEFLAG_GLOBAL_HEADER 'g'
+#define TYPEFLAG_EXT_HEADER 'x'
+
+struct ustar_header {
+ char name[100]; /* 0 */
+ char mode[8]; /* 100 */
+ char uid[8]; /* 108 */
+ char gid[8]; /* 116 */
+ char size[12]; /* 124 */
+ char mtime[12]; /* 136 */
+ char chksum[8]; /* 148 */
+ char typeflag[1]; /* 156 */
+ char linkname[100]; /* 157 */
+ char magic[6]; /* 257 */
+ char version[2]; /* 263 */
+ char uname[32]; /* 265 */
+ char gname[32]; /* 297 */
+ char devmajor[8]; /* 329 */
+ char devminor[8]; /* 337 */
+ char prefix[155]; /* 345 */
+};
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
new file mode 100644
index 0000000..71cb939
--- /dev/null
+++ b/xdiff/xdiff.h
@@ -0,0 +1,91 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#if !defined(XDIFF_H)
+#define XDIFF_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* #ifdef __cplusplus */
+
+
+#define XDF_NEED_MINIMAL (1 << 1)
+
+#define XDL_PATCH_NORMAL '-'
+#define XDL_PATCH_REVERSE '+'
+#define XDL_PATCH_MODEMASK ((1 << 8) - 1)
+#define XDL_PATCH_IGNOREBSPACE (1 << 8)
+
+#define XDL_MMB_READONLY (1 << 0)
+
+#define XDL_MMF_ATOMIC (1 << 0)
+
+#define XDL_BDOP_INS 1
+#define XDL_BDOP_CPY 2
+#define XDL_BDOP_INSB 3
+
+
+typedef struct s_mmfile {
+ char *ptr;
+ long size;
+} mmfile_t;
+
+typedef struct s_mmbuffer {
+ char *ptr;
+ long size;
+} mmbuffer_t;
+
+typedef struct s_xpparam {
+ unsigned long flags;
+} xpparam_t;
+
+typedef struct s_xdemitcb {
+ void *priv;
+ int (*outf)(void *, mmbuffer_t *, int);
+} xdemitcb_t;
+
+typedef struct s_xdemitconf {
+ long ctxlen;
+} xdemitconf_t;
+
+typedef struct s_bdiffparam {
+ long bsize;
+} bdiffparam_t;
+
+
+#define xdl_malloc(x) malloc(x)
+#define xdl_free(ptr) free(ptr)
+#define xdl_realloc(ptr,x) realloc(ptr,x)
+
+void *xdl_mmfile_first(mmfile_t *mmf, long *size);
+void *xdl_mmfile_next(mmfile_t *mmf, long *size);
+long xdl_mmfile_size(mmfile_t *mmf);
+
+int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+ xdemitconf_t const *xecfg, xdemitcb_t *ecb);
+
+#ifdef __cplusplus
+}
+#endif /* #ifdef __cplusplus */
+
+#endif /* #if !defined(XDIFF_H) */
+
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
new file mode 100644
index 0000000..8ea0483
--- /dev/null
+++ b/xdiff/xdiffi.c
@@ -0,0 +1,469 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#include "xinclude.h"
+
+
+
+#define XDL_MAX_COST_MIN 256
+#define XDL_HEUR_MIN_COST 256
+#define XDL_LINE_MAX (long)((1UL << (8 * sizeof(long) - 1)) - 1)
+#define XDL_SNAKE_CNT 20
+#define XDL_K_HEUR 4
+
+
+
+typedef struct s_xdpsplit {
+ long i1, i2;
+ int min_lo, min_hi;
+} xdpsplit_t;
+
+
+
+
+static long xdl_split(unsigned long const *ha1, long off1, long lim1,
+ unsigned long const *ha2, long off2, long lim2,
+ long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
+ xdalgoenv_t *xenv);
+static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2);
+
+
+
+
+/*
+ * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
+ * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
+ * the forward diagonal starting from (off1, off2) and the backward diagonal
+ * starting from (lim1, lim2). If the K values on the same diagonal crosses
+ * returns the furthest point of reach. We might end up having to expensive
+ * cases using this algorithm is full, so a little bit of heuristic is needed
+ * to cut the search and to return a suboptimal point.
+ */
+static long xdl_split(unsigned long const *ha1, long off1, long lim1,
+ unsigned long const *ha2, long off2, long lim2,
+ long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
+ xdalgoenv_t *xenv) {
+ long dmin = off1 - lim2, dmax = lim1 - off2;
+ long fmid = off1 - off2, bmid = lim1 - lim2;
+ long odd = (fmid - bmid) & 1;
+ long fmin = fmid, fmax = fmid;
+ long bmin = bmid, bmax = bmid;
+ long ec, d, i1, i2, prev1, best, dd, v, k;
+
+ /*
+ * Set initial diagonal values for both forward and backward path.
+ */
+ kvdf[fmid] = off1;
+ kvdb[bmid] = lim1;
+
+ for (ec = 1;; ec++) {
+ int got_snake = 0;
+
+ /*
+ * We need to extent the diagonal "domain" by one. If the next
+ * values exits the box boundaries we need to change it in the
+ * opposite direction because (max - min) must be a power of two.
+ * Also we initialize the extenal K value to -1 so that we can
+ * avoid extra conditions check inside the core loop.
+ */
+ if (fmin > dmin)
+ kvdf[--fmin - 1] = -1;
+ else
+ ++fmin;
+ if (fmax < dmax)
+ kvdf[++fmax + 1] = -1;
+ else
+ --fmax;
+
+ for (d = fmax; d >= fmin; d -= 2) {
+ if (kvdf[d - 1] >= kvdf[d + 1])
+ i1 = kvdf[d - 1] + 1;
+ else
+ i1 = kvdf[d + 1];
+ prev1 = i1;
+ i2 = i1 - d;
+ for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);
+ if (i1 - prev1 > xenv->snake_cnt)
+ got_snake = 1;
+ kvdf[d] = i1;
+ if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) {
+ spl->i1 = i1;
+ spl->i2 = i2;
+ spl->min_lo = spl->min_hi = 1;
+ return ec;
+ }
+ }
+
+ /*
+ * We need to extent the diagonal "domain" by one. If the next
+ * values exits the box boundaries we need to change it in the
+ * opposite direction because (max - min) must be a power of two.
+ * Also we initialize the extenal K value to -1 so that we can
+ * avoid extra conditions check inside the core loop.
+ */
+ if (bmin > dmin)
+ kvdb[--bmin - 1] = XDL_LINE_MAX;
+ else
+ ++bmin;
+ if (bmax < dmax)
+ kvdb[++bmax + 1] = XDL_LINE_MAX;
+ else
+ --bmax;
+
+ for (d = bmax; d >= bmin; d -= 2) {
+ if (kvdb[d - 1] < kvdb[d + 1])
+ i1 = kvdb[d - 1];
+ else
+ i1 = kvdb[d + 1] - 1;
+ prev1 = i1;
+ i2 = i1 - d;
+ for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);
+ if (prev1 - i1 > xenv->snake_cnt)
+ got_snake = 1;
+ kvdb[d] = i1;
+ if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) {
+ spl->i1 = i1;
+ spl->i2 = i2;
+ spl->min_lo = spl->min_hi = 1;
+ return ec;
+ }
+ }
+
+ if (need_min)
+ continue;
+
+ /*
+ * If the edit cost is above the heuristic trigger and if
+ * we got a good snake, we sample current diagonals to see
+ * if some of the, have reached an "interesting" path. Our
+ * measure is a function of the distance from the diagonal
+ * corner (i1 + i2) penalized with the distance from the
+ * mid diagonal itself. If this value is above the current
+ * edit cost times a magic factor (XDL_K_HEUR) we consider
+ * it interesting.
+ */
+ if (got_snake && ec > xenv->heur_min) {
+ for (best = 0, d = fmax; d >= fmin; d -= 2) {
+ dd = d > fmid ? d - fmid: fmid - d;
+ i1 = kvdf[d];
+ i2 = i1 - d;
+ v = (i1 - off1) + (i2 - off2) - dd;
+
+ if (v > XDL_K_HEUR * ec && v > best &&
+ off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&
+ off2 + xenv->snake_cnt <= i2 && i2 < lim2) {
+ for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)
+ if (k == xenv->snake_cnt) {
+ best = v;
+ spl->i1 = i1;
+ spl->i2 = i2;
+ break;
+ }
+ }
+ }
+ if (best > 0) {
+ spl->min_lo = 1;
+ spl->min_hi = 0;
+ return ec;
+ }
+
+ for (best = 0, d = bmax; d >= bmin; d -= 2) {
+ dd = d > bmid ? d - bmid: bmid - d;
+ i1 = kvdb[d];
+ i2 = i1 - d;
+ v = (lim1 - i1) + (lim2 - i2) - dd;
+
+ if (v > XDL_K_HEUR * ec && v > best &&
+ off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&
+ off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {
+ for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)
+ if (k == xenv->snake_cnt - 1) {
+ best = v;
+ spl->i1 = i1;
+ spl->i2 = i2;
+ break;
+ }
+ }
+ }
+ if (best > 0) {
+ spl->min_lo = 0;
+ spl->min_hi = 1;
+ return ec;
+ }
+ }
+
+ /*
+ * Enough is enough. We spent too much time here and now we collect
+ * the furthest reaching path using the (i1 + i2) measure.
+ */
+ if (ec >= xenv->mxcost) {
+ long fbest, fbest1, bbest, bbest1;
+
+ fbest = -1;
+ for (d = fmax; d >= fmin; d -= 2) {
+ i1 = XDL_MIN(kvdf[d], lim1);
+ i2 = i1 - d;
+ if (lim2 < i2)
+ i1 = lim2 + d, i2 = lim2;
+ if (fbest < i1 + i2) {
+ fbest = i1 + i2;
+ fbest1 = i1;
+ }
+ }
+
+ bbest = XDL_LINE_MAX;
+ for (d = bmax; d >= bmin; d -= 2) {
+ i1 = XDL_MAX(off1, kvdb[d]);
+ i2 = i1 - d;
+ if (i2 < off2)
+ i1 = off2 + d, i2 = off2;
+ if (i1 + i2 < bbest) {
+ bbest = i1 + i2;
+ bbest1 = i1;
+ }
+ }
+
+ if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) {
+ spl->i1 = fbest1;
+ spl->i2 = fbest - fbest1;
+ spl->min_lo = 1;
+ spl->min_hi = 0;
+ } else {
+ spl->i1 = bbest1;
+ spl->i2 = bbest - bbest1;
+ spl->min_lo = 0;
+ spl->min_hi = 1;
+ }
+ return ec;
+ }
+ }
+
+ return -1;
+}
+
+
+/*
+ * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
+ * the box splitting function. Note that the real job (marking changed lines)
+ * is done in the two boundary reaching checks.
+ */
+int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
+ diffdata_t *dd2, long off2, long lim2,
+ long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) {
+ unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;
+
+ /*
+ * Shrink the box by walking through each diagonal snake (SW and NE).
+ */
+ for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++);
+ for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--);
+
+ /*
+ * If one dimension is empty, then all records on the other one must
+ * be obviously changed.
+ */
+ if (off1 == lim1) {
+ char *rchg2 = dd2->rchg;
+ long *rindex2 = dd2->rindex;
+
+ for (; off2 < lim2; off2++)
+ rchg2[rindex2[off2]] = 1;
+ } else if (off2 == lim2) {
+ char *rchg1 = dd1->rchg;
+ long *rindex1 = dd1->rindex;
+
+ for (; off1 < lim1; off1++)
+ rchg1[rindex1[off1]] = 1;
+ } else {
+ long ec;
+ xdpsplit_t spl;
+
+ /*
+ * Divide ...
+ */
+ if ((ec = xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,
+ need_min, &spl, xenv)) < 0) {
+
+ return -1;
+ }
+
+ /*
+ * ... et Impera.
+ */
+ if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,
+ kvdf, kvdb, spl.min_lo, xenv) < 0 ||
+ xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,
+ kvdf, kvdb, spl.min_hi, xenv) < 0) {
+
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+
+int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+ xdfenv_t *xe) {
+ long ndiags;
+ long *kvd, *kvdf, *kvdb;
+ xdalgoenv_t xenv;
+ diffdata_t dd1, dd2;
+
+ if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
+
+ return -1;
+ }
+
+ /*
+ * Allocate and setup K vectors to be used by the differential algorithm.
+ * One is to store the forward path and one to store the backward path.
+ */
+ ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
+ if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) {
+
+ xdl_free_env(xe);
+ return -1;
+ }
+ kvdf = kvd;
+ kvdb = kvdf + ndiags;
+ kvdf += xe->xdf2.nreff + 1;
+ kvdb += xe->xdf2.nreff + 1;
+
+ /*
+ * Classical integer square root approximation using shifts.
+ */
+ xenv.mxcost = 1;
+ for (; ndiags; ndiags >>= 2)
+ xenv.mxcost <<= 1;
+ if (xenv.mxcost < XDL_MAX_COST_MIN)
+ xenv.mxcost = XDL_MAX_COST_MIN;
+ xenv.snake_cnt = XDL_SNAKE_CNT;
+ xenv.heur_min = XDL_HEUR_MIN_COST;
+
+ dd1.nrec = xe->xdf1.nreff;
+ dd1.ha = xe->xdf1.ha;
+ dd1.rchg = xe->xdf1.rchg;
+ dd1.rindex = xe->xdf1.rindex;
+ dd2.nrec = xe->xdf2.nreff;
+ dd2.ha = xe->xdf2.ha;
+ dd2.rchg = xe->xdf2.rchg;
+ dd2.rindex = xe->xdf2.rindex;
+
+ if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
+ kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) {
+
+ xdl_free(kvd);
+ xdl_free_env(xe);
+ return -1;
+ }
+
+ xdl_free(kvd);
+
+ return 0;
+}
+
+
+static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
+ xdchange_t *xch;
+
+ if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
+ return NULL;
+
+ xch->next = xscr;
+ xch->i1 = i1;
+ xch->i2 = i2;
+ xch->chg1 = chg1;
+ xch->chg2 = chg2;
+
+ return xch;
+}
+
+
+int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) {
+ xdchange_t *cscr = NULL, *xch;
+ char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg;
+ long i1, i2, l1, l2;
+
+ /*
+ * Trivial. Collects "groups" of changes and creates an edit script.
+ */
+ for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--)
+ if (rchg1[i1 - 1] || rchg2[i2 - 1]) {
+ for (l1 = i1; rchg1[i1 - 1]; i1--);
+ for (l2 = i2; rchg2[i2 - 1]; i2--);
+
+ if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) {
+ xdl_free_script(cscr);
+ return -1;
+ }
+ cscr = xch;
+ }
+
+ *xscr = cscr;
+
+ return 0;
+}
+
+
+void xdl_free_script(xdchange_t *xscr) {
+ xdchange_t *xch;
+
+ while ((xch = xscr) != NULL) {
+ xscr = xscr->next;
+ xdl_free(xch);
+ }
+}
+
+
+int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+ xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
+ xdchange_t *xscr;
+ xdfenv_t xe;
+
+ if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
+
+ return -1;
+ }
+
+ if (xdl_build_script(&xe, &xscr) < 0) {
+
+ xdl_free_env(&xe);
+ return -1;
+ }
+
+ if (xscr) {
+ if (xdl_emit_diff(&xe, xscr, ecb, xecfg) < 0) {
+
+ xdl_free_script(xscr);
+ xdl_free_env(&xe);
+ return -1;
+ }
+
+ xdl_free_script(xscr);
+ }
+
+ xdl_free_env(&xe);
+
+ return 0;
+}
+
diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h
new file mode 100644
index 0000000..dd8f3c9
--- /dev/null
+++ b/xdiff/xdiffi.h
@@ -0,0 +1,60 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#if !defined(XDIFFI_H)
+#define XDIFFI_H
+
+
+typedef struct s_diffdata {
+ long nrec;
+ unsigned long const *ha;
+ long *rindex;
+ char *rchg;
+} diffdata_t;
+
+typedef struct s_xdalgoenv {
+ long mxcost;
+ long snake_cnt;
+ long heur_min;
+} xdalgoenv_t;
+
+typedef struct s_xdchange {
+ struct s_xdchange *next;
+ long i1, i2;
+ long chg1, chg2;
+} xdchange_t;
+
+
+
+int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
+ diffdata_t *dd2, long off2, long lim2,
+ long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv);
+int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+ xdfenv_t *xe);
+int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr);
+void xdl_free_script(xdchange_t *xscr);
+int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
+ xdemitconf_t const *xecfg);
+
+
+#endif /* #if !defined(XDIFFI_H) */
+
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
new file mode 100644
index 0000000..2e5d54c
--- /dev/null
+++ b/xdiff/xemit.c
@@ -0,0 +1,141 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#include "xinclude.h"
+
+
+
+
+static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec);
+static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb);
+static xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg);
+
+
+
+
+static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) {
+
+ *rec = xdf->recs[ri]->ptr;
+
+ return xdf->recs[ri]->size;
+}
+
+
+static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) {
+ long size, psize = strlen(pre);
+ char const *rec;
+
+ size = xdl_get_rec(xdf, ri, &rec);
+ if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) {
+
+ return -1;
+ }
+
+ return 0;
+}
+
+
+/*
+ * Starting at the passed change atom, find the latest change atom to be included
+ * inside the differential hunk according to the specified configuration.
+ */
+static xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg) {
+ xdchange_t *xch, *xchp;
+
+ for (xchp = xscr, xch = xscr->next; xch; xchp = xch, xch = xch->next)
+ if (xch->i1 - (xchp->i1 + xchp->chg1) > 2 * xecfg->ctxlen)
+ break;
+
+ return xchp;
+}
+
+
+int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
+ xdemitconf_t const *xecfg) {
+ long s1, s2, e1, e2, lctx;
+ xdchange_t *xch, *xche;
+
+ for (xch = xche = xscr; xch; xch = xche->next) {
+ xche = xdl_get_hunk(xch, xecfg);
+
+ s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
+ s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
+
+ lctx = xecfg->ctxlen;
+ lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1));
+ lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2));
+
+ e1 = xche->i1 + xche->chg1 + lctx;
+ e2 = xche->i2 + xche->chg2 + lctx;
+
+ /*
+ * Emit current hunk header.
+ */
+ if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2, ecb) < 0)
+ return -1;
+
+ /*
+ * Emit pre-context.
+ */
+ for (; s1 < xch->i1; s1++)
+ if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0)
+ return -1;
+
+ for (s1 = xch->i1, s2 = xch->i2;; xch = xch->next) {
+ /*
+ * Merge previous with current change atom.
+ */
+ for (; s1 < xch->i1 && s2 < xch->i2; s1++, s2++)
+ if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0)
+ return -1;
+
+ /*
+ * Removes lines from the first file.
+ */
+ for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++)
+ if (xdl_emit_record(&xe->xdf1, s1, "-", ecb) < 0)
+ return -1;
+
+ /*
+ * Adds lines from the second file.
+ */
+ for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++)
+ if (xdl_emit_record(&xe->xdf2, s2, "+", ecb) < 0)
+ return -1;
+
+ if (xch == xche)
+ break;
+ s1 = xch->i1 + xch->chg1;
+ s2 = xch->i2 + xch->chg2;
+ }
+
+ /*
+ * Emit post-context.
+ */
+ for (s1 = xche->i1 + xche->chg1; s1 < e1; s1++)
+ if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
diff --git a/xdiff/xemit.h b/xdiff/xemit.h
new file mode 100644
index 0000000..e629417
--- /dev/null
+++ b/xdiff/xemit.h
@@ -0,0 +1,34 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#if !defined(XEMIT_H)
+#define XEMIT_H
+
+
+
+int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
+ xdemitconf_t const *xecfg);
+
+
+
+#endif /* #if !defined(XEMIT_H) */
+
diff --git a/xdiff/xinclude.h b/xdiff/xinclude.h
new file mode 100644
index 0000000..9490fc5
--- /dev/null
+++ b/xdiff/xinclude.h
@@ -0,0 +1,42 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#if !defined(XINCLUDE_H)
+#define XINCLUDE_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <limits.h>
+
+#include "xmacros.h"
+#include "xdiff.h"
+#include "xtypes.h"
+#include "xutils.h"
+#include "xprepare.h"
+#include "xdiffi.h"
+#include "xemit.h"
+
+
+#endif /* #if !defined(XINCLUDE_H) */
+
diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h
new file mode 100644
index 0000000..4c2fde8
--- /dev/null
+++ b/xdiff/xmacros.h
@@ -0,0 +1,53 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#if !defined(XMACROS_H)
+#define XMACROS_H
+
+
+#define GR_PRIME 0x9e370001UL
+
+
+#define XDL_MIN(a, b) ((a) < (b) ? (a): (b))
+#define XDL_MAX(a, b) ((a) > (b) ? (a): (b))
+#define XDL_ABS(v) ((v) >= 0 ? (v): -(v))
+#define XDL_ISDIGIT(c) ((c) >= '0' && (c) <= '9')
+#define XDL_HASHLONG(v, b) (((unsigned long)(v) * GR_PRIME) >> ((CHAR_BIT * sizeof(unsigned long)) - (b)))
+#define XDL_PTRFREE(p) do { if (p) { xdl_free(p); (p) = NULL; } } while (0)
+#define XDL_LE32_PUT(p, v) \
+do { \
+ unsigned char *__p = (unsigned char *) (p); \
+ *__p++ = (unsigned char) (v); \
+ *__p++ = (unsigned char) ((v) >> 8); \
+ *__p++ = (unsigned char) ((v) >> 16); \
+ *__p = (unsigned char) ((v) >> 24); \
+} while (0)
+#define XDL_LE32_GET(p, v) \
+do { \
+ unsigned char const *__p = (unsigned char const *) (p); \
+ (v) = (unsigned long) __p[0] | ((unsigned long) __p[1]) << 8 | \
+ ((unsigned long) __p[2]) << 16 | ((unsigned long) __p[3]) << 24; \
+} while (0)
+
+
+#endif /* #if !defined(XMACROS_H) */
+
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
new file mode 100644
index 0000000..27a0879
--- /dev/null
+++ b/xdiff/xprepare.c
@@ -0,0 +1,436 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#include "xinclude.h"
+
+
+
+#define XDL_KPDIS_RUN 4
+
+
+
+typedef struct s_xdlclass {
+ struct s_xdlclass *next;
+ unsigned long ha;
+ char const *line;
+ long size;
+ long idx;
+} xdlclass_t;
+
+typedef struct s_xdlclassifier {
+ unsigned int hbits;
+ long hsize;
+ xdlclass_t **rchash;
+ chastore_t ncha;
+ long count;
+} xdlclassifier_t;
+
+
+
+
+static int xdl_init_classifier(xdlclassifier_t *cf, long size);
+static void xdl_free_classifier(xdlclassifier_t *cf);
+static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
+ xrecord_t *rec);
+static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
+ xdlclassifier_t *cf, xdfile_t *xdf);
+static void xdl_free_ctx(xdfile_t *xdf);
+static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
+static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2);
+static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2);
+static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2);
+
+
+
+
+static int xdl_init_classifier(xdlclassifier_t *cf, long size) {
+ long i;
+
+ cf->hbits = xdl_hashbits((unsigned int) size);
+ cf->hsize = 1 << cf->hbits;
+
+ if (xdl_cha_init(&cf->ncha, sizeof(xdlclass_t), size / 4 + 1) < 0) {
+
+ return -1;
+ }
+ if (!(cf->rchash = (xdlclass_t **) xdl_malloc(cf->hsize * sizeof(xdlclass_t *)))) {
+
+ xdl_cha_free(&cf->ncha);
+ return -1;
+ }
+ for (i = 0; i < cf->hsize; i++)
+ cf->rchash[i] = NULL;
+
+ cf->count = 0;
+
+ return 0;
+}
+
+
+static void xdl_free_classifier(xdlclassifier_t *cf) {
+
+ xdl_free(cf->rchash);
+ xdl_cha_free(&cf->ncha);
+}
+
+
+static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
+ xrecord_t *rec) {
+ long hi;
+ char const *line;
+ xdlclass_t *rcrec;
+
+ line = rec->ptr;
+ hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
+ for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
+ if (rcrec->ha == rec->ha && rcrec->size == rec->size &&
+ !memcmp(line, rcrec->line, rec->size))
+ break;
+
+ if (!rcrec) {
+ if (!(rcrec = xdl_cha_alloc(&cf->ncha))) {
+
+ return -1;
+ }
+ rcrec->idx = cf->count++;
+ rcrec->line = line;
+ rcrec->size = rec->size;
+ rcrec->ha = rec->ha;
+ rcrec->next = cf->rchash[hi];
+ cf->rchash[hi] = rcrec;
+ }
+
+ rec->ha = (unsigned long) rcrec->idx;
+
+ hi = (long) XDL_HASHLONG(rec->ha, hbits);
+ rec->next = rhash[hi];
+ rhash[hi] = rec;
+
+ return 0;
+}
+
+
+static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
+ xdlclassifier_t *cf, xdfile_t *xdf) {
+ unsigned int hbits;
+ long i, nrec, hsize, bsize;
+ unsigned long hav;
+ char const *blk, *cur, *top, *prev;
+ xrecord_t *crec;
+ xrecord_t **recs, **rrecs;
+ xrecord_t **rhash;
+ unsigned long *ha;
+ char *rchg;
+ long *rindex;
+
+ if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) {
+
+ return -1;
+ }
+ if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) {
+
+ xdl_cha_free(&xdf->rcha);
+ return -1;
+ }
+
+ hbits = xdl_hashbits((unsigned int) narec);
+ hsize = 1 << hbits;
+ if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) {
+
+ xdl_free(recs);
+ xdl_cha_free(&xdf->rcha);
+ return -1;
+ }
+ for (i = 0; i < hsize; i++)
+ rhash[i] = NULL;
+
+ nrec = 0;
+ if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
+ for (top = blk + bsize;;) {
+ if (cur >= top) {
+ if (!(cur = blk = xdl_mmfile_next(mf, &bsize)))
+ break;
+ top = blk + bsize;
+ }
+ prev = cur;
+ hav = xdl_hash_record(&cur, top);
+ if (nrec >= narec) {
+ narec *= 2;
+ if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) {
+
+ xdl_free(rhash);
+ xdl_free(recs);
+ xdl_cha_free(&xdf->rcha);
+ return -1;
+ }
+ recs = rrecs;
+ }
+ if (!(crec = xdl_cha_alloc(&xdf->rcha))) {
+
+ xdl_free(rhash);
+ xdl_free(recs);
+ xdl_cha_free(&xdf->rcha);
+ return -1;
+ }
+ crec->ptr = prev;
+ crec->size = (long) (cur - prev);
+ crec->ha = hav;
+ recs[nrec++] = crec;
+
+ if (xdl_classify_record(cf, rhash, hbits, crec) < 0) {
+
+ xdl_free(rhash);
+ xdl_free(recs);
+ xdl_cha_free(&xdf->rcha);
+ return -1;
+ }
+ }
+ }
+
+ if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) {
+
+ xdl_free(rhash);
+ xdl_free(recs);
+ xdl_cha_free(&xdf->rcha);
+ return -1;
+ }
+ memset(rchg, 0, (nrec + 2) * sizeof(char));
+
+ if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) {
+
+ xdl_free(rchg);
+ xdl_free(rhash);
+ xdl_free(recs);
+ xdl_cha_free(&xdf->rcha);
+ return -1;
+ }
+ if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) {
+
+ xdl_free(rindex);
+ xdl_free(rchg);
+ xdl_free(rhash);
+ xdl_free(recs);
+ xdl_cha_free(&xdf->rcha);
+ return -1;
+ }
+
+ xdf->nrec = nrec;
+ xdf->recs = recs;
+ xdf->hbits = hbits;
+ xdf->rhash = rhash;
+ xdf->rchg = rchg + 1;
+ xdf->rindex = rindex;
+ xdf->nreff = 0;
+ xdf->ha = ha;
+ xdf->dstart = 0;
+ xdf->dend = nrec - 1;
+
+ return 0;
+}
+
+
+static void xdl_free_ctx(xdfile_t *xdf) {
+
+ xdl_free(xdf->rhash);
+ xdl_free(xdf->rindex);
+ xdl_free(xdf->rchg - 1);
+ xdl_free(xdf->ha);
+ xdl_free(xdf->recs);
+ xdl_cha_free(&xdf->rcha);
+}
+
+
+int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+ xdfenv_t *xe) {
+ long enl1, enl2;
+ xdlclassifier_t cf;
+
+ enl1 = xdl_guess_lines(mf1) + 1;
+ enl2 = xdl_guess_lines(mf2) + 1;
+
+ if (xdl_init_classifier(&cf, enl1 + enl2 + 1) < 0) {
+
+ return -1;
+ }
+
+ if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+
+ xdl_free_classifier(&cf);
+ return -1;
+ }
+ if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+
+ xdl_free_ctx(&xe->xdf1);
+ xdl_free_classifier(&cf);
+ return -1;
+ }
+
+ xdl_free_classifier(&cf);
+
+ if (xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) {
+
+ xdl_free_ctx(&xe->xdf2);
+ xdl_free_ctx(&xe->xdf1);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+void xdl_free_env(xdfenv_t *xe) {
+
+ xdl_free_ctx(&xe->xdf2);
+ xdl_free_ctx(&xe->xdf1);
+}
+
+
+static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
+ long r, rdis, rpdis;
+
+ for (r = 1, rdis = 0, rpdis = 1; (i - r) >= s; r++) {
+ if (!dis[i - r])
+ rdis++;
+ else if (dis[i - r] == 2)
+ rpdis++;
+ else
+ break;
+ }
+ for (r = 1; (i + r) <= e; r++) {
+ if (!dis[i + r])
+ rdis++;
+ else if (dis[i + r] == 2)
+ rpdis++;
+ else
+ break;
+ }
+
+ return rpdis * XDL_KPDIS_RUN < (rpdis + rdis);
+}
+
+
+/*
+ * Try to reduce the problem complexity, discard records that have no
+ * matches on the other file. Also, lines that have multiple matches
+ * might be potentially discarded if they happear in a run of discardable.
+ */
+static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2) {
+ long i, rhi, nreff;
+ unsigned long hav;
+ xrecord_t **recs;
+ xrecord_t *rec;
+ char *dis, *dis1, *dis2;
+
+ if (!(dis = (char *) xdl_malloc((xdf1->nrec + xdf2->nrec + 2) * sizeof(char)))) {
+
+ return -1;
+ }
+ memset(dis, 0, (xdf1->nrec + xdf2->nrec + 2) * sizeof(char));
+ dis1 = dis;
+ dis2 = dis1 + xdf1->nrec + 1;
+
+ for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
+ hav = (*recs)->ha;
+ rhi = (long) XDL_HASHLONG(hav, xdf2->hbits);
+ for (rec = xdf2->rhash[rhi]; rec; rec = rec->next)
+ if (rec->ha == hav && ++dis1[i] == 2)
+ break;
+ }
+
+ for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
+ hav = (*recs)->ha;
+ rhi = (long) XDL_HASHLONG(hav, xdf1->hbits);
+ for (rec = xdf1->rhash[rhi]; rec; rec = rec->next)
+ if (rec->ha == hav && ++dis2[i] == 2)
+ break;
+ }
+
+ for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];
+ i <= xdf1->dend; i++, recs++) {
+ if (dis1[i] == 1 ||
+ (dis1[i] == 2 && !xdl_clean_mmatch(dis1, i, xdf1->dstart, xdf1->dend))) {
+ xdf1->rindex[nreff] = i;
+ xdf1->ha[nreff] = (*recs)->ha;
+ nreff++;
+ } else
+ xdf1->rchg[i] = 1;
+ }
+ xdf1->nreff = nreff;
+
+ for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart];
+ i <= xdf2->dend; i++, recs++) {
+ if (dis2[i] == 1 ||
+ (dis2[i] == 2 && !xdl_clean_mmatch(dis2, i, xdf2->dstart, xdf2->dend))) {
+ xdf2->rindex[nreff] = i;
+ xdf2->ha[nreff] = (*recs)->ha;
+ nreff++;
+ } else
+ xdf2->rchg[i] = 1;
+ }
+ xdf2->nreff = nreff;
+
+ xdl_free(dis);
+
+ return 0;
+}
+
+
+/*
+ * Early trim initial and terminal matching records.
+ */
+static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
+ long i, lim;
+ xrecord_t **recs1, **recs2;
+
+ recs1 = xdf1->recs;
+ recs2 = xdf2->recs;
+ for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
+ i++, recs1++, recs2++)
+ if ((*recs1)->ha != (*recs2)->ha)
+ break;
+
+ xdf1->dstart = xdf2->dstart = i;
+
+ recs1 = xdf1->recs + xdf1->nrec - 1;
+ recs2 = xdf2->recs + xdf2->nrec - 1;
+ for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--)
+ if ((*recs1)->ha != (*recs2)->ha)
+ break;
+
+ xdf1->dend = xdf1->nrec - i - 1;
+ xdf2->dend = xdf2->nrec - i - 1;
+
+ return 0;
+}
+
+
+static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2) {
+
+ if (xdl_trim_ends(xdf1, xdf2) < 0 ||
+ xdl_cleanup_records(xdf1, xdf2) < 0) {
+
+ return -1;
+ }
+
+ return 0;
+}
+
diff --git a/xdiff/xprepare.h b/xdiff/xprepare.h
new file mode 100644
index 0000000..344c569
--- /dev/null
+++ b/xdiff/xprepare.h
@@ -0,0 +1,35 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#if !defined(XPREPARE_H)
+#define XPREPARE_H
+
+
+
+int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+ xdfenv_t *xe);
+void xdl_free_env(xdfenv_t *xe);
+
+
+
+#endif /* #if !defined(XPREPARE_H) */
+
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
new file mode 100644
index 0000000..3593a66
--- /dev/null
+++ b/xdiff/xtypes.h
@@ -0,0 +1,68 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#if !defined(XTYPES_H)
+#define XTYPES_H
+
+
+
+typedef struct s_chanode {
+ struct s_chanode *next;
+ long icurr;
+} chanode_t;
+
+typedef struct s_chastore {
+ chanode_t *head, *tail;
+ long isize, nsize;
+ chanode_t *ancur;
+ chanode_t *sncur;
+ long scurr;
+} chastore_t;
+
+typedef struct s_xrecord {
+ struct s_xrecord *next;
+ char const *ptr;
+ long size;
+ unsigned long ha;
+} xrecord_t;
+
+typedef struct s_xdfile {
+ chastore_t rcha;
+ long nrec;
+ unsigned int hbits;
+ xrecord_t **rhash;
+ long dstart, dend;
+ xrecord_t **recs;
+ char *rchg;
+ long *rindex;
+ long nreff;
+ unsigned long *ha;
+} xdfile_t;
+
+typedef struct s_xdfenv {
+ xdfile_t xdf1, xdf2;
+} xdfenv_t;
+
+
+
+#endif /* #if !defined(XTYPES_H) */
+
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
new file mode 100644
index 0000000..8221806
--- /dev/null
+++ b/xdiff/xutils.c
@@ -0,0 +1,277 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#include "xinclude.h"
+
+
+
+#define XDL_GUESS_NLINES 256
+
+
+
+
+int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize,
+ xdemitcb_t *ecb) {
+ mmbuffer_t mb[3];
+ int i;
+
+ mb[0].ptr = (char *) pre;
+ mb[0].size = psize;
+ mb[1].ptr = (char *) rec;
+ mb[1].size = size;
+ i = 2;
+
+ if (!size || rec[size-1] != '\n') {
+ mb[2].ptr = "\n\\ No newline at end of file\n";
+ mb[2].size = strlen(mb[2].ptr);
+ i = 3;
+ }
+
+ if (ecb->outf(ecb->priv, mb, i) < 0) {
+
+ return -1;
+ }
+
+ return 0;
+}
+
+void *xdl_mmfile_first(mmfile_t *mmf, long *size)
+{
+ *size = mmf->size;
+ return mmf->ptr;
+}
+
+
+void *xdl_mmfile_next(mmfile_t *mmf, long *size)
+{
+ return NULL;
+}
+
+
+long xdl_mmfile_size(mmfile_t *mmf)
+{
+ return mmf->size;
+}
+
+
+int xdl_cha_init(chastore_t *cha, long isize, long icount) {
+
+ cha->head = cha->tail = NULL;
+ cha->isize = isize;
+ cha->nsize = icount * isize;
+ cha->ancur = cha->sncur = NULL;
+ cha->scurr = 0;
+
+ return 0;
+}
+
+
+void xdl_cha_free(chastore_t *cha) {
+ chanode_t *cur, *tmp;
+
+ for (cur = cha->head; (tmp = cur) != NULL;) {
+ cur = cur->next;
+ xdl_free(tmp);
+ }
+}
+
+
+void *xdl_cha_alloc(chastore_t *cha) {
+ chanode_t *ancur;
+ void *data;
+
+ if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) {
+ if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) {
+
+ return NULL;
+ }
+ ancur->icurr = 0;
+ ancur->next = NULL;
+ if (cha->tail)
+ cha->tail->next = ancur;
+ if (!cha->head)
+ cha->head = ancur;
+ cha->tail = ancur;
+ cha->ancur = ancur;
+ }
+
+ data = (char *) ancur + sizeof(chanode_t) + ancur->icurr;
+ ancur->icurr += cha->isize;
+
+ return data;
+}
+
+
+void *xdl_cha_first(chastore_t *cha) {
+ chanode_t *sncur;
+
+ if (!(cha->sncur = sncur = cha->head))
+ return NULL;
+
+ cha->scurr = 0;
+
+ return (char *) sncur + sizeof(chanode_t) + cha->scurr;
+}
+
+
+void *xdl_cha_next(chastore_t *cha) {
+ chanode_t *sncur;
+
+ if (!(sncur = cha->sncur))
+ return NULL;
+ cha->scurr += cha->isize;
+ if (cha->scurr == sncur->icurr) {
+ if (!(sncur = cha->sncur = sncur->next))
+ return NULL;
+ cha->scurr = 0;
+ }
+
+ return (char *) sncur + sizeof(chanode_t) + cha->scurr;
+}
+
+
+long xdl_guess_lines(mmfile_t *mf) {
+ long nl = 0, size, tsize = 0;
+ char const *data, *cur, *top;
+
+ if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) {
+ for (top = data + size; nl < XDL_GUESS_NLINES;) {
+ if (cur >= top) {
+ tsize += (long) (cur - data);
+ if (!(cur = data = xdl_mmfile_next(mf, &size)))
+ break;
+ top = data + size;
+ }
+ nl++;
+ if (!(cur = memchr(cur, '\n', top - cur)))
+ cur = top;
+ else
+ cur++;
+ }
+ tsize += (long) (cur - data);
+ }
+
+ if (nl && tsize)
+ nl = xdl_mmfile_size(mf) / (tsize / nl);
+
+ return nl + 1;
+}
+
+
+unsigned long xdl_hash_record(char const **data, char const *top) {
+ unsigned long ha = 5381;
+ char const *ptr = *data;
+
+ for (; ptr < top && *ptr != '\n'; ptr++) {
+ ha += (ha << 5);
+ ha ^= (unsigned long) *ptr;
+ }
+ *data = ptr < top ? ptr + 1: ptr;
+
+ return ha;
+}
+
+
+unsigned int xdl_hashbits(unsigned int size) {
+ unsigned int val = 1, bits = 0;
+
+ for (; val < size && bits < CHAR_BIT * sizeof(unsigned int); val <<= 1, bits++);
+ return bits ? bits: 1;
+}
+
+
+int xdl_num_out(char *out, long val) {
+ char *ptr, *str = out;
+ char buf[32];
+
+ ptr = buf + sizeof(buf) - 1;
+ *ptr = '\0';
+ if (val < 0) {
+ *--ptr = '-';
+ val = -val;
+ }
+ for (; val && ptr > buf; val /= 10)
+ *--ptr = "0123456789"[val % 10];
+ if (*ptr)
+ for (; *ptr; ptr++, str++)
+ *str = *ptr;
+ else
+ *str++ = '0';
+ *str = '\0';
+
+ return str - out;
+}
+
+
+long xdl_atol(char const *str, char const **next) {
+ long val, base;
+ char const *top;
+
+ for (top = str; XDL_ISDIGIT(*top); top++);
+ if (next)
+ *next = top;
+ for (val = 0, base = 1, top--; top >= str; top--, base *= 10)
+ val += base * (long)(*top - '0');
+ return val;
+}
+
+
+int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, xdemitcb_t *ecb) {
+ int nb = 0;
+ mmbuffer_t mb;
+ char buf[128];
+
+ memcpy(buf, "@@ -", 4);
+ nb += 4;
+
+ nb += xdl_num_out(buf + nb, c1 ? s1: 0);
+
+ if (c1 != 1) {
+ memcpy(buf + nb, ",", 1);
+ nb += 1;
+
+ nb += xdl_num_out(buf + nb, c1);
+ }
+
+ memcpy(buf + nb, " +", 2);
+ nb += 2;
+
+ nb += xdl_num_out(buf + nb, c2 ? s2: 0);
+
+ if (c2 != 1) {
+ memcpy(buf + nb, ",", 1);
+ nb += 1;
+
+ nb += xdl_num_out(buf + nb, c2);
+ }
+
+ memcpy(buf + nb, " @@\n", 4);
+ nb += 4;
+
+ mb.ptr = buf;
+ mb.size = nb;
+ if (ecb->outf(ecb->priv, &mb, 1) < 0)
+ return -1;
+
+ return 0;
+}
+
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
new file mode 100644
index 0000000..428a4bb
--- /dev/null
+++ b/xdiff/xutils.h
@@ -0,0 +1,44 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#if !defined(XUTILS_H)
+#define XUTILS_H
+
+
+int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize,
+ xdemitcb_t *ecb);
+int xdl_cha_init(chastore_t *cha, long isize, long icount);
+void xdl_cha_free(chastore_t *cha);
+void *xdl_cha_alloc(chastore_t *cha);
+void *xdl_cha_first(chastore_t *cha);
+void *xdl_cha_next(chastore_t *cha);
+long xdl_guess_lines(mmfile_t *mf);
+unsigned long xdl_hash_record(char const **data, char const *top);
+unsigned int xdl_hashbits(unsigned int size);
+int xdl_num_out(char *out, long val);
+long xdl_atol(char const *str, char const **next);
+int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, xdemitcb_t *ecb);
+
+
+
+#endif /* #if !defined(XUTILS_H) */
+