From 282098506ffb42d151a3c8d324b6b5393a4342a4 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Thu, 19 Jul 2018 11:56:18 -0700 Subject: xdiff/xhistogram: pass arguments directly to fall_back_to_classic_diff By passing the 'xpp' and 'env' argument directly to the function 'fall_back_to_classic_diff', we eliminate an occurrence of the 'index' in histogram_diff, which will prove useful in a bit. While at it, move it up in the file. This will make the diff of one of the next patches more legible. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index 73210cb..6e20f75 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -233,6 +233,16 @@ static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr, return b_next; } +static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env, + int line1, int count1, int line2, int count2) +{ + xpparam_t xpparam; + xpparam.flags = xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; + + return xdl_fall_back_diff(env, &xpparam, + line1, count1, line2, count2); +} + static int find_lcs(struct histindex *index, struct region *lcs, int line1, int count1, int line2, int count2) { int b_ptr; @@ -248,16 +258,6 @@ static int find_lcs(struct histindex *index, struct region *lcs, return index->has_common && index->max_chain_length < index->cnt; } -static int fall_back_to_classic_diff(struct histindex *index, - int line1, int count1, int line2, int count2) -{ - xpparam_t xpp; - xpp.flags = index->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; - - return xdl_fall_back_diff(index->env, &xpp, - line1, count1, line2, count2); -} - static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, int line1, int count1, int line2, int count2) { @@ -320,7 +320,7 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, memset(&lcs, 0, sizeof(lcs)); if (find_lcs(&index, &lcs, line1, count1, line2, count2)) - result = fall_back_to_classic_diff(&index, line1, count1, line2, count2); + result = fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2); else { if (lcs.begin1 == 0 && lcs.begin2 == 0) { while (count1--) -- cgit v0.10.2-6-g49f6 From c671d4b5990f07ca40b0914ca9be65c626608fca Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Thu, 19 Jul 2018 11:56:19 -0700 Subject: xdiff/xhistogram: factor out memory cleanup into free_index() This will be useful in the next patch as we'll introduce multiple callers. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index 6e20f75..5098b6c 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -243,6 +243,14 @@ static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env, line1, count1, line2, count2); } +static inline void free_index(struct histindex *index) +{ + xdl_free(index->records); + xdl_free(index->line_map); + xdl_free(index->next_ptrs); + xdl_cha_free(&index->rcha); +} + static int find_lcs(struct histindex *index, struct region *lcs, int line1, int count1, int line2, int count2) { int b_ptr; @@ -343,10 +351,7 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, } cleanup: - xdl_free(index.records); - xdl_free(index.line_map); - xdl_free(index.next_ptrs); - xdl_cha_free(&index.rcha); + free_index(&index); return result; } -- cgit v0.10.2-6-g49f6 From 64c4e8bccde9d357f6b7adf5277c3157b2bd0d42 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Thu, 19 Jul 2018 11:56:20 -0700 Subject: xdiff/xhistogram: move index allocation into find_lcs This fixes a memory issue when recursing a lot, which can be reproduced as seq 1 100000 >one seq 1 4 100000 >two git diff --no-index --histogram one two Before this patch, histogram_diff would call itself recursively before calling free_index, which would mean a lot of memory is allocated during the recursion and only freed afterwards. By moving the memory allocation (and its free call) into find_lcs, the memory is free'd before we recurse, such that memory is reused in the next step of the recursion instead of using new memory. This addresses only the memory pressure, not the run time complexity, that is also awful for the corner case outlined above. Helpful in understanding the code (in addition to the sparse history of this file), was https://stackoverflow.com/a/32367597 which reproduces most of the code comments of the JGit implementation. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index 5098b6c..fc2d3cd 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -251,44 +251,13 @@ static inline void free_index(struct histindex *index) xdl_cha_free(&index->rcha); } -static int find_lcs(struct histindex *index, struct region *lcs, - int line1, int count1, int line2, int count2) { - int b_ptr; - - if (scanA(index, line1, count1)) - return -1; - - index->cnt = index->max_chain_length + 1; - - for (b_ptr = line2; b_ptr <= LINE_END(2); ) - b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, count2); - - return index->has_common && index->max_chain_length < index->cnt; -} - -static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, - int line1, int count1, int line2, int count2) +static int find_lcs(xpparam_t const *xpp, xdfenv_t *env, + struct region *lcs, + int line1, int count1, int line2, int count2) { + int b_ptr; + int sz, ret = -1; struct histindex index; - struct region lcs; - int sz; - int result = -1; - - if (count1 <= 0 && count2 <= 0) - return 0; - - if (LINE_END(1) >= MAX_PTR) - return -1; - - if (!count1) { - while(count2--) - env->xdf2.rchg[line2++ - 1] = 1; - return 0; - } else if (!count2) { - while(count1--) - env->xdf1.rchg[line1++ - 1] = 1; - return 0; - } memset(&index, 0, sizeof(index)); @@ -326,8 +295,52 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, index.ptr_shift = line1; index.max_chain_length = 64; + if (scanA(&index, line1, count1)) + goto cleanup; + + index.cnt = index.max_chain_length + 1; + + for (b_ptr = line2; b_ptr <= LINE_END(2); ) + b_ptr = try_lcs(&index, lcs, b_ptr, line1, count1, line2, count2); + + if (index.has_common && index.max_chain_length < index.cnt) + ret = 1; + else + ret = 0; + +cleanup: + free_index(&index); + return ret; +} + +static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, + int line1, int count1, int line2, int count2) +{ + struct region lcs; + int lcs_found; + int result = -1; + + if (count1 <= 0 && count2 <= 0) + return 0; + + if (LINE_END(1) >= MAX_PTR) + return -1; + + if (!count1) { + while(count2--) + env->xdf2.rchg[line2++ - 1] = 1; + return 0; + } else if (!count2) { + while(count1--) + env->xdf1.rchg[line1++ - 1] = 1; + return 0; + } + memset(&lcs, 0, sizeof(lcs)); - if (find_lcs(&index, &lcs, line1, count1, line2, count2)) + lcs_found = find_lcs(xpp, env, &lcs, line1, count1, line2, count2); + if (lcs_found < 0) + goto out; + else if (lcs_found) result = fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2); else { if (lcs.begin1 == 0 && lcs.begin2 == 0) { @@ -341,18 +354,15 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, line1, lcs.begin1 - line1, line2, lcs.begin2 - line2); if (result) - goto cleanup; + goto out; result = histogram_diff(xpp, env, lcs.end1 + 1, LINE_END(1) - lcs.end1, lcs.end2 + 1, LINE_END(2) - lcs.end2); if (result) - goto cleanup; + goto out; } } - -cleanup: - free_index(&index); - +out: return result; } -- cgit v0.10.2-6-g49f6 From 79cb2ebb92c18af11edf5eea238425d86eef173d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Thu, 19 Jul 2018 15:19:42 -0700 Subject: xdiff/histogram: remove tail recursion When running the same reproduction script as the previous patch, it turns out the stack is too small, which can be easily avoided. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index fc2d3cd..ec85f59 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -318,7 +318,9 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, { struct region lcs; int lcs_found; - int result = -1; + int result; +redo: + result = -1; if (count1 <= 0 && count2 <= 0) return 0; @@ -355,11 +357,17 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, line2, lcs.begin2 - line2); if (result) goto out; - result = histogram_diff(xpp, env, - lcs.end1 + 1, LINE_END(1) - lcs.end1, - lcs.end2 + 1, LINE_END(2) - lcs.end2); - if (result) - goto out; + /* + * result = histogram_diff(xpp, env, + * lcs.end1 + 1, LINE_END(1) - lcs.end1, + * lcs.end2 + 1, LINE_END(2) - lcs.end2); + * but let's optimize tail recursion ourself: + */ + count1 = LINE_END(1) - lcs.end1; + line1 = lcs.end1 + 1; + count2 = LINE_END(2) - lcs.end2; + line2 = lcs.end2 + 1; + goto redo; } } out: -- cgit v0.10.2-6-g49f6