From ac9afcc31cd1f7c00c2747132e5f512173613fd9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 27 Apr 2014 21:15:47 +0300 Subject: test: add test_write_lines helper API and implementation as suggested by Junio. Signed-off-by: Michael S. Tsirkin Signed-off-by: Junio C Hamano diff --git a/t/README b/t/README index caeeb9d..2d6232f 100644 --- a/t/README +++ b/t/README @@ -596,6 +596,28 @@ library for your script to use. ... ' + - test_write_lines + + Split to white-space separated words and write it out on standard + output, one word per line. + Useful to prepare multi-line files in a compact form. + + Example: + + test_write_lines "a b c d e f g" >foo + + Is a more compact equivalent of: + cat >foo <<-EOF + a + b + c + d + e + f + g + EOF + + - test_pause This command is useful for writing and debugging tests and must be diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index aeae3ca..213fd0f 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -712,6 +712,11 @@ test_ln_s_add () { fi } +# This function writes out its parameters, one per line +test_write_lines () { + printf "%s\n" "$@" +} + perl () { command "$PERL_PATH" "$@" } -- cgit v0.10.2-6-g49f6 From bb98b01ee8c5c6f66a6d70aa453b70184dd3d96f Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Mon, 5 May 2014 16:51:43 -0700 Subject: test doc: test_write_lines does not split its arguments test_write_lines carefully quotes its arguments as "$@", so test_write_lines "a b" c writes two lines as requested, not three. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano diff --git a/t/README b/t/README index 2d6232f..8a9d499 100644 --- a/t/README +++ b/t/README @@ -596,15 +596,14 @@ library for your script to use. ... ' - - test_write_lines + - test_write_lines - Split to white-space separated words and write it out on standard - output, one word per line. + Write on standard output, one line per argument. Useful to prepare multi-line files in a compact form. Example: - test_write_lines "a b c d e f g" >foo + test_write_lines a b c d e f g >foo Is a more compact equivalent of: cat >foo <<-EOF -- cgit v0.10.2-6-g49f6 From 30e12b924b57b15e707f1749f2e5af15f1c7fe09 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 27 Apr 2014 21:15:44 +0300 Subject: patch-id: make it stable against hunk reordering Patch id changes if users reorder file diffs that make up a patch. As the result is functionally equivalent, a different patch id is surprising to many users. In particular, reordering files using diff -O is helpful to make patches more readable (e.g. API header diff before implementation diff). Add an option to change patch-id behaviour making it stable against these kinds of patch change: calculate SHA1 hash for each hunk separately and sum all hashes (using a symmetrical sum) to get patch id We use a 20byte sum and not xor - since xor would give 0 output for patches that have two identical diffs, which isn't all that unlikely (e.g. append the same line in two places). The new behaviour is enabled - when patchid.stable is true - when --stable flag is present Using a new flag --unstable or setting patchid.stable to false force the historical behaviour. In the documentation, clarify that patch ID can now be a sum of hashes, not a hash. Document how command line and config options affect the behaviour. Signed-off-by: Michael S. Tsirkin Signed-off-by: Junio C Hamano diff --git a/Documentation/git-patch-id.txt b/Documentation/git-patch-id.txt index 312c3b1..31efc58 100644 --- a/Documentation/git-patch-id.txt +++ b/Documentation/git-patch-id.txt @@ -8,14 +8,14 @@ git-patch-id - Compute unique ID for a patch SYNOPSIS -------- [verse] -'git patch-id' < +'git patch-id' [--stable | --unstable] < DESCRIPTION ----------- -A "patch ID" is nothing but a SHA-1 of the diff associated with a patch, with -whitespace and line numbers ignored. As such, it's "reasonably stable", but at -the same time also reasonably unique, i.e., two patches that have the same "patch -ID" are almost guaranteed to be the same thing. +A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a +patch, with whitespace and line numbers ignored. As such, it's "reasonably +stable", but at the same time also reasonably unique, i.e., two patches that +have the same "patch ID" are almost guaranteed to be the same thing. IOW, you can use this thing to look for likely duplicate commits. @@ -27,6 +27,33 @@ This can be used to make a mapping from patch ID to commit ID. OPTIONS ------- + +--stable:: + Use a "stable" sum of hashes as the patch ID. With this option: + - Reordering file diffs that make up a patch does not affect the ID. + In particular, two patches produced by comparing the same two trees + with two different settings for "-O" result in the same + patch ID signature, thereby allowing the computed result to be used + as a key to index some meta-information about the change between + the two trees; + + - Result is different from the value produced by git 1.9 and older + or produced when an "unstable" hash (see --unstable below) is + configured - even when used on a diff output taken without any use + of "-O", thereby making existing databases storing such + "unstable" or historical patch-ids unusable. + + This is the default if patchid.stable is set to true. + +--unstable:: + Use an "unstable" hash as the patch ID. With this option, + the result produced is compatible with the patch-id value produced + by git 1.9 and older. Users with pre-existing databases storing + patch-ids produced by git 1.9 and older (who do not deal with reordered + patches) may want to use this option. + + This is the default. + :: The diff to create the ID of. diff --git a/builtin/patch-id.c b/builtin/patch-id.c index 3cfe02d..77db873 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -1,17 +1,14 @@ #include "builtin.h" -static void flush_current_id(int patchlen, unsigned char *id, git_SHA_CTX *c) +static void flush_current_id(int patchlen, unsigned char *id, unsigned char *result) { - unsigned char result[20]; char name[50]; if (!patchlen) return; - git_SHA1_Final(result, c); memcpy(name, sha1_to_hex(id), 41); printf("%s %s\n", sha1_to_hex(result), name); - git_SHA1_Init(c); } static int remove_space(char *line) @@ -56,10 +53,31 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after) return 1; } -static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct strbuf *line_buf) +static void flush_one_hunk(unsigned char *result, git_SHA_CTX *ctx) +{ + unsigned char hash[20]; + unsigned short carry = 0; + int i; + + git_SHA1_Final(hash, ctx); + git_SHA1_Init(ctx); + /* 20-byte sum, with carry */ + for (i = 0; i < 20; ++i) { + carry += result[i] + hash[i]; + result[i] = carry; + carry >>= 8; + } +} + +static int get_one_patchid(unsigned char *next_sha1, unsigned char *result, + struct strbuf *line_buf, int stable) { int patchlen = 0, found_next = 0; int before = -1, after = -1; + git_SHA_CTX ctx; + + git_SHA1_Init(&ctx); + hashclr(result); while (strbuf_getwholeline(line_buf, stdin, '\n') != EOF) { char *line = line_buf->buf; @@ -107,6 +125,8 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st break; /* Else we're parsing another header. */ + if (stable) + flush_one_hunk(result, &ctx); before = after = -1; } @@ -119,39 +139,63 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st /* Compute the sha without whitespace */ len = remove_space(line); patchlen += len; - git_SHA1_Update(ctx, line, len); + git_SHA1_Update(&ctx, line, len); } if (!found_next) hashclr(next_sha1); + flush_one_hunk(result, &ctx); + return patchlen; } -static void generate_id_list(void) +static void generate_id_list(int stable) { - unsigned char sha1[20], n[20]; - git_SHA_CTX ctx; + unsigned char sha1[20], n[20], result[20]; int patchlen; struct strbuf line_buf = STRBUF_INIT; - git_SHA1_Init(&ctx); hashclr(sha1); while (!feof(stdin)) { - patchlen = get_one_patchid(n, &ctx, &line_buf); - flush_current_id(patchlen, sha1, &ctx); + patchlen = get_one_patchid(n, result, &line_buf, stable); + flush_current_id(patchlen, sha1, result); hashcpy(sha1, n); } strbuf_release(&line_buf); } -static const char patch_id_usage[] = "git patch-id < patch"; +static const char patch_id_usage[] = "git patch-id [--stable | --unstable] < patch"; + +static int git_patch_id_config(const char *var, const char *value, void *cb) +{ + int *stable = cb; + + if (!strcmp(var, "patchid.stable")) { + *stable = git_config_bool(var, value); + return 0; + } + + return git_default_config(var, value, cb); +} int cmd_patch_id(int argc, const char **argv, const char *prefix) { - if (argc != 1) + int stable = -1; + + git_config(git_patch_id_config, &stable); + + /* If nothing is set, default to unstable. */ + if (stable < 0) + stable = 0; + + if (argc == 2 && !strcmp(argv[1], "--stable")) + stable = 1; + else if (argc == 2 && !strcmp(argv[1], "--unstable")) + stable = 0; + else if (argc != 1) usage(patch_id_usage); - generate_id_list(); + generate_id_list(stable); return 0; } -- cgit v0.10.2-6-g49f6 From 8f2514e95fe142954d1dd4408cc7cb413a275844 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 27 Apr 2014 21:15:51 +0300 Subject: patch-id-test: test stable and unstable behaviour Verify that patch ID supports an algorithm that is stable against diff split and reordering. Signed-off-by: Michael S. Tsirkin Signed-off-by: Junio C Hamano diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh index d2c930d..fe011f8 100755 --- a/t/t4204-patch-id.sh +++ b/t/t4204-patch-id.sh @@ -5,27 +5,44 @@ test_description='git patch-id' . ./test-lib.sh test_expect_success 'setup' ' - test_commit initial foo a && - test_commit first foo b && - git checkout -b same HEAD^ && - test_commit same-msg foo b && - git checkout -b notsame HEAD^ && - test_commit notsame-msg foo c + as="a a a a a a a a" && # eight a + test_write_lines $as >foo && + test_write_lines $as >bar && + git add foo bar && + git commit -a -m initial && + test_write_lines $as b >foo && + test_write_lines $as b >bar && + git commit -a -m first && + git checkout -b same master && + git commit --amend -m same-msg && + git checkout -b notsame master && + echo c >foo && + echo c >bar && + git commit --amend -a -m notsame-msg && + test_write_lines bar foo >bar-then-foo && + test_write_lines foo bar >foo-then-bar ' test_expect_success 'patch-id output is well-formed' ' - git log -p -1 | git patch-id > output && + git log -p -1 | git patch-id >output && grep "^[a-f0-9]\{40\} $(git rev-parse HEAD)$" output ' +#calculate patch id. Make sure output is not empty. calc_patch_id () { - git patch-id | - sed "s# .*##" > patch-id_"$1" + name="$1" + shift + git patch-id "$@" | + sed "s/ .*//" >patch-id_"$name" && + test_line_count -gt 0 patch-id_"$name" +} + +get_top_diff () { + git log -p -1 "$@" -O bar-then-foo -- } get_patch_id () { - git log -p -1 "$1" | git patch-id | - sed "s# .*##" > patch-id_"$1" + get_top_diff "$1" | calc_patch_id "$@" } test_expect_success 'patch-id detects equality' ' @@ -56,6 +73,69 @@ test_expect_success 'whitespace is irrelevant in footer' ' test_cmp patch-id_master patch-id_same ' +cmp_patch_id () { + if + test "$1" = "relevant" + then + ! test_cmp patch-id_"$2" patch-id_"$3" + else + test_cmp patch-id_"$2" patch-id_"$3" + fi +} + +test_patch_id_file_order () { + relevant="$1" + shift + name="order-${1}-$relevant" + shift + get_top_diff "master" | calc_patch_id "$name" "$@" && + git checkout same && + git format-patch -1 --stdout -O foo-then-bar | + calc_patch_id "ordered-$name" "$@" && + cmp_patch_id $relevant "$name" "ordered-$name" + +} + +# combined test for options: add more tests here to make them +# run with all options +test_patch_id () { + test_patch_id_file_order "$@" +} + +# small tests with detailed diagnostic for basic options. +test_expect_success 'file order is irrelevant with --stable' ' + test_patch_id_file_order irrelevant --stable --stable +' + +test_expect_success 'file order is relevant with --unstable' ' + test_patch_id_file_order relevant --unstable --unstable +' + +#Now test various option combinations. +test_expect_success 'default is unstable' ' + test_patch_id relevant default +' + +test_expect_success 'patchid.stable = true is stable' ' + test_config patchid.stable true && + test_patch_id irrelevant patchid.stable=true +' + +test_expect_success 'patchid.stable = false is unstable' ' + test_config patchid.stable false && + test_patch_id relevant patchid.stable=false +' + +test_expect_success '--unstable overrides patchid.stable = true' ' + test_config patchid.stable true && + test_patch_id relevant patchid.stable=true--unstable --unstable +' + +test_expect_success '--stable overrides patchid.stable = false' ' + test_config patchid.stable false && + test_patch_id irrelevant patchid.stable=false--stable --stable +' + test_expect_success 'patch-id supports git-format-patch MIME output' ' get_patch_id master && git checkout same && -- cgit v0.10.2-6-g49f6