From 78ed710fcf1a67a40eeca1be1a1bb8e8b7296177 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Aug 2009 00:51:09 -0700 Subject: xutils: Fix hashing an incomplete line with whitespaces at the end Upon seeing a whitespace, xdl_hash_record_with_whitespace() first skipped the run of whitespaces (excluding LF) that begins there, ensuring that the pointer points at the last whitespace character in the run, and assumed that the next character must be LF at the end of the line. This does not work when hashing an incomplete line, which lacks the LF at the end. Introduce "at_eol" variable that is true when either we are at the end of line (looking at LF) or at the end of an incomplete line, and use that instead throughout the code. Noticed by Thell Fowler. Signed-off-by: Junio C Hamano diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 04ad468..9411fa9 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -242,18 +242,20 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data, for (; ptr < top && *ptr != '\n'; ptr++) { if (isspace(*ptr)) { const char *ptr2 = ptr; + int at_eol; while (ptr + 1 < top && isspace(ptr[1]) && ptr[1] != '\n') ptr++; + at_eol = (top <= ptr + 1 || ptr[1] == '\n'); if (flags & XDF_IGNORE_WHITESPACE) ; /* already handled */ else if (flags & XDF_IGNORE_WHITESPACE_CHANGE - && ptr[1] != '\n') { + && !at_eol) { ha += (ha << 5); ha ^= (unsigned long) ' '; } else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL - && ptr[1] != '\n') { + && !at_eol) { while (ptr2 != ptr + 1) { ha += (ha << 5); ha ^= (unsigned long) *ptr2; -- cgit v0.10.2-6-g49f6 From 3b5ef0e216d20a49dac223eb985c22c56512dbec Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Aug 2009 00:57:18 -0700 Subject: xutils: Fix xdl_recmatch() on incomplete lines Thell Fowler noticed that various "ignore whitespace" options to git diff do not work well on an incomplete line. The loop control of the function responsible for these bugs was extremely difficult to follow. This patch restructures the loops for three variants of "ignore whitespace" logic. The basic idea of the re-written logic is: - A loop runs while the characters from both strings we are looking at match. We declare unmatch immediately when we find something that does not match and return false from the function. We break out of the loop if we ran out of either side of the string. The way we skip spaces inside this loop varies depending on the style of ignoring whitespaces. - After the above loop breaks, we know that the parts of the strings we inspected so far match, ignoring the whitespaces. The lines can match only if the remainder consists of nothing but whitespaces. This part of the logic is shared across all three styles. The new code is more obvious and should be much easier to follow. Tested-by: Thell Fowler Signed-off-by: Junio C Hamano diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 9411fa9..bc12f29 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -190,48 +190,66 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags) { int i1, i2; + if (!(flags & XDF_WHITESPACE_FLAGS)) + return s1 == s2 && !memcmp(l1, l2, s1); + + i1 = 0; + i2 = 0; + + /* + * -w matches everything that matches with -b, and -b in turn + * matches everything that matches with --ignore-space-at-eol. + * + * Each flavor of ignoring needs different logic to skip whitespaces + * while we have both sides to compare. + */ if (flags & XDF_IGNORE_WHITESPACE) { - for (i1 = i2 = 0; i1 < s1 && i2 < s2; ) { - if (isspace(l1[i1])) - while (isspace(l1[i1]) && i1 < s1) - i1++; - if (isspace(l2[i2])) - while (isspace(l2[i2]) && i2 < s2) - i2++; - if (i1 < s1 && i2 < s2 && l1[i1++] != l2[i2++]) + goto skip_ws; + while (i1 < s1 && i2 < s2) { + if (l1[i1++] != l2[i2++]) return 0; + skip_ws: + while (i1 < s1 && isspace(l1[i1])) + i1++; + while (i2 < s2 && isspace(l2[i2])) + i2++; } - return (i1 >= s1 && i2 >= s2); } else if (flags & XDF_IGNORE_WHITESPACE_CHANGE) { - for (i1 = i2 = 0; i1 < s1 && i2 < s2; ) { - if (isspace(l1[i1])) { - if (!isspace(l2[i2])) - return 0; - while (isspace(l1[i1]) && i1 < s1) - i1++; - while (isspace(l2[i2]) && i2 < s2) - i2++; - } else if (l1[i1++] != l2[i2++]) - return 0; - } - return (i1 >= s1 && i2 >= s2); - } else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) { - for (i1 = i2 = 0; i1 < s1 && i2 < s2; ) { - if (l1[i1] != l2[i2]) { + while (i1 < s1 && i2 < s2) { + if (isspace(l1[i1]) && isspace(l2[i2])) { + /* Skip matching spaces and try again */ while (i1 < s1 && isspace(l1[i1])) i1++; while (i2 < s2 && isspace(l2[i2])) i2++; - if (i1 < s1 || i2 < s2) - return 0; - return 1; + continue; } + if (l1[i1++] != l2[i2++]) + return 0; + } + } else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) { + while (i1 < s1 && i2 < s2 && l1[i1++] == l2[i2++]) + ; /* keep going */ + } + + /* + * After running out of one side, the remaining side must have + * nothing but whitespace for the lines to match. Note that + * ignore-whitespace-at-eol case may break out of the loop + * while there still are characters remaining on both lines. + */ + if (i1 < s1) { + while (i1 < s1 && isspace(l1[i1])) i1++; + if (s1 != i1) + return 0; + } + if (i2 < s2) { + while (i2 < s2 && isspace(l2[i2])) i2++; - } - return i1 >= s1 && i2 >= s2; - } else - return s1 == s2 && !memcmp(l1, l2, s1); + return (s2 == i2); + } + return 1; } static unsigned long xdl_hash_record_with_whitespace(char const **data, -- cgit v0.10.2-6-g49f6