summaryrefslogtreecommitdiff
path: root/diff.c
diff options
context:
space:
mode:
author: Johan Herland <johan@herland.net> 2011-04-29 09:36:21 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2011-04-29 18:22:55 (GMT)
commit: 1c57a627bf269f3c83c48ad724cd8b14292502ef (patch)
tree: cf9a49d2ff4665e31f21c4885ce9193dccf87d78 /diff.c
parent: 712d2c7dd893212756c21787fc12d6f71327e167 (diff)
download: git-1c57a627bf269f3c83c48ad724cd8b14292502ef.zip
git-1c57a627bf269f3c83c48ad724cd8b14292502ef.tar.gz
git-1c57a627bf269f3c83c48ad724cd8b14292502ef.tar.bz2
New --dirstat=lines mode, doing dirstat analysis based on diffstat
This patch adds an alternative implementation of show_dirstat(), called
show_dirstat_by_line(), which uses the more expensive diffstat analysis
(as opposed to show_dirstat()'s own (relatively inexpensive) analysis) to
derive the numbers from which the --dirstat output is computed.

The alternative implementation is controlled by the new "lines" parameter
to the --dirstat option (or the diff.dirstat config variable).

For binary files, the diffstat analysis counts bytes instead of lines,
so to prevent binary files from dominating the dirstat results, the
byte counts for binary files are divided by 64 before being compared to
their textual/line-based counterparts. This is a stupid and ugly - but
very cheap - heuristic.

In linux-2.6.git, running the three different --dirstat modes:

    time git diff v2.6.20..v2.6.30 --dirstat=changes > /dev/null
vs.
    time git diff v2.6.20..v2.6.30 --dirstat=lines > /dev/null
vs.
    time git diff v2.6.20..v2.6.30 --dirstat=files > /dev/null

yields the following average runtimes on my machine:

 - "changes" (default): ~6.0 s
 - "lines": ~9.6 s
 - "files": ~0.1 s

So, as expected, there's a considerable performance hit (~60%) by going
through the full diffstat analysis as compared to the default "changes"
analysis (obviously, "files" is much faster than both). As such, the
"lines" mode is probably only useful if you really need the --dirstat
numbers to be consistent with the numbers returned from the other
--*stat options.

The patch also includes documentation and tests for the new dirstat mode.

Improved-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'diff.c')
-rw-r--r-- diff.c 61
1 files changed, 59 insertions, 2 deletions
diff --git a/diff.c b/diff.c
index 70464d9..919a16d 100644
--- a/diff.c
+++ b/diff.c
@@ -73,9 +73,15 @@ static int parse_dirstat_params(struct diff_options *options, const char *params
while (*p) {
if (!prefixcmp(p, "changes")) {
p += 7;
+ DIFF_OPT_CLR(options, DIRSTAT_BY_LINE);
+ DIFF_OPT_CLR(options, DIRSTAT_BY_FILE);
+ } else if (!prefixcmp(p, "lines")) {
+ p += 5;
+ DIFF_OPT_SET(options, DIRSTAT_BY_LINE);
DIFF_OPT_CLR(options, DIRSTAT_BY_FILE);
} else if (!prefixcmp(p, "files")) {
p += 5;
+ DIFF_OPT_CLR(options, DIRSTAT_BY_LINE);
DIFF_OPT_SET(options, DIRSTAT_BY_FILE);
} else if (!prefixcmp(p, "noncumulative")) {
p += 13;
@@ -1671,6 +1677,50 @@ found_damage:
gather_dirstat(options, &dir, changed, "", 0);
}
+/*
+ * Diffstat-based counterpart of show_dirstat(): build the per-file
+ * "damage" table from the added/deleted counts already collected in
+ * "data" and pass it to gather_dirstat().
+ *
+ * data:    diffstat results; data->files[0..nr-1] are only read.
+ * options: supplies dirstat_permille and the DIRSTAT_CUMULATIVE flag,
+ *          and is passed through to gather_dirstat().
+ *
+ * gather_dirstat() is not called when there are no files or when the
+ * total damage is zero.  The temporary table is released on every path.
+ */
+static void show_dirstat_by_line(struct diffstat_t *data, struct diff_options *options)
+{
+	int i;
+	unsigned long changed = 0;
+	struct dirstat_dir dir;
+	void *to_free;
+
+	if (data->nr == 0)
+		return;
+
+	dir.files = NULL;
+	dir.alloc = 0;
+	dir.nr = 0;
+	dir.permille = options->dirstat_permille;
+	dir.cumulative = DIFF_OPT_TST(options, DIRSTAT_CUMULATIVE);
+
+	for (i = 0; i < data->nr; i++) {
+		struct diffstat_file *file = data->files[i];
+		unsigned long damage = file->added + file->deleted;
+
+		if (file->is_binary)
+			/*
+			 * Binary files are counted in bytes, not lines, so
+			 * the two must be normalized before being compared.
+			 * The following heuristic assumes that there are 64
+			 * bytes per "line" (rounding up).
+			 * This is stupid and ugly, but very cheap...
+			 */
+			damage = (damage + 63) / 64;
+
+		ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc);
+		dir.files[dir.nr].name = file->name;
+		dir.files[dir.nr].changed = damage;
+		changed += damage;
+		dir.nr++;
+	}
+
+	/*
+	 * Remember the base of the allocation before handing &dir to
+	 * gather_dirstat(), which may move dir.files while it works.
+	 */
+	to_free = dir.files;
+
+	/*
+	 * A zero total can happen even with many files, if everything was
+	 * renames; in that case there is nothing to show.
+	 */
+	if (changed) {
+		/* Show all directories with more than x% of the changes */
+		qsort(dir.files, dir.nr, sizeof(dir.files[0]), dirstat_compare);
+		gather_dirstat(options, &dir, changed, "", 0);
+	}
+
+	/* The table only borrows file->name, so one free() suffices. */
+	free(to_free);
+}
+
static void free_diffstat_info(struct diffstat_t *diffstat)
{
int i;
@@ -4088,6 +4138,7 @@ void diff_flush(struct diff_options *options)
struct diff_queue_struct *q = &diff_queued_diff;
int i, output_format = options->output_format;
int separator = 0;
+ int dirstat_by_line = 0;
/*
* Order: raw, stat, summary, patch
@@ -4108,7 +4159,11 @@ void diff_flush(struct diff_options *options)
separator++;
}
- if (output_format & (DIFF_FORMAT_DIFFSTAT|DIFF_FORMAT_SHORTSTAT|DIFF_FORMAT_NUMSTAT)) {
+ if (output_format & DIFF_FORMAT_DIRSTAT && DIFF_OPT_TST(options, DIRSTAT_BY_LINE))
+ dirstat_by_line = 1;
+
+ if (output_format & (DIFF_FORMAT_DIFFSTAT|DIFF_FORMAT_SHORTSTAT|DIFF_FORMAT_NUMSTAT) ||
+ dirstat_by_line) {
struct diffstat_t diffstat;
memset(&diffstat, 0, sizeof(struct diffstat_t));
@@ -4123,10 +4178,12 @@ void diff_flush(struct diff_options *options)
show_stats(&diffstat, options);
if (output_format & DIFF_FORMAT_SHORTSTAT)
show_shortstats(&diffstat, options);
+ if (output_format & DIFF_FORMAT_DIRSTAT)
+ show_dirstat_by_line(&diffstat, options);
free_diffstat_info(&diffstat);
separator++;
}
- if (output_format & DIFF_FORMAT_DIRSTAT)
+ if ((output_format & DIFF_FORMAT_DIRSTAT) && !dirstat_by_line)
show_dirstat(options);
if (output_format & DIFF_FORMAT_SUMMARY && !is_summary_empty(q)) {