summaryrefslogtreecommitdiff
path: root/line-range.c
diff options
context:
space:
mode:
Diffstat (limited to 'line-range.c')
-rw-r--r--line-range.c290
1 files changed, 290 insertions, 0 deletions
diff --git a/line-range.c b/line-range.c
new file mode 100644
index 0000000..de4e32f
--- /dev/null
+++ b/line-range.c
@@ -0,0 +1,290 @@
+#include "git-compat-util.h"
+#include "line-range.h"
+#include "xdiff-interface.h"
+#include "strbuf.h"
+#include "userdiff.h"
+
+/*
+ * Parse one item in the -L option
+ *
+ * 'begin' is applicable only to relative range anchors. Absolute anchors
+ * ignore this value.
+ *
+ * When parsing "-L A,B", parse_loc() is called once for A and once for B.
+ *
+ * When parsing A, 'begin' must be a negative number, the absolute value of
+ * which is the line at which relative start-of-range anchors should be
+ * based. Beginning of file is represented by -1.
+ *
+ * When parsing B, 'begin' must be the positive line number immediately
+ * following the line computed for 'A'.
+ */
+static const char *parse_loc(const char *spec, nth_line_fn_t nth_line,
+ void *data, long lines, long begin, long *ret)
+{
+ char *term;
+ const char *line;
+ long num;
+ int reg_error;
+ regex_t regexp;
+ regmatch_t match[1];
+
+ /* Allow "-L <something>,+20" to mean starting at <something>
+ * for 20 lines, or "-L <something>,-5" for 5 lines ending at
+ * <something>.
+ */
+ if (1 <= begin && (spec[0] == '+' || spec[0] == '-')) {
+ num = strtol(spec + 1, &term, 10);
+ if (term != spec + 1) {
+ if (!ret)
+ return term;
+ if (num == 0)
+ die("-L invalid empty range");
+ if (spec[0] == '-')
+ num = 0 - num;
+ if (0 < num)
+ *ret = begin + num - 2;
+ else if (!num)
+ *ret = begin;
+ else
+ *ret = begin + num;
+ return term;
+ }
+ return spec;
+ }
+ num = strtol(spec, &term, 10);
+ if (term != spec) {
+ if (ret) {
+ if (num <= 0)
+ die("-L invalid line number: %ld", num);
+ *ret = num;
+ }
+ return term;
+ }
+
+ if (begin < 0) {
+ if (spec[0] != '^')
+ begin = -begin;
+ else {
+ begin = 1;
+ spec++;
+ }
+ }
+
+ if (spec[0] != '/')
+ return spec;
+
+ /* it could be a regexp of form /.../ */
+ for (term = (char *) spec + 1; *term && *term != '/'; term++) {
+ if (*term == '\\')
+ term++;
+ }
+ if (*term != '/')
+ return spec;
+
+ /* in the scan-only case we are not interested in the regex */
+ if (!ret)
+ return term+1;
+
+ /* try [spec+1 .. term-1] as regexp */
+ *term = 0;
+ begin--; /* input is in human terms */
+ line = nth_line(data, begin);
+
+ if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
+ !(reg_error = regexec(&regexp, line, 1, match, 0))) {
+ const char *cp = line + match[0].rm_so;
+ const char *nline;
+
+ while (begin++ < lines) {
+ nline = nth_line(data, begin);
+ if (line <= cp && cp < nline)
+ break;
+ line = nline;
+ }
+ *ret = begin;
+ regfree(&regexp);
+ *term++ = '/';
+ return term;
+ }
+ else {
+ char errbuf[1024];
+ regerror(reg_error, &regexp, errbuf, 1024);
+ die("-L parameter '%s' starting at line %ld: %s",
+ spec + 1, begin + 1, errbuf);
+ }
+}
+
+static int match_funcname(xdemitconf_t *xecfg, const char *bol, const char *eol)
+{
+ if (xecfg) {
+ char buf[1];
+ return xecfg->find_func(bol, eol - bol, buf, 1,
+ xecfg->find_func_priv) >= 0;
+ }
+
+ if (bol == eol)
+ return 0;
+ if (isalpha(*bol) || *bol == '_' || *bol == '$')
+ return 1;
+ return 0;
+}
+
+static const char *find_funcname_matching_regexp(xdemitconf_t *xecfg, const char *start,
+ regex_t *regexp)
+{
+ int reg_error;
+ regmatch_t match[1];
+ while (1) {
+ const char *bol, *eol;
+ reg_error = regexec(regexp, start, 1, match, 0);
+ if (reg_error == REG_NOMATCH)
+ return NULL;
+ else if (reg_error) {
+ char errbuf[1024];
+ regerror(reg_error, regexp, errbuf, 1024);
+ die("-L parameter: regexec() failed: %s", errbuf);
+ }
+ /* determine extent of line matched */
+ bol = start+match[0].rm_so;
+ eol = start+match[0].rm_eo;
+ while (bol > start && *bol != '\n')
+ bol--;
+ if (*bol == '\n')
+ bol++;
+ while (*eol && *eol != '\n')
+ eol++;
+ if (*eol == '\n')
+ eol++;
+ /* is it a funcname line? */
+ if (match_funcname(xecfg, (char*) bol, (char*) eol))
+ return bol;
+ start = eol;
+ }
+}
+
+static const char *parse_range_funcname(const char *arg, nth_line_fn_t nth_line_cb,
+ void *cb_data, long lines, long anchor, long *begin, long *end,
+ const char *path)
+{
+ char *pattern;
+ const char *term;
+ struct userdiff_driver *drv;
+ xdemitconf_t *xecfg = NULL;
+ const char *start;
+ const char *p;
+ int reg_error;
+ regex_t regexp;
+
+ if (*arg == '^') {
+ anchor = 1;
+ arg++;
+ }
+
+ assert(*arg == ':');
+ term = arg+1;
+ while (*term && *term != ':') {
+ if (*term == '\\' && *(term+1))
+ term++;
+ term++;
+ }
+ if (term == arg+1)
+ return NULL;
+ if (!begin) /* skip_range_arg case */
+ return term;
+
+ pattern = xstrndup(arg+1, term-(arg+1));
+
+ anchor--; /* input is in human terms */
+ start = nth_line_cb(cb_data, anchor);
+
+ drv = userdiff_find_by_path(path);
+ if (drv && drv->funcname.pattern) {
+ const struct userdiff_funcname *pe = &drv->funcname;
+ xecfg = xcalloc(1, sizeof(*xecfg));
+ xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
+ }
+
+ reg_error = regcomp(&regexp, pattern, REG_NEWLINE);
+ if (reg_error) {
+ char errbuf[1024];
+ regerror(reg_error, &regexp, errbuf, 1024);
+ die("-L parameter '%s': %s", pattern, errbuf);
+ }
+
+ p = find_funcname_matching_regexp(xecfg, (char*) start, &regexp);
+ if (!p)
+ die("-L parameter '%s' starting at line %ld: no match",
+ pattern, anchor + 1);
+ *begin = 0;
+ while (p > nth_line_cb(cb_data, *begin))
+ (*begin)++;
+
+ if (*begin >= lines)
+ die("-L parameter '%s' matches at EOF", pattern);
+
+ *end = *begin+1;
+ while (*end < lines) {
+ const char *bol = nth_line_cb(cb_data, *end);
+ const char *eol = nth_line_cb(cb_data, *end+1);
+ if (match_funcname(xecfg, bol, eol))
+ break;
+ (*end)++;
+ }
+
+ regfree(&regexp);
+ free(xecfg);
+ free(pattern);
+
+ /* compensate for 1-based numbering */
+ (*begin)++;
+
+ return term;
+}
+
+int parse_range_arg(const char *arg, nth_line_fn_t nth_line_cb,
+ void *cb_data, long lines, long anchor,
+ long *begin, long *end, const char *path)
+{
+ *begin = *end = 0;
+
+ if (anchor < 1)
+ anchor = 1;
+ if (anchor > lines)
+ anchor = lines + 1;
+
+ if (*arg == ':' || (*arg == '^' && *(arg + 1) == ':')) {
+ arg = parse_range_funcname(arg, nth_line_cb, cb_data, lines, anchor, begin, end, path);
+ if (!arg || *arg)
+ return -1;
+ return 0;
+ }
+
+ arg = parse_loc(arg, nth_line_cb, cb_data, lines, -anchor, begin);
+
+ if (*arg == ',')
+ arg = parse_loc(arg + 1, nth_line_cb, cb_data, lines, *begin + 1, end);
+
+ if (*arg)
+ return -1;
+
+ if (*begin && *end && *end < *begin) {
+ long tmp;
+ tmp = *end; *end = *begin; *begin = tmp;
+ }
+
+ return 0;
+}
+
+const char *skip_range_arg(const char *arg)
+{
+ if (*arg == ':' || (*arg == '^' && *(arg + 1) == ':'))
+ return parse_range_funcname(arg, NULL, NULL, 0, 0, NULL, NULL, NULL);
+
+ arg = parse_loc(arg, NULL, NULL, 0, -1, NULL);
+
+ if (*arg == ',')
+ arg = parse_loc(arg+1, NULL, NULL, 0, 0, NULL);
+
+ return arg;
+}