summary | refs | log | tree | commit | diff
path: root/builtin/fast-export.c
diff options
context:
space:
mode:
author: Jeff King <peff@peff.net> 2020-06-25 19:48:32 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2020-06-25 21:19:23 (GMT)
commit: 65b5d9fae7684a282f48295b645c2f9da77c2736 (patch)
tree: 3a5c42c4ec2c542d7327403ad712796f91680669 /builtin/fast-export.c
parent: d5bf91fde4430532eb725425c3ef9827048af6b5 (diff)
download: git-65b5d9fae7684a282f48295b645c2f9da77c2736.zip
git-65b5d9fae7684a282f48295b645c2f9da77c2736.tar.gz
git-65b5d9fae7684a282f48295b645c2f9da77c2736.tar.bz2
fast-export: allow seeding the anonymized mapping
After you anonymize a repository, it can be hard to find which commits
correspond between the original and the result, and thus hard to
reproduce commands that triggered bugs in the original.

Let's make it possible to seed the anonymization map. This lets users
either:

  - mark names to be retained as-is, if they don't consider them secret
    (in which case their original commands would just work)

  - map names to new values, which lets them adapt the reproduction
    recipe to the new names without revealing the originals

The implementation is fairly straight-forward. We already store each
anonymized token in a hashmap (so that the same token appearing twice is
converted to the same result). We can just introduce a new "seed"
hashmap which is consulted first.

This does make a few more promises to the user about how we'll anonymize
things (e.g., token-splitting pathnames). But it's unlikely that we'd
want to change those rules, even if the actual anonymization of a single
token changes. And it makes things much easier for the user, who can
unblind only a directory name without having to specify each path within
it.

One alternative to this approach would be to anonymize as we see fit,
and then dump the whole refname and pathname mappings to a file. This
does work, but it's a bit awkward to use (you have to manually dig the
items you care about out of the mapping).

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'builtin/fast-export.c')
-rw-r--r-- builtin/fast-export.c 50
1 file changed, 49 insertions, 1 deletion
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 1cbca5b..b0b09bc 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -45,6 +45,7 @@ static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
static struct refspec refspecs = REFSPEC_INIT_FETCH;
static int anonymize;
+static struct hashmap anonymized_seeds;
static struct revision_sources revision_sources;
static int parse_opt_signed_tag_mode(const struct option *opt,
@@ -168,8 +169,18 @@ static const char *anonymize_str(struct hashmap *map,
hashmap_entry_init(&key.hash, memhash(orig, len));
key.orig = orig;
key.orig_len = len;
- ret = hashmap_get_entry(map, &key, hash, &key);
+ /* First check if it's a token the user configured manually... */
+ if (anonymized_seeds.cmpfn)
+ ret = hashmap_get_entry(&anonymized_seeds, &key, hash, &key);
+ else
+ ret = NULL;
+
+ /* ...otherwise check if we've already seen it in this context... */
+ if (!ret)
+ ret = hashmap_get_entry(map, &key, hash, &key);
+
+ /* ...and finally generate a new mapping if necessary */
if (!ret) {
FLEX_ALLOC_MEM(ret, orig, orig, len);
hashmap_entry_init(&ret->hash, key.hash.hash);
@@ -1147,6 +1158,37 @@ static void handle_deletes(void)
}
}
+static char *anonymize_seed(void *data)
+{
+ return xstrdup(data);
+}
+
+static int parse_opt_anonymize_map(const struct option *opt,
+ const char *arg, int unset)
+{
+ struct hashmap *map = opt->value;
+ const char *delim, *value;
+ size_t keylen;
+
+ BUG_ON_OPT_NEG(unset);
+
+ delim = strchr(arg, ':');
+ if (delim) {
+ keylen = delim - arg;
+ value = delim + 1;
+ } else {
+ keylen = strlen(arg);
+ value = arg;
+ }
+
+ if (!keylen || !*value)
+ return error(_("--anonymize-map token cannot be empty"));
+
+ anonymize_str(map, anonymize_seed, arg, keylen, (void *)value);
+
+ return 0;
+}
+
int cmd_fast_export(int argc, const char **argv, const char *prefix)
{
struct rev_info revs;
@@ -1188,6 +1230,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
N_("Apply refspec to exported refs")),
OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
+ OPT_CALLBACK_F(0, "anonymize-map", &anonymized_seeds, N_("from:to"),
+ N_("convert <from> to <to> in anonymized output"),
+ PARSE_OPT_NONEG, parse_opt_anonymize_map),
OPT_BOOL(0, "reference-excluded-parents",
&reference_excluded_commits, N_("Reference parents which are not in fast-export stream by object id")),
OPT_BOOL(0, "show-original-ids", &show_original_ids,
@@ -1215,6 +1260,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
if (argc > 1)
usage_with_options (fast_export_usage, options);
+ if (anonymized_seeds.cmpfn && !anonymize)
+ die(_("--anonymize-map without --anonymize does not make sense"));
+
if (refspecs_list.nr) {
int i;