summaryrefslogtreecommitdiff
path: root/oidset.h
diff options
context:
space:
mode:
author: Jeff King <peff@peff.net> 2017-02-08 20:53:07 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2017-02-08 23:39:55 (GMT)
commit: 29c2bd5fa8cb97eedcd463d49cfc7e753feb3145 (patch)
tree: 8822280e8073c9af3dccb363d81a881382f6f879 /oidset.h
parent: 41a078c60b82bad4edf9d1bd8e826aae5f020ee5 (diff)
downloadgit-29c2bd5fa8cb97eedcd463d49cfc7e753feb3145.zip
git-29c2bd5fa8cb97eedcd463d49cfc7e753feb3145.tar.gz
git-29c2bd5fa8cb97eedcd463d49cfc7e753feb3145.tar.bz2
add oidset API
This is similar to many of our uses of sha1-array, but it overcomes one limitation of a sha1-array: when you are de-duplicating a large input with relatively few unique entries, sha1-array uses 20 bytes per non-unique entry. Whereas this set will use memory linear in the number of unique entries (albeit a few more than 20 bytes due to hashmap overhead).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'oidset.h')
-rw-r--r--oidset.h45
1 files changed, 45 insertions, 0 deletions
diff --git a/oidset.h b/oidset.h
new file mode 100644
index 0000000..b7eaab5
--- /dev/null
+++ b/oidset.h
@@ -0,0 +1,45 @@
+#ifndef OIDSET_H
+#define OIDSET_H
+
+/**
+ * This API is similar to sha1-array, in that it maintains a set of object ids
+ * in a memory-efficient way. The major differences are:
+ *
+ * 1. It uses a hash, so we can do online duplicate removal, rather than
+ *    sort-and-uniq at the end. This can reduce memory footprint if you have
+ *    a large list of oids with many duplicates.
+ *
+ * 2. The per-unique-oid memory footprint is slightly higher due to hash
+ *    table overhead.
+ */
+
+/**
+ * A set of object ids. Zero-initialize it (e.g. with OIDSET_INIT) before use.
+ */
+struct oidset {
+ struct hashmap map; /* by value: needs the complete struct hashmap type */
+};
+
+#define OIDSET_INIT { { NULL } } /* map's first member = NULL; rest zeroed */
+
+/**
+ * Returns non-zero if `oid` is in `set`, 0 otherwise; both are const here.
+ */
+int oidset_contains(const struct oidset *set, const struct object_id *oid);
+
+/**
+ * Add `oid` to `set`. The set stores its own copy, so the caller's `oid`
+ * need not outlive this call.
+ *
+ * Returns 1 if `oid` was already present and 0 if it was newly added, which
+ * lets callers do an efficient check-and-add in a single call.
+ */
+int oidset_insert(struct oidset *set, const struct object_id *oid);
+
+/**
+ * Empty `set`: remove every entry and free any resources associated with
+ * the oidset.
+ */
+void oidset_clear(struct oidset *set);
+
+#endif /* OIDSET_H */