summaryrefslogtreecommitdiff
path: root/ewah/ewok_rlw.h
diff options
context:
space:
mode:
authorVicent Marti <tanoku@gmail.com>2013-11-14 12:43:51 (GMT)
committerJunio C Hamano <gitster@pobox.com>2013-12-30 20:17:20 (GMT)
commite1273106f62927e3efdb1cfa107cb1a9f913274c (patch)
tree9a23af0dbec1791e1a0d8b3137e614744ee14f19 /ewah/ewok_rlw.h
parent7e3dae494370b5596a6ea76af1191829ce11bce2 (diff)
downloadgit-e1273106f62927e3efdb1cfa107cb1a9f913274c.zip
git-e1273106f62927e3efdb1cfa107cb1a9f913274c.tar.gz
git-e1273106f62927e3efdb1cfa107cb1a9f913274c.tar.bz2
ewah: compressed bitmap implementation
EWAH is a word-aligned compressed variant of a bitset (i.e. a data structure that acts as a 0-indexed boolean array for many entries). It uses a 64-bit run-length encoding (RLE) compression scheme, trading some compression for better processing speed. The goal of this word-aligned implementation is not to achieve the best compression, but rather to improve query processing time. As it stands right now, this EWAH implementation will always be more efficient storage-wise than its uncompressed alternative. EWAH arrays will be used as the on-disk format to store reachability bitmaps for all objects in a repository while keeping reasonable sizes, in the same way that JGit does. This EWAH implementation is a mostly straightforward port of the original `javaewah` library that JGit currently uses. The library is self-contained and has been embedded whole (4 files) inside the `ewah` folder to ease redistribution. The library is re-licensed under the GPLv2 with the permission of Daniel Lemire, the original author. The source code for the C version can be found on GitHub: https://github.com/vmg/libewok The original Java implementation can also be found on GitHub: https://github.com/lemire/javaewah [jc: stripped debug-only code per Peff's $gmane/239768] Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Helped-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'ewah/ewok_rlw.h')
-rw-r--r--ewah/ewok_rlw.h114
1 files changed, 114 insertions, 0 deletions
diff --git a/ewah/ewok_rlw.h b/ewah/ewok_rlw.h
new file mode 100644
index 0000000..63efdf9
--- /dev/null
+++ b/ewah/ewok_rlw.h
@@ -0,0 +1,114 @@
+/**
+ * Copyright 2013, GitHub, Inc
+ * Copyright 2009-2013, Daniel Lemire, Cliff Moon,
+ * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __EWOK_RLW_H__
+#define __EWOK_RLW_H__
+
+#define RLW_RUNNING_BITS (sizeof(eword_t) * 4)
+#define RLW_LITERAL_BITS (sizeof(eword_t) * 8 - 1 - RLW_RUNNING_BITS)
+
+#define RLW_LARGEST_RUNNING_COUNT (((eword_t)1 << RLW_RUNNING_BITS) - 1)
+#define RLW_LARGEST_LITERAL_COUNT (((eword_t)1 << RLW_LITERAL_BITS) - 1)
+
+#define RLW_LARGEST_RUNNING_COUNT_SHIFT (RLW_LARGEST_RUNNING_COUNT << 1)
+
+#define RLW_RUNNING_LEN_PLUS_BIT (((eword_t)1 << (RLW_RUNNING_BITS + 1)) - 1)
+
+static int rlw_get_run_bit(const eword_t *word)
+{
+ return *word & (eword_t)1;
+}
+
+static inline void rlw_set_run_bit(eword_t *word, int b)
+{
+ if (b) {
+ *word |= (eword_t)1;
+ } else {
+ *word &= (eword_t)(~1);
+ }
+}
+
+static inline void rlw_xor_run_bit(eword_t *word)
+{
+ if (*word & 1) {
+ *word &= (eword_t)(~1);
+ } else {
+ *word |= (eword_t)1;
+ }
+}
+
+static inline void rlw_set_running_len(eword_t *word, eword_t l)
+{
+ *word |= RLW_LARGEST_RUNNING_COUNT_SHIFT;
+ *word &= (l << 1) | (~RLW_LARGEST_RUNNING_COUNT_SHIFT);
+}
+
+static inline eword_t rlw_get_running_len(const eword_t *word)
+{
+ return (*word >> 1) & RLW_LARGEST_RUNNING_COUNT;
+}
+
+static inline eword_t rlw_get_literal_words(const eword_t *word)
+{
+ return *word >> (1 + RLW_RUNNING_BITS);
+}
+
+static inline void rlw_set_literal_words(eword_t *word, eword_t l)
+{
+ *word |= ~RLW_RUNNING_LEN_PLUS_BIT;
+ *word &= (l << (RLW_RUNNING_BITS + 1)) | RLW_RUNNING_LEN_PLUS_BIT;
+}
+
+static inline eword_t rlw_size(const eword_t *self)
+{
+ return rlw_get_running_len(self) + rlw_get_literal_words(self);
+}
+
+struct rlw_iterator {
+ const eword_t *buffer;
+ size_t size;
+ size_t pointer;
+ size_t literal_word_start;
+
+ struct {
+ const eword_t *word;
+ int literal_words;
+ int running_len;
+ int literal_word_offset;
+ int running_bit;
+ } rlw;
+};
+
+void rlwit_init(struct rlw_iterator *it, struct ewah_bitmap *bitmap);
+void rlwit_discard_first_words(struct rlw_iterator *it, size_t x);
+size_t rlwit_discharge(
+ struct rlw_iterator *it, struct ewah_bitmap *out, size_t max, int negate);
+void rlwit_discharge_empty(struct rlw_iterator *it, struct ewah_bitmap *out);
+
+static inline size_t rlwit_word_size(struct rlw_iterator *it)
+{
+ return it->rlw.running_len + it->rlw.literal_words;
+}
+
+static inline size_t rlwit_literal_words(struct rlw_iterator *it)
+{
+ return it->pointer - it->rlw.literal_words;
+}
+
+#endif