create bulk bit readers/writers
author Geoffrey Allott <geoffrey@allott.email>
Mon, 1 Aug 2022 21:02:34 +0000 (22:02 +0100)
committer Geoffrey Allott <geoffrey@allott.email>
Mon, 1 Aug 2022 21:02:34 +0000 (22:02 +0100)
Makefile
src/tans_decode_buf_st.h [new file with mode: 0644]
src/tans_decode_st.c
src/tans_decode_st.h
src/tans_encode_st.c
src/tans_encode_st.h
src/tans_static_buf_bit_writer.c [new file with mode: 0644]
src/tans_static_buf_bit_writer.h [new file with mode: 0644]
test/test_tans_bit_read_write.c
test/test_tans_encode_decode.c

diff --git a/Makefile b/Makefile
index 2a5b509c86d836966b07c40da7ef530d0c41d2b5..e25bfa57926572e1ec9949e1a1824e3aebf7e60d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-CFLAGS = -Isrc -g -Wall -Wextra -Wconversion -fsanitize=address -fsanitize=undefined
+CFLAGS = -Isrc -g -O2 -Wall -Wextra -Wconversion -fsanitize=undefined -fsanitize=address
 LDFLAGS = -lasan -lubsan
 
 OBJS = $(patsubst %.c, %.o, $(wildcard src/*.o))
@@ -20,8 +20,8 @@ test/test_tans_bit_reader.o: src/tans_buf_bit_reader.h src/tans_file_bit_reader.
 test/test_tans_bit_reader: src/tans_buf_bit_reader.o src/tans_file_bit_reader.o
 test/test_tans_bit_writer.o: src/tans_buf_bit_writer.h src/tans_bit_writer.h
 test/test_tans_bit_writer: src/tans_buf_bit_writer.o
-test/test_tans_bit_read_write.o: src/tans_buf_bit_writer.h src/tans_bit_writer.h src/tans_rev_buf_bit_reader.h src/tans_bit_reader.h
-test/test_tans_bit_read_write: src/tans_buf_bit_writer.o src/tans_rev_buf_bit_reader.o
+test/test_tans_bit_read_write.o: src/tans_buf_bit_writer.h src/tans_bit_writer.h src/tans_rev_buf_bit_reader.h src/tans_bit_reader.h src/tans_static_buf_bit_writer.h
+test/test_tans_bit_read_write: src/tans_buf_bit_writer.o src/tans_rev_buf_bit_reader.o src/tans_static_buf_bit_writer.o
 test/test_tans_encode_st.o: src/tans_buf_bit_writer.h src/tans_bit_writer.h src/tans_encode_st.h src/tans_symbol_tbl.h src/tans_freq_tbl.h
 test/test_tans_encode_st: src/tans_buf_bit_writer.o src/tans_encode_st.o src/tans_encode_tbl.o src/tans_symbol_tbl.o src/tans_freq_tbl.o
 test/test_tans_encode_decode.o: src/tans_rev_buf_bit_reader.h src/tans_bit_reader.h src/tans_decode_st.h src/tans_decode_tbl.h src/tans_buf_bit_writer.h src/tans_bit_writer.h src/tans_encode_st.h src/tans_encode_tbl.h src/tans_symbol_tbl.h src/tans_freq_tbl.h
diff --git a/src/tans_decode_buf_st.h b/src/tans_decode_buf_st.h
new file mode 100644 (file)
index 0000000..59e515a
--- /dev/null
+++ b/src/tans_decode_buf_st.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "tans_decode_tbl.h"
+
+#include <stddef.h>
+
+/*
+ * Forward declarations: the prototypes below only pass pointers to these
+ * types. Without a declaration at file scope, naming a struct for the first
+ * time inside a parameter list gives it prototype scope, so no caller could
+ * pass a compatible pointer.
+ */
+struct tans_symbol_tbl;
+struct tans_bit_reader;
+
+/* Decoder state: a decode table plus the current 16-bit state value x. */
+struct tans_decode_buf_st {
+    struct tans_decode_tbl decode_tbl;
+    uint16_t x;
+};
+
+/*
+ * NOTE(review): no definition of the two functions below is visible in this
+ * change - confirm where they are implemented before relying on them.
+ */
+void tans_decode_buf_st_init(struct tans_decode_buf_st *self, struct tans_symbol_tbl *symbol_tbl);
+uint16_t tans_decode_buf_st_next(struct tans_decode_buf_st *self, struct tans_bit_reader *bit_reader);
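diff --git a/src/tans_decode_st.c b/src/tans_decode_st.c
index 55d9ddd5819792f60527a6fcaa58d4a207bfa208..285ab23123422672d6923f6f870acc61d38ee388 100644 (file)
--- a/src/tans_decode_st.c
+++ b/src/tans_decode_st.c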
@@ -22,3 +22,30 @@ uint16_t tans_decode_st_next(struct tans_decode_st *self, struct tans_bit_reader
 
     return (uint16_t) ret;
 }
+
+/* Assemble a 32-bit value from four bytes stored least-significant first. */
+static inline uint32_t get_uint32(uint8_t buf[const static 4])
+{
+    uint32_t word = 0;
+    unsigned k;
+
+    /* Walk from the most significant byte down, shifting earlier bytes up. */
+    for (k = 4; k-- > 0;) {
+        word = (word << 8) | buf[k];
+    }
+
+    return word;
+}
+
+/*
+ * Decode len symbols into data, consuming bits from the end of the bit
+ * stream in buf towards its start.
+ *
+ * self  decoder state; self->x selects the decode-table entry for each step
+ *       and is replaced after every symbol.
+ * data  output array of len bytes, filled from data[len-1] down to data[0].
+ * len   number of symbols to decode.
+ * buf   start of the bit stream. Every step loads the four bytes that end at
+ *       the last byte still holding unread bits, so up to four bytes in
+ *       front of buf are read once fewer than 32 bits remain; the caller
+ *       must make those bytes addressable.
+ * bits  number of stream bits still unread, counted from the start of buf.
+ *
+ * Returns the number of bits left unread after len symbols.
+ *
+ * NOTE(review): nothing checks that t.nb_bits <= bits, so on a truncated or
+ * corrupt stream bits wraps around - confirm callers only pass trusted input.
+ */
+size_t tans_decode_st_decode(struct tans_decode_st *self, uint8_t *data, size_t len, uint8_t *buf, uint32_t bits)
+{
+    size_t i;
+    struct tans_decode_tbl_entry t;
+    uint32_t bit, byte, value;
+
+    for (i = 0; i < len; ++i) {
+        t = self->decode_tbl.entries[self->x];
+        /* Unread bits in the last occupied byte: bits & 7, with 0 mapped to 8. */
+        bit = bits & 7;
+        bit += (uint32_t) 8 * (bit == 0);
+        /* Number of bytes that still hold unread bits (bits rounded up to bytes). */
+        byte = (uint32_t) ((bits + 7) >> 3);
+        /* Load the four bytes ending at that byte; it lands in value bits 24..31. */
+        value = get_uint32(buf - 4 + byte);
+        /* Bring the topmost t.nb_bits unread bits down to bit 0. The "& 31" maps a
+         * shift count of 32 (bit == 8, nb_bits == 0) to 0 instead of shifting by
+         * the full width; the mask below then yields 0 anyway. */
+        value >>= (24 + bit - t.nb_bits) & 31;
+        value &= (uint32_t) ((1 << t.nb_bits) - 1);
+        self->x = (uint16_t) (t.new_x + value);
+        /* Output is filled from the last element towards the first. */
+        data[len-i-1] = t.symbol;
+        bits -= t.nb_bits;
+    }
+
+    return bits;
+}
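diff --git a/src/tans_decode_st.h b/src/tans_decode_st.h
index 09165a84ca852238d863c34d251c2c07ecf5b6c3..238ab56ce261dce291c373cae4d9eee2780435d1 100644 (file)
--- a/src/tans_decode_st.h
+++ b/src/tans_decode_st.h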
@@ -3,6 +3,8 @@
 #include "tans_decode_tbl.h"
 #include "tans_bit_reader.h"
 
+#include <stddef.h>
+
 struct tans_decode_st {
     struct tans_decode_tbl decode_tbl;
     uint16_t x;
@@ -10,3 +12,4 @@ struct tans_decode_st {
 
 void tans_decode_st_init(struct tans_decode_st *self, struct tans_symbol_tbl *symbol_tbl);
 uint16_t tans_decode_st_next(struct tans_decode_st *self, struct tans_bit_reader *bit_reader);
+/*
+ * Decode len symbols into data (last element first) from the bit stream that
+ * starts at buf and has bits unread bits. Up to four bytes in front of buf
+ * are read. Returns the number of bits left unread.
+ */
+size_t tans_decode_st_decode(struct tans_decode_st *self, uint8_t *data, size_t len, uint8_t *buf, uint32_t bits);
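diff --git a/src/tans_encode_st.c b/src/tans_encode_st.c
index e2768ed0f088d89bafa0f0c2f82e524736f9a0f9..6c80a8917ce00a7a4c1697014d82db51b8a930fa 100644 (file)
--- a/src/tans_encode_st.c
+++ b/src/tans_encode_st.c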
@@ -23,3 +23,40 @@ uint16_t tans_encode_st_next(struct tans_encode_st *self, struct tans_bit_writer
 
     return written;
 }
+
+/*
+ * Assemble a 32-bit value from four bytes stored least-significant first.
+ *
+ * Each byte is widened to uint32_t before it is shifted. Shifting the
+ * int-promoted byte instead overflows a signed int when buf[3] >= 0x80,
+ * which is undefined behaviour.
+ */
+static inline uint32_t get_uint32(uint8_t buf[const static 4])
+{
+    return (uint32_t) buf[0] | (uint32_t) buf[1] << 8 | (uint32_t) buf[2] << 16 | (uint32_t) buf[3] << 24;
+}
+
+/* Store value into buf as four bytes, least-significant byte first. */
+static inline void set_uint32(uint8_t buf[static 4], uint32_t value)
+{
+    unsigned k;
+
+    for (k = 0; k < 4; ++k) {
+        buf[k] = (uint8_t) (value >> (8 * k));
+    }
+}
+
+/*
+ * Encode the len symbols of data, in order, appending their bits to buf.
+ *
+ * self  encoder state; self->x is read and replaced for every symbol.
+ * data  symbols to encode.
+ * len   number of symbols in data.
+ * buf   output bit stream, written from bit 0 upwards with the low bit of
+ *       each field first. Every step reads the four bytes starting at
+ *       written / 8, ORs the new bits in and stores them back, so buf must
+ *       be zero beyond the bits already written and must extend at least
+ *       four bytes past the byte being written.
+ *
+ * Returns the total number of bits written.
+ *
+ * NOTE(review): no capacity is passed in, so the caller alone guarantees that
+ * buf is large enough - confirm at each call site.
+ */
+uint32_t tans_encode_st_encode(struct tans_encode_st *self, uint8_t *data, uint32_t len, uint8_t *buf)
+{
+    uint8_t nb_bits;
+    uint32_t i, written = 0;
+    uint32_t bit, byte, value;
+    uint8_t symbol;
+
+    for (i = 0; i < len; ++i) {
+        symbol = data[i];
+        /* Number of low bits of the state that are emitted for this symbol. */
+        nb_bits = (uint8_t) ((self->x + self->symbol_tbl.entries[symbol].nb) >> (self->symbol_tbl.log2_tblsz + 1));
+        /* Split the output position into a bit offset and a byte index. */
+        bit = written & 7;
+        byte = (uint32_t) (written >> 3);
+        value = (uint32_t) self->x;
+        value &= (uint32_t) ((1 << nb_bits) - 1);
+        /* NOTE(review): bit is at most 7, so this keeps every bit only while
+         * nb_bits <= 25 - presumably guaranteed by the table size; confirm. */
+        value <<= bit;
+        value |= get_uint32(buf + byte);
+        set_uint32(buf + byte, value);
+        written += nb_bits;
+        /* Next state: table lookup on the state with the emitted bits removed. */
+        self->x = self->encode_tbl.entries[(uint16_t) (self->symbol_tbl.entries[symbol].start + (self->x >> nb_bits))].x;
+    }
+
+    return written;
+}
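diff --git a/src/tans_encode_st.h b/src/tans_encode_st.h
index e31ef793d19bc89645455e1e00158486e2230f4e..10ca6f3d58d0c83121c883f09b95707f4b890098 100644 (file)
--- a/src/tans_encode_st.h
+++ b/src/tans_encode_st.h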
@@ -11,3 +11,4 @@ struct tans_encode_st {
 
 void tans_encode_st_init(struct tans_encode_st *self, const struct tans_symbol_tbl *symbol_tbl);
 uint16_t tans_encode_st_next(struct tans_encode_st *self, struct tans_bit_writer *bit_writer, uint8_t symbol);
+/*
+ * Encode the len symbols of data, in order, into the bit stream at buf and
+ * return the number of bits written. buf must be zeroed beforehand and large
+ * enough for the output plus the four-byte window written on every step.
+ */
+uint32_t tans_encode_st_encode(struct tans_encode_st *self, uint8_t *data, uint32_t len, uint8_t *buf);
diff --git a/src/tans_static_buf_bit_writer.c b/src/tans_static_buf_bit_writer.c
new file mode 100644 (file)
index 0000000..6b05248
--- /dev/null
+++ b/src/tans_static_buf_bit_writer.c
@@ -0,0 +1,44 @@
+#include "tans_static_buf_bit_writer.h"
+
+#include <string.h>
+
+/*
+ * Assemble a 32-bit value from four bytes stored least-significant first.
+ *
+ * Each byte is widened to uint32_t before it is shifted. Shifting the
+ * int-promoted byte instead overflows a signed int when buf[3] >= 0x80,
+ * which is undefined behaviour.
+ */
+static inline uint32_t get_uint32(uint8_t buf[const static 4])
+{
+    return (uint32_t) buf[0] | (uint32_t) buf[1] << 8 | (uint32_t) buf[2] << 16 | (uint32_t) buf[3] << 24;
+}
+
+/* Store value into buf as four bytes, least-significant byte first. */
+static inline void set_uint32(uint8_t buf[static 4], uint32_t value)
+{
+    uint8_t *out = buf;
+    uint8_t *const end = buf + 4;
+
+    /* Emit the low byte, then shift the next one into place. */
+    while (out != end) {
+        *out++ = (uint8_t) value;
+        value >>= 8;
+    }
+}
+
+/*
+ * Append the low `bits` bits of value to the buffer, least-significant bit
+ * first, at the writer's current bit position.
+ *
+ * writer  the static-buffer writer, passed through its generic interface.
+ * value   source of the bits; anything above the low `bits` bits is ignored.
+ * bits    number of bits to append.
+ *
+ * Returns bits on success. Returns (uint16_t) -1 and leaves the writer
+ * untouched when the bits would not fit in the buffer, or when the field
+ * would not fit in the 32-bit window used for the store.
+ */
+static uint16_t tans_static_buf_bit_writer_write(struct tans_bit_writer *writer, uint32_t value, uint8_t bits)
+{
+    struct tans_static_buf_bit_writer *self = (struct tans_static_buf_bit_writer *) writer;
+    uint32_t bit = self->bit % 8;
+    uint32_t byte = self->bit / 8;
+    uint32_t mask;
+
+    if (self->bit + bits > TANS_STATIC_BUF_BIT_WRITER_BUFSZ * 8) {
+        return (uint16_t) -1;
+    }
+
+    /* The field is shifted up by `bit` inside a single 32-bit word; anything
+     * wider would silently lose its top bits, so refuse it instead. */
+    if (bit + bits > 32) {
+        return (uint16_t) -1;
+    }
+
+    self->bit += bits;
+
+    /* Built in unsigned arithmetic with the full-width case handled apart:
+     * 1 << bits on a signed int is undefined once bits reaches 31. */
+    mask = bits >= 32 ? ~(uint32_t) 0 : ((uint32_t) 1 << bits) - 1;
+
+    value &= mask;
+    value <<= bit;
+    /* OR into the bytes already stored: everything at and beyond the write
+     * position has to be zero for the result to be correct. */
+    value |= get_uint32(self->buf + byte);
+    set_uint32(self->buf + byte, value);
+
+    return bits;
+}
+
+/* Operations table for the static-buffer writer; tans_static_buf_bit_writer_init installs it. */
+static const struct tans_bit_writer_ops vtable = {
+    .write = tans_static_buf_bit_writer_write,
+};
+
+/*
+ * Prepare self for writing from bit position 0.
+ *
+ * The write routine ORs new bits into the bytes already in the buffer, so the
+ * buffer is cleared here. Otherwise a writer with automatic storage, or one
+ * that is re-initialised after use, would mix stale bytes into its output.
+ */
+void tans_static_buf_bit_writer_init(struct tans_static_buf_bit_writer *self)
+{
+    self->vtable = &vtable;
+    self->bit = 0;
+    memset(self->buf, 0, sizeof self->buf);
+}
diff --git a/src/tans_static_buf_bit_writer.h b/src/tans_static_buf_bit_writer.h
new file mode 100644 (file)
index 0000000..da1b430
--- /dev/null
+++ b/src/tans_static_buf_bit_writer.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "tans_bit_writer.h"
+
+/* Usable capacity of the writer's buffer, in bytes. */
+#define TANS_STATIC_BUF_BIT_WRITER_BUFSZ (256 * 1024)
+/* Extra bytes after the usable area, so that the four-byte read-modify-write
+ * done by the write routine stays inside buf even at the very end. */
+#define TANS_STATIC_BUF_BIT_WRITER_OVERFLOW 4
+
+/* Bit writer backed by a fixed-size buffer stored inside the object itself. */
+struct tans_static_buf_bit_writer {
+    /* Operations table. Callers cast this struct to struct tans_bit_writer *
+     * and reach vtable through it, so it presumably has to stay first. */
+    const struct tans_bit_writer_ops *vtable;
+    /* Number of bits written so far. */
+    uint32_t bit;
+    /* Output bytes: usable capacity followed by the overflow slack. */
+    uint8_t buf[TANS_STATIC_BUF_BIT_WRITER_BUFSZ + TANS_STATIC_BUF_BIT_WRITER_OVERFLOW];
+};
+
+void tans_static_buf_bit_writer_init(struct tans_static_buf_bit_writer *self);
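diff --git a/test/test_tans_bit_read_write.c b/test/test_tans_bit_read_write.c
index 4ce21532075c6dfdc651140ebd71e3740800a2a4..cdc93961a901f80c3dba01a0640cf7958869e47a 100644 (file)
--- a/test/test_tans_bit_read_write.c
+++ b/test/test_tans_bit_read_write.c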
@@ -2,6 +2,7 @@
 
 #include "tans_buf_bit_writer.h"
 #include "tans_rev_buf_bit_reader.h"
+#include "tans_static_buf_bit_writer.h"
 
 enum test_result test_tans_buf_bit_write_read_rev(void)
 {
@@ -42,7 +43,57 @@ enum test_result test_tans_buf_bit_write_read_rev(void)
     return TEST_SUCCESS;
 }
 
+/*
+ * Round trip through the static-buffer writer and the reverse buffer reader:
+ * eight rounds of eleven fields (64 bits per round, 64 bytes in total) are
+ * written, the bytes are copied out, and the fields are read back in exactly
+ * the opposite order.
+ *
+ * The writer ORs into its buffer, so this test relies on buf_bit_writer.buf
+ * being all zero once tans_static_buf_bit_writer_init has returned.
+ */
+enum test_result test_tans_static_buf_bit_write_read_rev(void)
+{
+    uint32_t i;
+    struct tans_static_buf_bit_writer buf_bit_writer;
+    struct tans_bit_writer *bit_writer = (struct tans_bit_writer *) &buf_bit_writer;
+    struct tans_rev_buf_bit_reader buf_bit_reader;
+    struct tans_bit_reader *bit_reader = (struct tans_bit_reader *) &buf_bit_reader;
+    uint8_t buf[64] = {0};
+
+    tans_static_buf_bit_writer_init(&buf_bit_writer);
+    /* NOTE(review): the reader is set up before buf is filled - presumably it
+     * only records the pointer and length; confirm. */
+    tans_rev_buf_bit_reader_init(&buf_bit_reader, buf, sizeof buf);
+    /* A zero-width write must succeed and report 0 bits. */
+    ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0, 0), 0);
+
+    for (i = 0; i < 8; ++i) {
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 1, 8), 8);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0xf, 4), 4);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0x2f, 6), 6);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0, 2), 2);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0x3fe0, 16), 16);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0xd0, 8), 8);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0x0f, 7), 7);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 1, 1), 1);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0, 3), 3);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0x18, 5), 5);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0xf, 4), 4);
+    }
+
+    /* Hand the written bytes to the buffer the reader was initialised with. */
+    for (i = 0; i < 64; ++i) {
+        buf[i] = buf_bit_writer.buf[i];
+    }
+
+
+    /* Same fields as above, last written first. */
+    for (i = 0; i < 8; ++i) {
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 4), 0xf);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 5), 0x18);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 3), 0);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 1), 1);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 7), 0x0f);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 8), 0xd0);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 16), 0x3fe0);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 2), 0);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 6), 0x2f);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 4), 0xf);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 8), 1);
+    }
+
+    return TEST_SUCCESS;
+}
+
+/* Test driver: passes each listed test case to RUN_TEST. */
 int main(void)
 {
     RUN_TEST(test_tans_buf_bit_write_read_rev);
+    RUN_TEST(test_tans_static_buf_bit_write_read_rev);
 }
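diff --git a/test/test_tans_encode_decode.c b/test/test_tans_encode_decode.c
index cdca52ef707bfce635696fcd9cb3d9848a932a88..e7d15ff944e6fe3d0221174e75243661b899bdad 100644 (file)
--- a/test/test_tans_encode_decode.c
+++ b/test/test_tans_encode_decode.c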
@@ -131,9 +131,45 @@ enum test_result test_tans_encode_decode_long_stream(void)
     return TEST_SUCCESS;
 }
 
+/*
+ * Round trip of 65536 symbols through the bulk encoder and bulk decoder.
+ *
+ * Symbol 0 is given probability 0.75 and fills three of every four input
+ * positions; the remaining positions cycle through all byte values. The
+ * test checks both that the decoder consumes exactly the bits the encoder
+ * produced and that every decoded symbol equals its input.
+ */
+enum test_result test_tans_encode_decode_long_stream_bulk(void)
+{
+    struct tans_freq_tbl freq_tbl;
+    struct tans_symbol_tbl symbol_tbl;
+    struct tans_encode_st encode_st;
+    struct tans_decode_st decode_st;
+    struct tans_rev_buf_bit_reader buf_bit_reader;
+    uint8_t data[65536];
+    double p[256];
+    uint8_t rec[65536];
+    /* Four leading bytes of padding: the bulk decoder reads up to four bytes
+     * in front of the stream start it is given (buf + 4 below). */
+    uint8_t buf[4 + 32768] = {0};
+    uint16_t n_symbols = 256;
+    uint16_t log2_tblsz = 10;
+    uint32_t i;
+
+    p[0] = 0.75;
+    for (i = 1; i < n_symbols; ++i) p[i] = 0.25 / n_symbols;
+    ASSERT_NE(tans_freq_tbl_init(&freq_tbl, n_symbols, p, log2_tblsz), -1);
+    ASSERT_EQ(tans_symbol_tbl_init(&symbol_tbl, &freq_tbl), 0);
+    tans_encode_st_init(&encode_st, &symbol_tbl);
+    tans_decode_st_init(&decode_st, &symbol_tbl);
+    tans_rev_buf_bit_reader_init(&buf_bit_reader, buf, sizeof buf);
+
+    for (i = 0; i < 65536; ++i) {
+        data[i] = (uint8_t) (i % 4 == 3 ? i / 4 : 0);
+    }
+
+    buf_bit_reader.bit = (uint32_t) tans_encode_st_encode(&encode_st, data, sizeof data, buf + 4);
+    /* Seed the decoder with the encoder's final state, shifted down by the table size. */
+    decode_st.x = (uint16_t) (encode_st.x - (1 << log2_tblsz));
+
+    ASSERT_EQ(tans_decode_st_decode(&decode_st, rec, sizeof rec, buf + 4, buf_bit_reader.bit), 0);
+
+    /* Consuming every bit does not prove the round trip: compare each symbol. */
+    for (i = 0; i < sizeof rec; ++i) {
+        ASSERT_EQ(rec[i], data[i]);
+    }
+
+    return TEST_SUCCESS;
+}
+
+/* Test driver: passes each listed test case to RUN_TEST. */
 int main(void)
 {
     RUN_TEST(test_tans_encode_decode_equal_freq);
     RUN_TEST(test_tans_encode_decode_high_zero_probability);
     RUN_TEST(test_tans_encode_decode_long_stream);
+    RUN_TEST(test_tans_encode_decode_long_stream_bulk);
 }