From: Geoffrey Allott Date: Mon, 1 Aug 2022 21:02:34 +0000 (+0100) Subject: create bulk bit readers/writers X-Git-Url: https://git.pointlesshacks.com/?a=commitdiff_plain;h=d5972aa32ef27b4bb9c00676a9f34df9af247a8b;p=tANS.git create bulk bit readers/writers --- diff --git a/Makefile b/Makefile index 2a5b509..e25bfa5 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -CFLAGS = -Isrc -g -Wall -Wextra -Wconversion -fsanitize=address -fsanitize=undefined +CFLAGS = -Isrc -g -O2 -Wall -Wextra -Wconversion -fsanitize=undefined -fsanitize=address LDFLAGS = -lasan -lubsan OBJS = $(patsubst %.c, %.o, $(wildcard src/*.o)) @@ -20,8 +20,8 @@ test/test_tans_bit_reader.o: src/tans_buf_bit_reader.h src/tans_file_bit_reader. test/test_tans_bit_reader: src/tans_buf_bit_reader.o src/tans_file_bit_reader.o test/test_tans_bit_writer.o: src/tans_buf_bit_writer.h src/tans_bit_writer.h test/test_tans_bit_writer: src/tans_buf_bit_writer.o -test/test_tans_bit_read_write.o: src/tans_buf_bit_writer.h src/tans_bit_writer.h src/tans_rev_buf_bit_reader.h src/tans_bit_reader.h -test/test_tans_bit_read_write: src/tans_buf_bit_writer.o src/tans_rev_buf_bit_reader.o +test/test_tans_bit_read_write.o: src/tans_buf_bit_writer.h src/tans_bit_writer.h src/tans_rev_buf_bit_reader.h src/tans_bit_reader.h src/tans_static_buf_bit_writer.h +test/test_tans_bit_read_write: src/tans_buf_bit_writer.o src/tans_rev_buf_bit_reader.o src/tans_static_buf_bit_writer.o test/test_tans_encode_st.o: src/tans_buf_bit_writer.h src/tans_bit_writer.h src/tans_encode_st.h src/tans_symbol_tbl.h src/tans_freq_tbl.h test/test_tans_encode_st: src/tans_buf_bit_writer.o src/tans_encode_st.o src/tans_encode_tbl.o src/tans_symbol_tbl.o src/tans_freq_tbl.o test/test_tans_encode_decode.o: src/tans_rev_buf_bit_reader.h src/tans_bit_reader.h src/tans_decode_st.h src/tans_decode_tbl.h src/tans_buf_bit_writer.h src/tans_bit_writer.h src/tans_encode_st.h src/tans_encode_tbl.h src/tans_symbol_tbl.h src/tans_freq_tbl.h diff --git 
a/src/tans_decode_buf_st.h b/src/tans_decode_buf_st.h
new file mode 100644
index 0000000..59e515a
--- /dev/null
+++ b/src/tans_decode_buf_st.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "tans_decode_tbl.h"
+#include "tans_bit_reader.h"
+
+#include <stdint.h>
+
+/* Decoder state with the same two fields as struct tans_decode_st. */
+struct tans_decode_buf_st {
+    struct tans_decode_tbl decode_tbl;
+    uint16_t x;
+};
+
+void tans_decode_buf_st_init(struct tans_decode_buf_st *self, struct tans_symbol_tbl *symbol_tbl);
+uint16_t tans_decode_buf_st_next(struct tans_decode_buf_st *self, struct tans_bit_reader *bit_reader);
diff --git a/src/tans_decode_st.c b/src/tans_decode_st.c
index 55d9ddd..285ab23 100644
--- a/src/tans_decode_st.c
+++ b/src/tans_decode_st.c
@@ -22,3 +22,54 @@ uint16_t tans_decode_st_next(struct tans_decode_st *self, struct tans_bit_reader
 
     return (uint16_t) ret;
 }
+
+/* Read four bytes as a little-endian 32-bit value. */
+static inline uint32_t get_uint32(uint8_t buf[const static 4])
+{
+    return (uint32_t) buf[0] | (uint32_t) buf[1] << 8 | (uint32_t) buf[2] << 16 | (uint32_t) buf[3] << 24;
+}
+
+/*
+ * Bulk-decode len symbols from a bit stream that is consumed backwards.
+ *
+ * bits is the number of unread bits stored at buf. Each step takes the
+ * nb_bits of the current table entry from the end of the unread range and
+ * stores the entry's symbol, filling data from data[len - 1] down to data[0].
+ *
+ * A step loads the four bytes that end with the byte holding the last unread
+ * bit, so the load starts in front of buf once 24 or fewer bits remain: the
+ * four bytes preceding buf must be readable.
+ *
+ * There is no check that enough bits remain for a step; if the stream is too
+ * short the unsigned bit counter wraps around.
+ *
+ * Returns the number of bits left unread.
+ */
+size_t tans_decode_st_decode(struct tans_decode_st *self, uint8_t *data, size_t len, uint8_t *buf, uint32_t bits)
+{
+    size_t i;
+    struct tans_decode_tbl_entry t;
+    uint32_t bit, byte, value;
+
+    for (i = 0; i < len; ++i) {
+        t = self->decode_tbl.entries[self->x];
+        /* End of the unread bits within their last byte: 1..8 (0 wraps to 8). */
+        bit = bits & 7;
+        bit += (uint32_t) 8 * (bit == 0);
+        /* Number of bytes covered by the unread bits. */
+        byte = (uint32_t) ((bits + 7) >> 3);
+        value = get_uint32(buf - 4 + byte);
+        /*
+         * The last loaded byte lands in bits 24..31, so the unread bits end
+         * at bit 24 + bit; move the final nb_bits of them down to bit 0. The
+         * & 31 keeps the shift count below 32 when nb_bits is 0 and bit is 8.
+         */
+        value >>= (24 + bit - t.nb_bits) & 31;
+        value &= ((uint32_t) 1 << t.nb_bits) - 1;
+        self->x = (uint16_t) (t.new_x + value);
+        data[len-i-1] = t.symbol;
+        bits -= t.nb_bits;
+    }
+
+    return bits;
+}
diff --git a/src/tans_decode_st.h b/src/tans_decode_st.h
index 09165a8..238ab56 100644
--- a/src/tans_decode_st.h
+++ b/src/tans_decode_st.h
@@ -3,6 +3,8 @@
 #include "tans_decode_tbl.h"
 #include "tans_bit_reader.h"
 
+#include <stddef.h>
+
 struct tans_decode_st {
     struct tans_decode_tbl decode_tbl;
     uint16_t x;
@@ -10,3 +12,4 @@ struct tans_decode_st {
 
 void tans_decode_st_init(struct tans_decode_st *self, struct tans_symbol_tbl *symbol_tbl);
 uint16_t
tans_decode_st_next(struct tans_decode_st *self, struct tans_bit_reader *bit_reader);
+size_t tans_decode_st_decode(struct tans_decode_st *self, uint8_t *data, size_t len, uint8_t *buf, uint32_t bits);
diff --git a/src/tans_encode_st.c b/src/tans_encode_st.c
index e2768ed..6c80a89 100644
--- a/src/tans_encode_st.c
+++ b/src/tans_encode_st.c
@@ -23,3 +23,57 @@ uint16_t tans_encode_st_next(struct tans_encode_st *self, struct tans_bit_writer
 
     return written;
 }
+
+/* Read four bytes as a little-endian 32-bit value. */
+static inline uint32_t get_uint32(uint8_t buf[const static 4])
+{
+    /* Widen each byte before shifting: buf[3] << 24 on a promoted int overflows for bytes >= 0x80. */
+    return (uint32_t) buf[0] | (uint32_t) buf[1] << 8 | (uint32_t) buf[2] << 16 | (uint32_t) buf[3] << 24;
+}
+
+/* Store value as four little-endian bytes. */
+static inline void set_uint32(uint8_t buf[static 4], uint32_t value)
+{
+    buf[0] = (uint8_t) value;
+    buf[1] = (uint8_t) (value >> 8);
+    buf[2] = (uint8_t) (value >> 16);
+    buf[3] = (uint8_t) (value >> 24);
+}
+
+/*
+ * Bulk-encode len symbols from data into the bit stream at buf.
+ *
+ * For each symbol the low nb_bits of the state x are appended to the stream
+ * and x is replaced by its successor from the encode table.
+ *
+ * Every step rewrites the four bytes at the current write position, ORing the
+ * new bits in: buf must be zeroed beforehand and must have three spare bytes
+ * after the byte where the last symbol's bits start. Its size is not checked.
+ *
+ * Returns the number of bits written.
+ */
+uint32_t tans_encode_st_encode(struct tans_encode_st *self, uint8_t *data, uint32_t len, uint8_t *buf)
+{
+    uint8_t nb_bits;
+    uint32_t i, written = 0;
+    uint32_t bit, byte, value;
+    uint8_t symbol;
+
+    for (i = 0; i < len; ++i) {
+        symbol = data[i];
+        /* Number of state bits to emit for this symbol. */
+        nb_bits = (uint8_t) ((self->x + self->symbol_tbl.entries[symbol].nb) >> (self->symbol_tbl.log2_tblsz + 1));
+        bit = written & 7;
+        byte = (uint32_t) (written >> 3);
+        value = (uint32_t) self->x;
+        value &= ((uint32_t) 1 << nb_bits) - 1;
+        value <<= bit;
+        /* Merge the new bits into the bytes already written at this position. */
+        value |= get_uint32(buf + byte);
+        set_uint32(buf + byte, value);
+        written += nb_bits;
+        self->x = self->encode_tbl.entries[(uint16_t) (self->symbol_tbl.entries[symbol].start + (self->x >> nb_bits))].x;
+    }
+
+    return written;
+}
diff --git a/src/tans_encode_st.h b/src/tans_encode_st.h
index e31ef79..10ca6f3 100644
--- a/src/tans_encode_st.h
+++ b/src/tans_encode_st.h
@@ -11,3 +11,4 @@ struct tans_encode_st {
 
 void tans_encode_st_init(struct tans_encode_st *self, const struct tans_symbol_tbl *symbol_tbl);
 uint16_t tans_encode_st_next(struct tans_encode_st *self, struct tans_bit_writer *bit_writer, uint8_t symbol);
+uint32_t tans_encode_st_encode(struct tans_encode_st *self, uint8_t *data, uint32_t len, uint8_t *buf);
diff --git a/src/tans_static_buf_bit_writer.c b/src/tans_static_buf_bit_writer.c
new file mode 100644
index 0000000..6b05248
--- /dev/null
+++ b/src/tans_static_buf_bit_writer.c
@@ -0,0 +1,66 @@
+#include "tans_static_buf_bit_writer.h"
+
+#include <string.h>
+
+/* Read four bytes as a little-endian 32-bit value. */
+static inline uint32_t get_uint32(uint8_t buf[const static 4])
+{
+    /* Widen each byte before shifting: buf[3] << 24 on a promoted int overflows for bytes >= 0x80. */
+    return (uint32_t) buf[0] | (uint32_t) buf[1] << 8 | (uint32_t) buf[2] << 16 | (uint32_t) buf[3] << 24;
+}
+
+/* Store value as four little-endian bytes. */
+static inline void set_uint32(uint8_t buf[static 4], uint32_t value)
+{
+    buf[0] = (uint8_t) value;
+    buf[1] = (uint8_t) (value >> 8);
+    buf[2] = (uint8_t) (value >> 16);
+    buf[3] = (uint8_t) (value >> 24);
+}
+
+/*
+ * Append the low bits bits of value at the current write position.
+ *
+ * The bits are merged through a 32-bit window starting at the byte that holds
+ * the write position, so one call can take at most 32 bits minus the bit
+ * offset within that byte. The OVERFLOW bytes at the end of buf keep the
+ * window inside the array when writing near the end.
+ *
+ * Returns bits on success, or (uint16_t) -1 if the write does not fit the
+ * window or would pass TANS_STATIC_BUF_BIT_WRITER_BUFSZ bytes.
+ */
+static uint16_t tans_static_buf_bit_writer_write(struct tans_bit_writer *writer, uint32_t value, uint8_t bits)
+{
+    struct tans_static_buf_bit_writer *self = (struct tans_static_buf_bit_writer *) writer;
+    uint32_t bit = self->bit % 8;
+    uint32_t byte = self->bit / 8;
+
+    if (bit + bits > 32 || self->bit + bits > TANS_STATIC_BUF_BIT_WRITER_BUFSZ * 8) {
+        return (uint16_t) -1;
+    }
+
+    self->bit += bits;
+
+    /* A full 32-bit write needs no mask, and shifting by 32 would be undefined. */
+    if (bits < 32) {
+        value &= ((uint32_t) 1 << bits) - 1;
+    }
+    value <<= bit;
+    value |= get_uint32(self->buf + byte);
+    set_uint32(self->buf + byte, value);
+
+    return bits;
+}
+
+static const struct tans_bit_writer_ops vtable = {
+    .write = tans_static_buf_bit_writer_write,
+};
+
+/* Attach the write operation, rewind to bit 0 and clear the buffer. */
+void tans_static_buf_bit_writer_init(struct tans_static_buf_bit_writer *self)
+{
+    self->vtable = &vtable;
+    self->bit = 0;
+    /* write() ORs new bits into the stored bytes, so they must start out as zero. */
+    memset(self->buf, 0, sizeof self->buf);
+}
diff --git a/src/tans_static_buf_bit_writer.h b/src/tans_static_buf_bit_writer.h
new file mode 100644
index 0000000..da1b430
--- /dev/null
+++ b/src/tans_static_buf_bit_writer.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "tans_bit_writer.h"
+
+#define TANS_STATIC_BUF_BIT_WRITER_BUFSZ (256 * 1024)
+#define TANS_STATIC_BUF_BIT_WRITER_OVERFLOW 4
+
+struct tans_static_buf_bit_writer {
+    const struct tans_bit_writer_ops *vtable;
+    uint32_t bit;
+    uint8_t buf[TANS_STATIC_BUF_BIT_WRITER_BUFSZ +
TANS_STATIC_BUF_BIT_WRITER_OVERFLOW];
+};
+
+void tans_static_buf_bit_writer_init(struct tans_static_buf_bit_writer *self);
diff --git a/test/test_tans_bit_read_write.c b/test/test_tans_bit_read_write.c
index 4ce2153..cdc9396 100644
--- a/test/test_tans_bit_read_write.c
+++ b/test/test_tans_bit_read_write.c
@@ -2,6 +2,7 @@
 
 #include "tans_buf_bit_writer.h"
 #include "tans_rev_buf_bit_reader.h"
+#include "tans_static_buf_bit_writer.h"
 
 enum test_result test_tans_buf_bit_write_read_rev(void)
 {
@@ -42,7 +43,63 @@ enum test_result test_tans_buf_bit_write_read_rev(void)
     return TEST_SUCCESS;
 }
 
+/* Write a fixed bit pattern with the static-buffer writer and read it back in reverse. */
+enum test_result test_tans_static_buf_bit_write_read_rev(void)
+{
+    uint32_t i;
+    /*
+     * Zero-initialized static storage: write() ORs new bits into the bytes
+     * already in buf, and the object (over 256 KiB) stays off the stack.
+     */
+    static struct tans_static_buf_bit_writer buf_bit_writer;
+    struct tans_bit_writer *bit_writer = (struct tans_bit_writer *) &buf_bit_writer;
+    struct tans_rev_buf_bit_reader buf_bit_reader;
+    struct tans_bit_reader *bit_reader = (struct tans_bit_reader *) &buf_bit_reader;
+    uint8_t buf[64] = {0};
+
+    tans_static_buf_bit_writer_init(&buf_bit_writer);
+    tans_rev_buf_bit_reader_init(&buf_bit_reader, buf, sizeof buf);
+    ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0, 0), 0);
+
+    for (i = 0; i < 8; ++i) {
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 1, 8), 8);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0xf, 4), 4);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0x2f, 6), 6);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0, 2), 2);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0x3fe0, 16), 16);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0xd0, 8), 8);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0x0f, 7), 7);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 1, 1), 1);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0, 3), 3);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0x18, 5), 5);
+        ASSERT_EQ(bit_writer->vtable->write(bit_writer, 0xf, 4), 4);
+    }
+
+    /* Hand the written bytes to the buffer the reverse reader was set up with. */
+    for (i = 0; i < 64; ++i) {
+        buf[i] = buf_bit_writer.buf[i];
+    }
+
+    /* Reading backwards returns the fields in the opposite order of the writes. */
+    for (i = 0; i < 8; ++i) {
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 4), 0xf);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 5), 0x18);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 3), 0);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 1), 1);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 7), 0x0f);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 8), 0xd0);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 16), 0x3fe0);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 2), 0);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 6), 0x2f);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 4), 0xf);
+        ASSERT_EQ(bit_reader->vtable->read(bit_reader, 8), 1);
+    }
+
+    return TEST_SUCCESS;
+}
+
 int main(void)
 {
     RUN_TEST(test_tans_buf_bit_write_read_rev);
+    RUN_TEST(test_tans_static_buf_bit_write_read_rev);
 }
diff --git a/test/test_tans_encode_decode.c b/test/test_tans_encode_decode.c
index cdca52e..e7d15ff 100644
--- a/test/test_tans_encode_decode.c
+++ b/test/test_tans_encode_decode.c
@@ -131,9 +131,52 @@ enum test_result test_tans_encode_decode_long_stream(void)
     return TEST_SUCCESS;
 }
 
+/* Round-trip 65536 symbols through the bulk encoder and the bulk decoder. */
+enum test_result test_tans_encode_decode_long_stream_bulk(void)
+{
+    struct tans_freq_tbl freq_tbl;
+    struct tans_symbol_tbl symbol_tbl;
+    struct tans_encode_st encode_st;
+    struct tans_decode_st decode_st;
+    struct tans_rev_buf_bit_reader buf_bit_reader;
+    uint8_t data[65536];
+    double p[256];
+    uint8_t rec[65536];
+    uint8_t buf[4 + 32768] = {0};
+    uint16_t n_symbols = 256;
+    uint16_t log2_tblsz = 10;
+    uint32_t i;
+
+    p[0] = 0.75;
+    for (i = 1; i < n_symbols; ++i) p[i] = 0.25 / n_symbols;
+    ASSERT_NE(tans_freq_tbl_init(&freq_tbl, n_symbols, p, log2_tblsz), -1);
+    ASSERT_EQ(tans_symbol_tbl_init(&symbol_tbl, &freq_tbl), 0);
+    tans_encode_st_init(&encode_st, &symbol_tbl);
+    tans_decode_st_init(&decode_st, &symbol_tbl);
+    tans_rev_buf_bit_reader_init(&buf_bit_reader, buf, sizeof buf);
+
+    for (i = 0; i < 65536; ++i) {
+        data[i] = (uint8_t) (i % 4 == 3 ? i / 4 : 0);
+    }
+
+    /* The stream starts at buf + 4: the bulk decoder loads up to four bytes in front of it. */
+    buf_bit_reader.bit = (uint32_t) tans_encode_st_encode(&encode_st, data, sizeof data, buf + 4);
+    decode_st.x = (uint16_t) (encode_st.x - (1 << log2_tblsz));
+
+    ASSERT_EQ(tans_decode_st_decode(&decode_st, rec, sizeof rec, buf + 4, buf_bit_reader.bit), 0);
+
+    /* Bulk decoding must reproduce the input, not merely consume every bit. */
+    for (i = 0; i < 65536; ++i) {
+        ASSERT_EQ(rec[i], data[i]);
+    }
+
+    return TEST_SUCCESS;
+}
+
 int main(void)
 {
     RUN_TEST(test_tans_encode_decode_equal_freq);
     RUN_TEST(test_tans_encode_decode_high_zero_probability);
     RUN_TEST(test_tans_encode_decode_long_stream);
+    RUN_TEST(test_tans_encode_decode_long_stream_bulk);
 }