From c11a76c6b15599df2d9ec6f69a9dd2ac85707fa3 Mon Sep 17 00:00:00 2001
From: Geoffrey Allott
Date: Mon, 15 Aug 2022 21:08:08 +0100
Subject: [PATCH] add tANS application

---
Note: src/tANS.c was revised in review; the blob id on its "index" line
below is carried over from the previous revision of this patch.

 Makefile             |   3 +-
 src/tANS.c           | 185 +++++++++++++++++++++++++++++++++++++++++++
 src/tANS_encode_st.c |   2 +-
 src/tANS_freq_tbl.c  |   1 +
 4 files changed, 189 insertions(+), 2 deletions(-)
 create mode 100644 src/tANS.c

diff --git a/Makefile b/Makefile
index 1c73663..bedc284 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-CFLAGS = -Isrc -g -O2 -Wall -Wextra -Wconversion -fsanitize=undefined -fsanitize=address
+CFLAGS = -Isrc -g -O2 -Wall -Wextra -Wconversion -fsanitize=undefined -fsanitize=address -ftrivial-auto-var-init=pattern
 LDFLAGS = -lasan -lubsan
 
 OBJS = $(patsubst %.c, %.o, $(wildcard src/*.o))
@@ -18,6 +18,7 @@ $(TEST_OBJS): test/test.h $(OBJS)
 
 test/test_tANS.o: src/tANS_decode_st.h src/tANS_decode_tbl.h src/tANS_encode_st.h src/tANS_encode_tbl.h src/tANS_symbol_tbl.h src/tANS_freq_tbl.h
 test/test_tANS: src/tANS_decode_st.o src/tANS_decode_tbl.o src/tANS_encode_st.o src/tANS_encode_tbl.o src/tANS_symbol_tbl.o src/tANS_freq_tbl.o
+src/tANS: $(OBJS)
 
 clean:
 	rm -f $(OBJS)
diff --git a/src/tANS.c b/src/tANS.c
new file mode 100644
index 0000000..371e2ae
--- /dev/null
+++ b/src/tANS.c
@@ -0,0 +1,185 @@
+/*
+tabled Asymmetric Numeral Systems
+
+author: Geoffrey Allott
+ref: https://arxiv.org/abs/1311.2540
+*/
+
+#include "tANS_encode_st.h"
+#include "tANS_decode_st.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+enum {
+    TANS_LOG2_TBLSZ = 10,     /* log2_tblsz handed to tANS_freq_tbl_init() */
+    TANS_BLOCK_MAX = 1048576, /* largest uncompressed block, in bytes */
+    TANS_CODED_MAX = 2097152, /* size of the coded-payload buffers, in bytes */
+    TANS_DECODE_PAD = 4       /* payload offset inside the decoder's read buffer */
+};
+
+/* Print the command-line synopsis to standard output. */
+static void usage(void)
+{
+    printf(
+        "usage: tANS [-hcd] [file...]\n"
+        "\n"
+        "Compress the given files using tabled Asymmetric Numeral Systems\n"
+        "If `-d' is given, decompress instead.\n"
+    );
+}
+
+/* Number of whole bytes needed to hold `bits' bits; cannot wrap around. */
+static size_t coded_bytes(uint32_t bits)
+{
+    return (size_t) (bits / 8u) + (size_t) (bits % 8u != 0u);
+}
+
+/*
+Compress `input' to `output' as a sequence of blocks.
+
+Each block is written as len (uncompressed byte count), bits (coded bit
+count), the encoder state st.x left by tANS_encode(), and the coded payload
+rounded up to whole bytes; the integers are stored in host byte order.  The
+tables for a block are built from the byte counts of the blocks before it.
+Reads start at 8 bytes and double up to TANS_BLOCK_MAX.
+
+Returns 0 on success (empty input gives empty output) and -1 on failure.
+*/
+static int tANS_compress_file(FILE *input, FILE *output)
+{
+    /* static: roughly 3 MiB of buffers is too much to put on the stack */
+    static uint8_t read_buf[TANS_BLOCK_MAX];
+    static uint8_t write_buf[TANS_CODED_MAX];
+    double p[256] = {0};
+    struct tANS_freq_tbl freq_tbl;
+    struct tANS_symbol_tbl symbol_tbl;
+    struct tANS_encode_st st;
+    const uint16_t log2_tblsz = TANS_LOG2_TBLSZ;
+    uint32_t read_sz = 8;
+
+    /* every block is encoded into an all-zero buffer (see the memset below) */
+    memset(write_buf, 0, sizeof write_buf);
+
+    for (;;) {
+        uint32_t i, len, bits;
+        size_t nbytes;
+
+        /* test the read itself: feof() only turns true after a failed read */
+        len = (uint32_t) fread(read_buf, 1, read_sz, input);
+        if (len == 0) return ferror(input) ? -1 : 0;
+
+        if (tANS_freq_tbl_init(&freq_tbl, 256, p, log2_tblsz) != 0) return -1;
+        if (tANS_symbol_tbl_init(&symbol_tbl, &freq_tbl) != 0) return -1;
+        tANS_encode_st_init(&st, &symbol_tbl);
+
+        for (i = 0; i < len; ++i) ++p[read_buf[i]];
+        if (fwrite(&len, sizeof len, 1, output) != 1) return -1;
+        bits = tANS_encode(&st, read_buf, len, write_buf);
+        nbytes = coded_bytes(bits);
+        if (fwrite(&bits, sizeof bits, 1, output) != 1) return -1;
+        if (fwrite(&st.x, sizeof st.x, 1, output) != 1) return -1;
+        /* count bytes, not items, so that an empty payload is not an error */
+        if (fwrite(write_buf, 1, nbytes, output) != nbytes) return -1;
+
+        memset(write_buf, 0, nbytes);
+
+        read_sz *= 2;
+        if (read_sz > TANS_BLOCK_MAX) read_sz = TANS_BLOCK_MAX;
+    }
+}
+
+/*
+Decompress a stream written by tANS_compress_file() from `input' to `output'.
+
+Returns 0 on success, -1 on an I/O error or a truncated or corrupted stream.
+*/
+static int tANS_decompress_file(FILE *input, FILE *output)
+{
+    /* static: roughly 3 MiB of buffers is too much to put on the stack */
+    static uint8_t read_buf[TANS_CODED_MAX];
+    static uint8_t write_buf[TANS_BLOCK_MAX];
+    double p[256] = {0};
+    struct tANS_freq_tbl freq_tbl;
+    struct tANS_symbol_tbl symbol_tbl;
+    struct tANS_decode_st st;
+    const uint16_t log2_tblsz = TANS_LOG2_TBLSZ;
+
+    for (;;) {
+        uint32_t i, len, bits;
+        size_t got, nbytes;
+
+        /* the stream may only end on a block boundary */
+        got = fread(&len, 1, sizeof len, input);
+        if (got == 0) return ferror(input) ? -1 : 0;
+        if (got != sizeof len) return -1;
+
+        if (tANS_freq_tbl_init(&freq_tbl, 256, p, log2_tblsz) != 0) return -1;
+        if (tANS_symbol_tbl_init(&symbol_tbl, &freq_tbl) != 0) return -1;
+        tANS_decode_st_init(&st, &symbol_tbl);
+
+        if (fread(&bits, sizeof bits, 1, input) != 1) return -1;
+        if (fread(&st.x, sizeof st.x, 1, input) != 1) return -1;
+
+        /* len and bits come from the file: check them against the buffers */
+        nbytes = coded_bytes(bits);
+        if (len > TANS_BLOCK_MAX || nbytes > TANS_CODED_MAX - TANS_DECODE_PAD) {
+            fprintf(stderr, "tANS: corrupted file\n");
+            return -1;
+        }
+        if (fread(read_buf + TANS_DECODE_PAD, 1, nbytes, input) != nbytes) return -1;
+        st.x &= symbol_tbl.tblsz - 1;
+        bits = tANS_decode(&st, write_buf, len, read_buf + TANS_DECODE_PAD, bits);
+        if (bits != 0) {
+            fprintf(stderr, "tANS: corrupted file\n");
+            return -1;
+        }
+        if (fwrite(write_buf, 1, len, output) != len) return -1;
+        for (i = 0; i < len; ++i) ++p[write_buf[i]];
+    }
+}
+
+/*
+Entry point: compress (default, or `-c') or decompress (`-d') standard input
+to standard output.  Exits with status 0 on success and 1 on failure.
+*/
+int main(int argc, char *argv[])
+{
+    int opt, compress = 1, rc;
+
+    while ((opt = getopt(argc, argv, "hcd")) != -1) {
+        switch (opt) {
+        case 'h':
+            usage();
+            return 0;
+        case 'c':
+            compress = 1;
+            break;
+        case 'd':
+            compress = 0;
+            break;
+        default:
+            /* option not listed in the getopt() string */
+            usage();
+            return 1;
+        }
+    }
+
+    if (optind < argc) {
+        /* never exit quietly without having processed the named files */
+        fprintf(stderr, "tANS: file arguments are not supported, use stdin and stdout\n");
+        return 1;
+    }
+
+    if (compress) {
+        rc = tANS_compress_file(stdin, stdout);
+    } else {
+        rc = tANS_decompress_file(stdin, stdout);
+    }
+    if (fflush(stdout) != 0) rc = -1;
+
+    /* the helpers return 0 on success, which is also the success exit status */
+    return rc != 0;
+}
diff --git a/src/tANS_encode_st.c b/src/tANS_encode_st.c
index 8eb9d01..763c284 100644
--- a/src/tANS_encode_st.c
+++ b/src/tANS_encode_st.c
@@ -9,7 +9,7 @@ void tANS_encode_st_init(struct tANS_encode_st *self, const struct tANS_symbol_t
 
 static inline uint32_t get_uint32(uint8_t buf[const static 4])
 {
-    return (uint32_t) buf[0] | (uint32_t) (buf[1] << 8) | (uint32_t) (buf[2] << 16) | (uint32_t) (buf[3] << 24);
+    return (uint32_t) buf[0] | (uint32_t) buf[1] << 8 | (uint32_t) buf[2] << 16 | (uint32_t) buf[3] << 24;
 }
 
 static inline void set_uint32(uint8_t buf[static 4], uint32_t value)
diff --git a/src/tANS_freq_tbl.c b/src/tANS_freq_tbl.c
index 841b4f9..41c5bb2 100644
--- a/src/tANS_freq_tbl.c
+++ b/src/tANS_freq_tbl.c
@@ -19,6 +19,7 @@ int tANS_freq_tbl_init(struct tANS_freq_tbl *self, uint16_t n_symbols, double *p
     for (i = 0; i < n_symbols; ++i) {
         total_p += p[i];
     }
+    if (total_p == 0.0) total_p = 1.0;
 
     total = 0;
     for (i = 0; i < n_symbols; ++i) {
-- 
2.34.1