From ced69720c73b8ab08cc4b4729b5b54e88ddb80e7 Mon Sep 17 00:00:00 2001 From: Geoffrey Allott Date: Sat, 3 Sep 2022 17:55:30 +0100 Subject: [PATCH] improve compression ratios by: resetting probability each block; reducing the size of each tANS block; encoding len and bits as one variable; including one byte in the tANS initial state --- src/tANS.c | 63 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/src/tANS.c b/src/tANS.c index c81fa2a..0a24c8a 100644 --- a/src/tANS.c +++ b/src/tANS.c @@ -15,7 +15,7 @@ ref: https://arxiv.org/abs/1311.2540 #include #include -#define MAX_BUFSZ 1048576 +#define MAX_BUFSZ 16384 #define STREE_READSZ 16777216 #define INIT_READSZ 1024 #define N_SYMBOLS 256 @@ -86,12 +86,13 @@ static int stree_tANS_compress_file(FILE* input, FILE *output) for (i = 0; i < len; ++i) { ++p[0][enc_buf[i]]; if (enc_buf[i] == 0) { - while (enc_buf[i] == 0) { + do { count = 0; - while (enc_buf[++i] == 0 && count < 255) ++count; + while (i < len - 1 && enc_buf[++i] == 0 && count < 255) ++count; ++p[1][count]; - } - ++p[2][enc_buf[i]]; + } while (i < len - 1 && enc_buf[i] == 0); + if (i < len) + ++p[2][enc_buf[i]]; } } @@ -181,12 +182,13 @@ static int stree_tANS_decompress_file(FILE* input, FILE *output) for (i = 0; i < len; ++i) { ++p[0][enc_buf[i]]; if (enc_buf[i] == 0) { - while (enc_buf[i] == 0) { + do { count = 0; - while (enc_buf[++i] == 0 && count < 255) ++count; + while (i < len - 1 && enc_buf[++i] == 0 && count < 255) ++count; ++p[1][count]; - } - ++p[2][enc_buf[i]]; + } while (i < len - 1 && enc_buf[i] == 0); + if (i < len) + ++p[2][enc_buf[i]]; } } } @@ -213,7 +215,7 @@ fail: static int tANS_compress_file(FILE* input, FILE *output) { - uint32_t i, len, bits, count; + uint32_t i, len, bits, count, len_and_bits; uint8_t *read_buf; uint8_t *write_buf; double p[N_AUX][N_SYMBOLS] = {0}; @@ -221,7 +223,7 @@ static int tANS_compress_file(FILE* input, FILE *output) struct tANS_symbol_tbl *symbol_tbls; struct tANS_rl_encode_st *st; const uint16_t log2_tblsz = LOG2_TBLSZ; - uint32_t read_sz = 1024; + uint32_t read_sz = INIT_READSZ; uint32_t magic = TANS_ONLY_MAGIC; freq_tbls = malloc(sizeof(struct tANS_freq_tbl) * N_AUX); @@ -244,21 +246,25 @@ static int tANS_compress_file(FILE* input, FILE *output) len = (uint32_t) fread(read_buf, 1, read_sz, input); if (len == 0) break; + memset(p, 0, sizeof p); + for (i = 0; i < len; ++i) { ++p[0][read_buf[i]]; if (read_buf[i] == 0) { - while (read_buf[i] == 0) { + do { count = 0; - while (read_buf[++i] == 0 && count < 255) ++count; + while (i < len - 1 && read_buf[++i] == 0 && count < 255) ++count; ++p[1][count]; - } - ++p[2][read_buf[i]]; + } while (i < len - 1 && read_buf[i] == 0); + if (i < len) + ++p[2][read_buf[i]]; } } - if (fwrite(&len, sizeof len, 1, output) != 1) goto fail; - bits = tANS_rl_encode(st, read_buf, len, write_buf); - if (fwrite(&bits, sizeof bits, 1, output) != 1) goto fail; + st->x += read_buf[len-1]; + bits = tANS_rl_encode(st, read_buf, len - 1, write_buf); + len_and_bits = (len - 1) | (bits << 14); + if (fwrite(&len_and_bits, sizeof len_and_bits, 1, output) != 1) goto fail; if (fwrite(&st->x, sizeof st->x, 1, output) != 1) goto fail; if (fwrite(write_buf, (bits + 7) / 8, 1, output) != 1) goto fail; @@ -288,7 +294,7 @@ fail: static int tANS_decompress_file(FILE* input, FILE *output) { - uint32_t i, len, bits, count; + uint32_t i, len, bits, count, len_and_bits; uint8_t *read_buf; uint8_t *write_buf; double p[N_AUX][N_SYMBOLS] = {0}; @@ -319,26 +325,31 @@ static int tANS_decompress_file(FILE* input, FILE *output) } tANS_rl_decode_st_init(st, symbol_tbls); - if (fread(&len, sizeof len, 1, input) != 1) break; - if (fread(&bits, sizeof bits, 1, input) != 1) goto fail; + if (fread(&len_and_bits, sizeof len_and_bits, 1, input) != 1) break; + len = (len_and_bits & ((1 << 14) - 1)) + 1; + bits = len_and_bits >> 14; if (fread(&st->x, sizeof st->x, 1, input) != 1) goto fail; if (fread(read_buf + 4, (bits + 7) / 8, 1, input) != 1) goto fail; st->x &= symbol_tbls[0].tblsz - 1; - bits = tANS_rl_decode(st, write_buf, len, read_buf + 4, bits); + bits = tANS_rl_decode(st, write_buf, len - 1, read_buf + 4, bits); if (bits != 0) { fprintf(stderr, "tANS: corrupted file\n"); goto fail; } + write_buf[len-1] = (uint8_t) st->x; if (fwrite(write_buf, len, 1, output) != 1) goto fail; + memset(p, 0, sizeof p); + for (i = 0; i < len; ++i) { ++p[0][write_buf[i]]; if (write_buf[i] == 0) { - while (write_buf[i] == 0) { + do { count = 0; - while (write_buf[++i] == 0 && count < 255) ++count; + while (i < len - 1 && write_buf[++i] == 0 && count < 255) ++count; ++p[1][count]; - } - ++p[2][write_buf[i]]; + } while (i < len - 1 && write_buf[i] == 0); + if (i < len) + ++p[2][write_buf[i]]; } } } -- 2.34.1