From: Geoffrey Allott Date: Sat, 3 Sep 2022 22:03:33 +0000 (+0100) Subject: fix combined stree/tANS so that it matches the individual output X-Git-Url: https://git.pointlesshacks.com/?a=commitdiff_plain;h=a4212d7385c7dad00cc6a6eef6f7f06c3f755552;p=tANS.git fix combined stree/tANS so that it matches the individual output --- diff --git a/src/floor_log2.h b/src/floor_log2.h index a1311b2..a0dfda5 100644 --- a/src/floor_log2.h +++ b/src/floor_log2.h @@ -4,5 +4,5 @@ static inline uint32_t floor_log2(uint32_t x) { - return 31 - (uint32_t) __builtin_clz(x); + return x == 0 ? 0 : 31 - (uint32_t) __builtin_clz(x); } diff --git a/src/tANS.c b/src/tANS.c index 4674408..c7db81c 100644 --- a/src/tANS.c +++ b/src/tANS.c @@ -121,21 +121,22 @@ static int tANS_init_symbol_tbls(struct tANS_symbol_tbl symbol_tbls[static 3], c static int stree_tANS_compress_file(FILE* input, FILE *output) { - uint32_t len, bits; + uint32_t stree_len, pos, len, bits, bits_and_x; + uint16_t u16_len; uint8_t *read_buf; uint8_t *enc_buf; uint8_t *aux_buf; uint8_t *write_buf; struct tANS_symbol_tbl *symbol_tbls; struct tANS_rl_encode_st *st; - uint32_t read_sz = 1024; + uint32_t read_sz = INIT_READSZ; uint32_t magic = TANS_MAGIC; symbol_tbls = malloc(sizeof(struct tANS_symbol_tbl) * N_AUX); st = malloc(sizeof(struct tANS_rl_encode_st)); - read_buf = malloc(MAX_BUFSZ); - enc_buf = malloc(MAX_BUFSZ); - aux_buf = malloc(MAX_BUFSZ); + read_buf = malloc(STREE_READSZ); + enc_buf = malloc(STREE_READSZ); + aux_buf = malloc(STREE_READSZ); write_buf = calloc(tANS_max_compressed_size(MAX_BUFSZ), 1); if (!symbol_tbls || !st || !read_buf || !enc_buf || !aux_buf || !write_buf) goto fail; @@ -145,22 +146,28 @@ static int stree_tANS_compress_file(FILE* input, FILE *output) if (tANS_init_symbol_tbls(symbol_tbls, ref_enc, sizeof ref_enc) != 0) goto fail; while (!feof(input)) { - tANS_rl_encode_st_init(st, symbol_tbls); - - len = (uint32_t) fread(read_buf, 1, read_sz, input); - if (len == 0) break; - if (stree_encode(len, read_buf, enc_buf, aux_buf) != 0) goto fail; - if (fwrite(&len, sizeof len, 1, output) != 1) goto fail; - bits = tANS_rl_encode(st, enc_buf, len, write_buf); - if (fwrite(&bits, sizeof bits, 1, output) != 1) goto fail; - if (fwrite(&st->x, sizeof st->x, 1, output) != 1) goto fail; - if (fwrite(write_buf, (bits + 7) / 8, 1, output) != 1) goto fail; - - memset(write_buf, 0, (bits + 7) / 8); - - read_sz *= 2; - if (read_sz > MAX_BUFSZ) read_sz = MAX_BUFSZ; - if (tANS_init_symbol_tbls(symbol_tbls, enc_buf, len) != 0) goto fail; + stree_len = (uint32_t) fread(read_buf, 1, STREE_READSZ, input); + if (stree_len == 0) break; + if (stree_encode(stree_len, read_buf, enc_buf, aux_buf) != 0) goto fail; + + for (pos = 0; pos < stree_len; pos += len) { + tANS_rl_encode_st_init(st, symbol_tbls); + len = stree_len - pos < read_sz ? stree_len - pos : read_sz; + + st->x += enc_buf[pos+len-1]; + bits = tANS_rl_encode(st, enc_buf + pos, len - 1, write_buf); + u16_len = (uint16_t) (len - 1); + bits_and_x = (st->x & ((1 << LOG2_TBLSZ) - 1)) | (bits << LOG2_TBLSZ); + if (fwrite(&u16_len, sizeof u16_len, 1, output) != 1) goto fail; + if (fwrite(&bits_and_x, sizeof bits_and_x, 1, output) != 1) goto fail; + if (fwrite(write_buf, (bits + 7) / 8, 1, output) != 1) goto fail; + + memset(write_buf, 0, (bits + 7) / 8); + + read_sz *= 2; + if (read_sz > MAX_BUFSZ) read_sz = MAX_BUFSZ; + if (tANS_init_symbol_tbls(symbol_tbls, enc_buf + pos, len - 1) != 0) goto fail; + } } if (ferror(input)) goto fail; @@ -185,7 +192,8 @@ fail: static int stree_tANS_decompress_file(FILE* input, FILE *output) { - uint32_t len, bits; + uint32_t stree_len, len, bits, bits_and_x; + uint16_t u16_len; uint8_t *read_buf; uint8_t *enc_buf; uint8_t *aux_buf; @@ -197,9 +205,9 @@ static int stree_tANS_decompress_file(FILE* input, FILE *output) symbol_tbls = malloc(sizeof(struct tANS_symbol_tbl) * N_AUX); st = malloc(sizeof(struct tANS_rl_decode_st)); read_buf = malloc(tANS_max_compressed_size(MAX_BUFSZ)); - enc_buf = malloc(MAX_BUFSZ); - aux_buf = malloc(MAX_BUFSZ); - write_buf = malloc(MAX_BUFSZ); + enc_buf = malloc(STREE_READSZ); + aux_buf = malloc(STREE_READSZ); + write_buf = malloc(STREE_READSZ); if (!symbol_tbls || !st || !read_buf || !enc_buf || !aux_buf || !write_buf) goto fail; @@ -212,23 +220,29 @@ static int stree_tANS_decompress_file(FILE* input, FILE *output) if (tANS_init_symbol_tbls(symbol_tbls, ref_enc, sizeof ref_enc) != 0) goto fail; while (!feof(input)) { - tANS_rl_decode_st_init(st, symbol_tbls); - - if (fread(&len, sizeof len, 1, input) != 1) break; - if (fread(&bits, sizeof bits, 1, input) != 1) goto fail; - if (fread(&st->x, sizeof st->x, 1, input) != 1) goto fail; - if (fread(read_buf + 4, (bits + 7) / 8, 1, input) != 1) goto fail; - st->x &= symbol_tbls[0].tblsz - 1; - bits = tANS_rl_decode(st, enc_buf, len, read_buf + 4, bits); - if (bits != 0) { - fprintf(stderr, "tANS: corrupted file\n"); - goto fail; + for (stree_len = 0; stree_len < STREE_READSZ; stree_len += len) { + tANS_rl_decode_st_init(st, symbol_tbls); + if (fread(&u16_len, sizeof u16_len, 1, input) != 1) break; + if (fread(&bits_and_x, sizeof bits_and_x, 1, input) != 1) goto fail; + len = (uint32_t) u16_len + 1; + bits = bits_and_x >> LOG2_TBLSZ; + st->x = bits_and_x & ((1 << LOG2_TBLSZ) - 1); + if (fread(read_buf + 4, (bits + 7) / 8, 1, input) != 1) goto fail; + bits = tANS_rl_decode(st, enc_buf + stree_len, len - 1, read_buf + 4, bits); + if (bits != 0) { + fprintf(stderr, "tANS: corrupted file\n"); + goto fail; + } + enc_buf[stree_len+len-1] = (uint8_t) st->x; + if (tANS_init_symbol_tbls(symbol_tbls, enc_buf + stree_len, len - 1) != 0) goto fail; } - if (stree_decode(len, enc_buf, write_buf, aux_buf) != 0) goto fail; - if (fwrite(write_buf, len, 1, output) != 1) goto fail; - if (tANS_init_symbol_tbls(symbol_tbls, enc_buf, len) != 0) goto fail; + if (stree_len == 0) break; + if (stree_decode(stree_len, enc_buf, write_buf, aux_buf) != 0) goto fail; + if (fwrite(write_buf, stree_len, 1, output) != 1) goto fail; } + if (ferror(input)) goto fail; + free(symbol_tbls); free(st); free(read_buf); @@ -256,7 +270,7 @@ static int tANS_compress_file(FILE* input, FILE *output) struct tANS_symbol_tbl *symbol_tbls; struct tANS_rl_encode_st *st; uint32_t read_sz = INIT_READSZ; - uint32_t magic = TANS_ONLY_MAGIC; + uint32_t magic = TANS_MAGIC; symbol_tbls = malloc(sizeof(struct tANS_symbol_tbl) * N_AUX); st = malloc(sizeof(struct tANS_rl_encode_st)); @@ -324,8 +338,8 @@ static int tANS_decompress_file(FILE* input, FILE *output) if (!symbol_tbls || !st || !read_buf || !write_buf) goto fail; if (fread(&magic, sizeof magic, 1, input) != 1) goto fail; - if (magic != TANS_ONLY_MAGIC) { - fprintf(stderr, "tANS: not a valid tANS only file\n"); + if (magic != TANS_MAGIC) { + fprintf(stderr, "tANS: not a valid tANS file\n"); goto fail; } diff --git a/src/tANS_constants.h b/src/tANS_constants.h index 726df7f..462c18b 100644 --- a/src/tANS_constants.h +++ b/src/tANS_constants.h @@ -1,7 +1,6 @@ #pragma once #define TANS_MAGIC 0xfac0162a -#define TANS_ONLY_MAGIC 0xfac0162b #define TANS_LOG2_MAX_TBLSZ 12 #define TANS_MAX_TBLSZ (1 << TANS_LOG2_MAX_TBLSZ) #define TANS_MAX_SYMBOLS 1024