From: Geoffrey Allott Date: Sat, 3 Sep 2022 15:39:32 +0000 (+0100) Subject: implement tANS-only mode X-Git-Url: https://git.pointlesshacks.com/?a=commitdiff_plain;h=c81393795730bb4b8504b0d4058a77660d2b3945;p=tANS.git implement tANS-only mode --- diff --git a/itest/test_roundtrip.sh b/itest/test_roundtrip.sh index 42491af..2fba644 100755 --- a/itest/test_roundtrip.sh +++ b/itest/test_roundtrip.sh @@ -15,6 +15,16 @@ compare_stree_only_roundtrip() { trap '' EXIT } +compare_tans_only_roundtrip() { + TEMP=$(mktemp) + trap "echo 'FAIL: compare_tans_only_roundtrip $1' >&2; rm -f $TEMP" EXIT + $TANS -ct $1 | $TANS -dt > $TEMP + diff $1 $TEMP + echo "PASS: compare_tans_only_roundtrip $1" >&2 + rm -f $TEMP + trap '' EXIT +} + compare_roundtrip() { TEMP=$(mktemp) trap "echo 'FAIL: compare_roundtrip $1' >&2; rm -f $TEMP" EXIT @@ -27,5 +37,6 @@ compare_roundtrip() { for FILE in $TEST_FILES; do compare_stree_only_roundtrip itest/$FILE + compare_tans_only_roundtrip itest/$FILE compare_roundtrip itest/$FILE done diff --git a/src/tANS.c b/src/tANS.c index e06911c..c81fa2a 100644 --- a/src/tANS.c +++ b/src/tANS.c @@ -15,11 +15,12 @@ ref: https://arxiv.org/abs/1311.2540 #include #include -#define MAX_BUFSZ 1048576 -#define INIT_READSZ 1024 -#define N_SYMBOLS 256 -#define N_AUX 3 -#define LOG2_TBLSZ 12 +#define MAX_BUFSZ 1048576 +#define STREE_READSZ 16777216 +#define INIT_READSZ 1024 +#define N_SYMBOLS 256 +#define N_AUX 3 +#define LOG2_TBLSZ 12 static void usage(void) { @@ -44,7 +45,7 @@ static size_t tANS_max_compressed_size(size_t len) return len * 2; } -static int tANS_compress_file(FILE* input, FILE *output) +static int stree_tANS_compress_file(FILE* input, FILE *output) { uint32_t i, len, bits, count; uint8_t *read_buf; @@ -128,7 +129,7 @@ fail: return -1; } -static int tANS_decompress_file(FILE* input, FILE *output) +static int stree_tANS_decompress_file(FILE* input, FILE *output) { uint32_t i, len, bits, count; uint8_t *read_buf; @@ -210,12 +211,161 @@ fail: return -1; } -static int stree_compress_file(FILE *input, FILE *output, uint32_t read_sz) +static int tANS_compress_file(FILE* input, FILE *output) +{ + uint32_t i, len, bits, count; + uint8_t *read_buf; + uint8_t *write_buf; + double p[N_AUX][N_SYMBOLS] = {0}; + struct tANS_freq_tbl *freq_tbls; + struct tANS_symbol_tbl *symbol_tbls; + struct tANS_rl_encode_st *st; + const uint16_t log2_tblsz = LOG2_TBLSZ; + uint32_t read_sz = 1024; + uint32_t magic = TANS_ONLY_MAGIC; + + freq_tbls = malloc(sizeof(struct tANS_freq_tbl) * N_AUX); + symbol_tbls = malloc(sizeof(struct tANS_symbol_tbl) * N_AUX); + st = malloc(sizeof(struct tANS_rl_encode_st)); + read_buf = malloc(MAX_BUFSZ); + write_buf = calloc(tANS_max_compressed_size(MAX_BUFSZ), 1); + + if (!freq_tbls || !symbol_tbls || !st || !read_buf || !write_buf) goto fail; + + if (fwrite(&magic, sizeof magic, 1, output) != 1) goto fail; + + while (!feof(input)) { + for (i = 0; i < N_AUX; ++i) { + if (tANS_freq_tbl_init(freq_tbls + i, N_SYMBOLS, p[i], log2_tblsz) != 0) goto fail; + if (tANS_symbol_tbl_init(symbol_tbls + i, freq_tbls + i) != 0) goto fail; + } + tANS_rl_encode_st_init(st, symbol_tbls); + + len = (uint32_t) fread(read_buf, 1, read_sz, input); + if (len == 0) break; + + for (i = 0; i < len; ++i) { + ++p[0][read_buf[i]]; + if (read_buf[i] == 0) { + while (read_buf[i] == 0) { + count = 0; + while (read_buf[++i] == 0 && count < 255) ++count; + ++p[1][count]; + } + ++p[2][read_buf[i]]; + } + } + + if (fwrite(&len, sizeof len, 1, output) != 1) goto fail; + bits = tANS_rl_encode(st, read_buf, len, write_buf); + if (fwrite(&bits, sizeof bits, 1, output) != 1) goto fail; + if (fwrite(&st->x, sizeof st->x, 1, output) != 1) goto fail; + if (fwrite(write_buf, (bits + 7) / 8, 1, output) != 1) goto fail; + + memset(write_buf, 0, (bits + 7) / 8); + + read_sz *= 2; + if (read_sz > MAX_BUFSZ) read_sz = MAX_BUFSZ; + } + + if (ferror(input)) goto fail; + + free(freq_tbls); + free(symbol_tbls); + free(st); + free(read_buf); + free(write_buf); + return 0; + +fail: + free(freq_tbls); + free(symbol_tbls); + free(st); + free(read_buf); + free(write_buf); + return -1; +} + +static int tANS_decompress_file(FILE* input, FILE *output) +{ + uint32_t i, len, bits, count; + uint8_t *read_buf; + uint8_t *write_buf; + double p[N_AUX][N_SYMBOLS] = {0}; + struct tANS_freq_tbl *freq_tbls; + struct tANS_symbol_tbl *symbol_tbls; + struct tANS_rl_decode_st *st; + const uint16_t log2_tblsz = LOG2_TBLSZ; + uint32_t magic; + + freq_tbls = malloc(sizeof(struct tANS_freq_tbl) * N_AUX); + symbol_tbls = malloc(sizeof(struct tANS_symbol_tbl) * N_AUX); + st = malloc(sizeof(struct tANS_rl_decode_st)); + read_buf = malloc(tANS_max_compressed_size(MAX_BUFSZ)); + write_buf = malloc(MAX_BUFSZ); + + if (!freq_tbls || !symbol_tbls || !st || !read_buf || !write_buf) goto fail; + + if (fread(&magic, sizeof magic, 1, input) != 1) goto fail; + if (magic != TANS_ONLY_MAGIC) { + fprintf(stderr, "tANS: not a valid tANS only file\n"); + goto fail; + } + + while (!feof(input)) { + for (i = 0; i < N_AUX; ++i) { + if (tANS_freq_tbl_init(freq_tbls + i, N_SYMBOLS, p[i], log2_tblsz) != 0) goto fail; + if (tANS_symbol_tbl_init(symbol_tbls + i, freq_tbls + i) != 0) goto fail; + } + tANS_rl_decode_st_init(st, symbol_tbls); + + if (fread(&len, sizeof len, 1, input) != 1) break; + if (fread(&bits, sizeof bits, 1, input) != 1) goto fail; + if (fread(&st->x, sizeof st->x, 1, input) != 1) goto fail; + if (fread(read_buf + 4, (bits + 7) / 8, 1, input) != 1) goto fail; + st->x &= symbol_tbls[0].tblsz - 1; + bits = tANS_rl_decode(st, write_buf, len, read_buf + 4, bits); + if (bits != 0) { + fprintf(stderr, "tANS: corrupted file\n"); + goto fail; + } + if (fwrite(write_buf, len, 1, output) != 1) goto fail; + for (i = 0; i < len; ++i) { + ++p[0][write_buf[i]]; + if (write_buf[i] == 0) { + while (write_buf[i] == 0) { + count = 0; + while (write_buf[++i] == 0 && count < 255) ++count; + ++p[1][count]; + } + ++p[2][write_buf[i]]; + } + } + } + + free(freq_tbls); + free(symbol_tbls); + free(st); + free(read_buf); + free(write_buf); + return 0; + +fail: + free(freq_tbls); + free(symbol_tbls); + free(st); + free(read_buf); + free(write_buf); + return -1; +} + +static int stree_compress_file(FILE *input, FILE *output) { uint32_t len; uint8_t *read_buf; uint8_t *write_buf; uint8_t *aux_buf; + uint32_t read_sz = STREE_READSZ; read_buf = malloc(read_sz); write_buf = malloc(read_sz); @@ -243,12 +393,13 @@ fail: return -1; } -static int stree_decompress_file(FILE *input, FILE *output, uint32_t read_sz) +static int stree_decompress_file(FILE *input, FILE *output) { uint32_t len; uint8_t *read_buf; uint8_t *write_buf; uint8_t *aux_buf; + uint32_t read_sz = STREE_READSZ; read_buf = malloc(read_sz); write_buf = malloc(read_sz); @@ -282,12 +433,41 @@ enum mode { mode_tans_only, }; +static int compress_file(FILE *input, FILE *output, enum mode mode) +{ + switch (mode) { + case mode_both: + return stree_tANS_compress_file(input, output); + case mode_stree_only: + return stree_compress_file(input, output); + case mode_tans_only: + return tANS_compress_file(input, output); + default: + return -1; + } +} + +static int decompress_file(FILE *input, FILE *output, enum mode mode) +{ + switch (mode) { + case mode_both: + return stree_tANS_decompress_file(input, output); + case mode_stree_only: + return stree_decompress_file(input, output); + case mode_tans_only: + return tANS_decompress_file(input, output); + default: + return -1; + } +} + int main(int argc, char *argv[]) { - int ret, opt, to_stdout = 0, compress = 1; + int ret, opt, from_stdin, to_stdout = 0, compress = 1; enum mode mode = mode_both; - FILE *input = stdin, *output = stdout; - const char *suffix = ".ans"; + FILE *input, *output; + const char *default_suffix = ".ans"; + const char *suffix = (const char *) 0; char outpath[1024]; while ((opt = getopt(argc, argv, "hcdstzS:")) != -1) { @@ -303,9 +483,11 @@ int main(int argc, char *argv[]) break; case 's': mode = mode_stree_only; + default_suffix = ".stree"; break; case 't': mode = mode_tans_only; + default_suffix = ".tans"; break; case 'z': compress = 1; @@ -316,61 +498,52 @@ int main(int argc, char *argv[]) } } + suffix = suffix ? suffix : default_suffix; + argv += optind; argc -= optind; - if (argc == 0) { - if (compress) { - if (mode == mode_stree_only) { - return stree_compress_file(input, output, MAX_BUFSZ) != 0; - } else { - return tANS_compress_file(input, output) != 0; - } - } else { - if (mode == mode_stree_only) { - return stree_decompress_file(input, output, MAX_BUFSZ) != 0; - } else { - return tANS_decompress_file(input, output) != 0; - } - } - } else { - for (; argc >= 1; --argc, ++argv) { + from_stdin = argc == 0; + to_stdout |= from_stdin; + + do { + if (from_stdin) + input = stdin; + else input = fopen(argv[0], "rb"); - if (!input) { + + if (!input) { + fprintf(stderr, "tANS: fopen: %s: %s\n", argv[0], strerror(errno)); + return 2; + } + + if (to_stdout) { + output = stdout; + } else { + strncpy(outpath, argv[0], sizeof outpath - 1); + strncpy(outpath + strlen(outpath), suffix, sizeof outpath - 1 - strlen(outpath)); + output = fopen(outpath, "wb"); + if (!output) { fprintf(stderr, "tANS: fopen: %s: %s\n", argv[0], strerror(errno)); return 2; } + } - if (!to_stdout) { - strncpy(outpath, argv[0], sizeof outpath - 1); - strncpy(outpath + strlen(outpath), suffix, sizeof outpath - 1 - strlen(outpath)); - output = fopen(outpath, "wb"); - if (!output) { - fprintf(stderr, "tANS: fopen: %s: %s\n", argv[0], strerror(errno)); - return 2; - } - } - - if (compress) { - if (mode == mode_stree_only) { - ret = stree_compress_file(input, output, MAX_BUFSZ) != 0; - } else { - ret = tANS_compress_file(input, output) != 0; - } - } else { - if (mode == mode_stree_only) { - ret = stree_decompress_file(input, output, MAX_BUFSZ) != 0; - } else { - ret = tANS_decompress_file(input, output); - } - } + if (compress) + ret = compress_file(input, output, mode); + else + ret = decompress_file(input, output, mode); + if (!from_stdin) fclose(input); - if (!to_stdout) fclose(output); - if (ret != 0) return 1; - } - } + if (!to_stdout) + fclose(output); + + if (ret != 0) return 1; + + ++argv; + } while (--argc > 0); return 0; } diff --git a/src/tANS_constants.h b/src/tANS_constants.h index 462c18b..726df7f 100644 --- a/src/tANS_constants.h +++ b/src/tANS_constants.h @@ -1,6 +1,7 @@ #pragma once #define TANS_MAGIC 0xfac0162a +#define TANS_ONLY_MAGIC 0xfac0162b #define TANS_LOG2_MAX_TBLSZ 12 #define TANS_MAX_TBLSZ (1 << TANS_LOG2_MAX_TBLSZ) #define TANS_MAX_SYMBOLS 1024