#include <string.h>
#include <unistd.h>
-#define MAX_BUFSZ 1048576
-#define INIT_READSZ 1024
-#define N_SYMBOLS 256
-#define N_AUX 3
-#define LOG2_TBLSZ 12
+#define MAX_BUFSZ 1048576
+#define STREE_READSZ 16777216
+#define INIT_READSZ 1024
+#define N_SYMBOLS 256
+#define N_AUX 3
+#define LOG2_TBLSZ 12
static void usage(void)
{
return len * 2;
}
-static int tANS_compress_file(FILE* input, FILE *output)
+static int stree_tANS_compress_file(FILE* input, FILE *output)
{
uint32_t i, len, bits, count;
uint8_t *read_buf;
return -1;
}
-static int tANS_decompress_file(FILE* input, FILE *output)
+static int stree_tANS_decompress_file(FILE* input, FILE *output)
{
uint32_t i, len, bits, count;
uint8_t *read_buf;
return -1;
}
-static int stree_compress_file(FILE *input, FILE *output, uint32_t read_sz)
+/*
+ * Compress `input` to `output` with the run-length tANS coder only.
+ *
+ * The output starts with the TANS_ONLY_MAGIC word. Every block read from
+ * `input` is then emitted as: block length, bit count returned by
+ * tANS_rl_encode(), the encoder state word st->x, and (bits + 7) / 8
+ * payload bytes. Header words are written with fwrite() as they are laid
+ * out in memory.
+ *
+ * The first block is INIT_READSZ bytes; the block size doubles after each
+ * block and is capped at MAX_BUFSZ. The tables used for a block are rebuilt
+ * from the symbol counts accumulated over all earlier blocks.
+ *
+ * Returns 0 on success and -1 on allocation failure, table initialisation
+ * failure, or an I/O error on either stream.
+ */
+static int tANS_compress_file(FILE* input, FILE *output)
+{
+    uint32_t i, len, bits, count;
+    uint8_t *read_buf;
+    uint8_t *write_buf;
+    double p[N_AUX][N_SYMBOLS] = {0};
+    struct tANS_freq_tbl *freq_tbls;
+    struct tANS_symbol_tbl *symbol_tbls;
+    struct tANS_rl_encode_st *st;
+    const uint16_t log2_tblsz = LOG2_TBLSZ;
+    uint32_t read_sz = INIT_READSZ;
+    uint32_t magic = TANS_ONLY_MAGIC;
+    int ret = -1;
+
+    freq_tbls = malloc(sizeof *freq_tbls * N_AUX);
+    symbol_tbls = malloc(sizeof *symbol_tbls * N_AUX);
+    st = malloc(sizeof *st);
+    read_buf = malloc(MAX_BUFSZ);
+    write_buf = calloc(tANS_max_compressed_size(MAX_BUFSZ), 1);
+
+    if (!freq_tbls || !symbol_tbls || !st || !read_buf || !write_buf) goto done;
+
+    if (fwrite(&magic, sizeof magic, 1, output) != 1) goto done;
+
+    while (!feof(input)) {
+        /* Rebuild the coding tables from the counts gathered so far. */
+        for (i = 0; i < N_AUX; ++i) {
+            if (tANS_freq_tbl_init(freq_tbls + i, N_SYMBOLS, p[i], log2_tblsz) != 0) goto done;
+            if (tANS_symbol_tbl_init(symbol_tbls + i, freq_tbls + i) != 0) goto done;
+        }
+        tANS_rl_encode_st_init(st, symbol_tbls);
+
+        len = (uint32_t) fread(read_buf, 1, read_sz, input);
+        if (len == 0) break;
+
+        /*
+         * Update the counts from this block: p[0] counts bytes seen outside
+         * a zero run (including the zero that opens a run), p[1] counts the
+         * number of additional zeros in each run chunk (at most 255), and
+         * p[2] counts the byte that ends a run. Every index is checked
+         * against len so that a block ending in zeros never reads past the
+         * data that fread() delivered.
+         */
+        for (i = 0; i < len; ++i) {
+            ++p[0][read_buf[i]];
+            if (read_buf[i] != 0) continue;
+
+            while (i < len && read_buf[i] == 0) {
+                count = 0;
+                while (++i < len && read_buf[i] == 0 && count < 255) ++count;
+                ++p[1][count];
+            }
+            /* A run cut off by the end of the block has no terminator. */
+            if (i < len) ++p[2][read_buf[i]];
+        }
+
+        if (fwrite(&len, sizeof len, 1, output) != 1) goto done;
+        bits = tANS_rl_encode(st, read_buf, len, write_buf);
+        if (fwrite(&bits, sizeof bits, 1, output) != 1) goto done;
+        if (fwrite(&st->x, sizeof st->x, 1, output) != 1) goto done;
+        if (fwrite(write_buf, (bits + 7) / 8, 1, output) != 1) goto done;
+
+        /* Clear the bytes just used so the next block starts from zeros. */
+        memset(write_buf, 0, (bits + 7) / 8);
+
+        read_sz *= 2;
+        if (read_sz > MAX_BUFSZ) read_sz = MAX_BUFSZ;
+    }
+
+    if (ferror(input)) goto done;
+
+    ret = 0;
+
+done:
+    /* Single exit: free(NULL) is a no-op, so partial allocation is fine. */
+    free(freq_tbls);
+    free(symbol_tbls);
+    free(st);
+    free(read_buf);
+    free(write_buf);
+    return ret;
+}
+
+/*
+ * Decompress a stream produced by the tANS-only compressor from `input`
+ * to `output`.
+ *
+ * The stream must start with TANS_ONLY_MAGIC. Each block consists of the
+ * decoded length, the payload bit count, the coder state word and
+ * (bits + 7) / 8 payload bytes. The tables for a block are rebuilt from
+ * the symbol counts of all previously decoded blocks.
+ *
+ * The length and bit count come from the file, so both are validated
+ * against the buffer sizes before they are used.
+ *
+ * Decoding stops when no further block length can be read. Returns 0 on
+ * success and -1 on allocation failure, bad magic, corrupt or truncated
+ * block data, or an I/O error on either stream.
+ */
+static int tANS_decompress_file(FILE* input, FILE *output)
+{
+    uint32_t i, len, bits, count;
+    uint8_t *read_buf;
+    uint8_t *write_buf;
+    double p[N_AUX][N_SYMBOLS] = {0};
+    struct tANS_freq_tbl *freq_tbls;
+    struct tANS_symbol_tbl *symbol_tbls;
+    struct tANS_rl_decode_st *st;
+    const uint16_t log2_tblsz = LOG2_TBLSZ;
+    uint32_t magic;
+    const size_t comp_cap = tANS_max_compressed_size(MAX_BUFSZ);
+    size_t nbytes;
+    int ret = -1;
+
+    freq_tbls = malloc(sizeof *freq_tbls * N_AUX);
+    symbol_tbls = malloc(sizeof *symbol_tbls * N_AUX);
+    st = malloc(sizeof *st);
+    read_buf = malloc(comp_cap);
+    write_buf = malloc(MAX_BUFSZ);
+
+    if (!freq_tbls || !symbol_tbls || !st || !read_buf || !write_buf) goto done;
+
+    if (fread(&magic, sizeof magic, 1, input) != 1) goto done;
+    if (magic != TANS_ONLY_MAGIC) {
+        fprintf(stderr, "tANS: not a valid tANS only file\n");
+        goto done;
+    }
+
+    while (!feof(input)) {
+        /* Rebuild the coding tables from the counts gathered so far. */
+        for (i = 0; i < N_AUX; ++i) {
+            if (tANS_freq_tbl_init(freq_tbls + i, N_SYMBOLS, p[i], log2_tblsz) != 0) goto done;
+            if (tANS_symbol_tbl_init(symbol_tbls + i, freq_tbls + i) != 0) goto done;
+        }
+        tANS_rl_decode_st_init(st, symbol_tbls);
+
+        /* No further block header: end of stream (or read error, see below). */
+        if (fread(&len, sizeof len, 1, input) != 1) break;
+        if (fread(&bits, sizeof bits, 1, input) != 1) goto done;
+        if (fread(&st->x, sizeof st->x, 1, input) != 1) goto done;
+
+        /*
+         * len and bits are untrusted. len must fit in write_buf, and the
+         * payload must fit in read_buf after the 4-byte offset. The byte
+         * count is computed without the wrap-around that (bits + 7) has
+         * for values close to UINT32_MAX.
+         */
+        nbytes = (size_t) (bits / 8) + (bits % 8 != 0);
+        if (len > MAX_BUFSZ || comp_cap < 4 || nbytes > comp_cap - 4) {
+            fprintf(stderr, "tANS: corrupted file\n");
+            goto done;
+        }
+
+        if (fread(read_buf + 4, nbytes, 1, input) != 1) goto done;
+        st->x &= symbol_tbls[0].tblsz - 1;
+        bits = tANS_rl_decode(st, write_buf, len, read_buf + 4, bits);
+        if (bits != 0) {
+            fprintf(stderr, "tANS: corrupted file\n");
+            goto done;
+        }
+        if (fwrite(write_buf, len, 1, output) != 1) goto done;
+
+        /*
+         * Update the counts from the decoded block: p[0] counts bytes seen
+         * outside a zero run (including the zero that opens a run), p[1]
+         * counts the number of additional zeros in each run chunk (at most
+         * 255), and p[2] counts the byte that ends a run. Every index is
+         * checked against len so that a block ending in zeros never reads
+         * past the decoded data.
+         */
+        for (i = 0; i < len; ++i) {
+            ++p[0][write_buf[i]];
+            if (write_buf[i] != 0) continue;
+
+            while (i < len && write_buf[i] == 0) {
+                count = 0;
+                while (++i < len && write_buf[i] == 0 && count < 255) ++count;
+                ++p[1][count];
+            }
+            /* A run cut off by the end of the block has no terminator. */
+            if (i < len) ++p[2][write_buf[i]];
+        }
+    }
+
+    /* Distinguish a genuine end of file from a failed read. */
+    if (ferror(input)) goto done;
+
+    ret = 0;
+
+done:
+    /* Single exit: free(NULL) is a no-op, so partial allocation is fine. */
+    free(freq_tbls);
+    free(symbol_tbls);
+    free(st);
+    free(read_buf);
+    free(write_buf);
+    return ret;
+}
+
+static int stree_compress_file(FILE *input, FILE *output)
{
uint32_t len;
uint8_t *read_buf;
uint8_t *write_buf;
uint8_t *aux_buf;
+ uint32_t read_sz = STREE_READSZ;
read_buf = malloc(read_sz);
write_buf = malloc(read_sz);
return -1;
}
-static int stree_decompress_file(FILE *input, FILE *output, uint32_t read_sz)
+static int stree_decompress_file(FILE *input, FILE *output)
{
uint32_t len;
uint8_t *read_buf;
uint8_t *write_buf;
uint8_t *aux_buf;
+ uint32_t read_sz = STREE_READSZ;
read_buf = malloc(read_sz);
write_buf = malloc(read_sz);
mode_tans_only,
};
+/*
+ * Route a compression request to the routine that implements `mode`.
+ * Returns whatever the selected routine returns, or -1 when `mode` is not
+ * one of the handled enumerators.
+ */
+static int compress_file(FILE *input, FILE *output, enum mode mode)
+{
+    if (mode == mode_both)
+        return stree_tANS_compress_file(input, output);
+
+    if (mode == mode_stree_only)
+        return stree_compress_file(input, output);
+
+    if (mode == mode_tans_only)
+        return tANS_compress_file(input, output);
+
+    /* Unhandled mode value. */
+    return -1;
+}
+
+/*
+ * Route a decompression request to the routine that implements `mode`.
+ * Returns whatever the selected routine returns, or -1 when `mode` is not
+ * one of the handled enumerators.
+ */
+static int decompress_file(FILE *input, FILE *output, enum mode mode)
+{
+    if (mode == mode_both)
+        return stree_tANS_decompress_file(input, output);
+
+    if (mode == mode_stree_only)
+        return stree_decompress_file(input, output);
+
+    if (mode == mode_tans_only)
+        return tANS_decompress_file(input, output);
+
+    /* Unhandled mode value. */
+    return -1;
+}
+
int main(int argc, char *argv[])
{
- int ret, opt, to_stdout = 0, compress = 1;
+ int ret, opt, from_stdin, to_stdout = 0, compress = 1;
enum mode mode = mode_both;
- FILE *input = stdin, *output = stdout;
- const char *suffix = ".ans";
+ FILE *input, *output;
+ const char *default_suffix = ".ans";
+ const char *suffix = (const char *) 0;
char outpath[1024];
while ((opt = getopt(argc, argv, "hcdstzS:")) != -1) {
break;
case 's':
mode = mode_stree_only;
+ default_suffix = ".stree";
break;
case 't':
mode = mode_tans_only;
+ default_suffix = ".tans";
break;
case 'z':
compress = 1;
}
}
+ suffix = suffix ? suffix : default_suffix;
+
argv += optind;
argc -= optind;
- if (argc == 0) {
- if (compress) {
- if (mode == mode_stree_only) {
- return stree_compress_file(input, output, MAX_BUFSZ) != 0;
- } else {
- return tANS_compress_file(input, output) != 0;
- }
- } else {
- if (mode == mode_stree_only) {
- return stree_decompress_file(input, output, MAX_BUFSZ) != 0;
- } else {
- return tANS_decompress_file(input, output) != 0;
- }
- }
- } else {
- for (; argc >= 1; --argc, ++argv) {
+ from_stdin = argc == 0;
+ to_stdout |= from_stdin;
+
+ do {
+ if (from_stdin)
+ input = stdin;
+ else
input = fopen(argv[0], "rb");
- if (!input) {
+
+ if (!input) {
+ fprintf(stderr, "tANS: fopen: %s: %s\n", argv[0], strerror(errno));
+ return 2;
+ }
+
+ if (to_stdout) {
+ output = stdout;
+ } else {
+ strncpy(outpath, argv[0], sizeof outpath - 1);
+ strncpy(outpath + strlen(outpath), suffix, sizeof outpath - 1 - strlen(outpath));
+ output = fopen(outpath, "wb");
+ if (!output) {
fprintf(stderr, "tANS: fopen: %s: %s\n", argv[0], strerror(errno));
return 2;
}
+ }
- if (!to_stdout) {
- strncpy(outpath, argv[0], sizeof outpath - 1);
- strncpy(outpath + strlen(outpath), suffix, sizeof outpath - 1 - strlen(outpath));
- output = fopen(outpath, "wb");
- if (!output) {
- fprintf(stderr, "tANS: fopen: %s: %s\n", argv[0], strerror(errno));
- return 2;
- }
- }
-
- if (compress) {
- if (mode == mode_stree_only) {
- ret = stree_compress_file(input, output, MAX_BUFSZ) != 0;
- } else {
- ret = tANS_compress_file(input, output) != 0;
- }
- } else {
- if (mode == mode_stree_only) {
- ret = stree_decompress_file(input, output, MAX_BUFSZ) != 0;
- } else {
- ret = tANS_decompress_file(input, output);
- }
- }
+ if (compress)
+ ret = compress_file(input, output, mode);
+ else
+ ret = decompress_file(input, output, mode);
+ if (!from_stdin)
fclose(input);
- if (!to_stdout) fclose(output);
- if (ret != 0) return 1;
- }
- }
+ if (!to_stdout)
+ fclose(output);
+
+ if (ret != 0) return 1;
+
+ ++argv;
+ } while (--argc > 0);
return 0;
}