From: Geoffrey Allott Date: Sun, 25 Sep 2022 16:53:54 +0000 (+0100) Subject: output aux data with a flag X-Git-Url: https://git.pointlesshacks.com/?a=commitdiff_plain;h=c38e580616434546aa79b765e386d5ba9b47b7d6;p=tANS.git output aux data with a flag --- diff --git a/src/stree.c b/src/stree.c index 77aa7f1..66116aa 100644 --- a/src/stree.c +++ b/src/stree.c @@ -65,6 +65,17 @@ static void node_split_son(struct node *self, struct node *edge, struct node *br self->son = split; } +static size_t node_count_sons(const struct node *self) +{ + size_t count = 0; + const struct node *node; + + for (node = self->son; node; node = node->brother) + ++count; + + return count; +} + static void node_sort_sons(struct node *self) { struct node *node, *brother, *tmp; @@ -156,18 +167,6 @@ static size_t stree_max_size(size_t len) return len * 2 + 1; } -static uint8_t stree_aux(size_t rem) -{ - if (rem <= 0) return 0; - if (rem <= 1) return 1; - if (rem <= 2) return 2; - if (rem <= 3) return 3; - if (rem <= 4) return 4; - if (rem <= 5) return 5; - if (rem <= 6) return 6; - return 7; -} - int stree_encode(size_t len, const uint8_t *in, uint8_t *out, uint8_t *aux) { struct node *nodes; @@ -193,7 +192,7 @@ int stree_encode(size_t len, const uint8_t *in, uint8_t *out, uint8_t *aux) active_node = root; for (i = 0; i < len; ++i) { - aux[i] = stree_aux(rem); + aux[i] = active_len == 0 ? 1 : 0; prev = (struct node *) 0; ++rem; memset(seen, 0, sizeof seen); @@ -285,12 +284,11 @@ int stree_decode(size_t len, const uint8_t *in, uint8_t *out, uint8_t *aux) active_node = root; for (i = 0; i < len; ++i) { - aux[i] = stree_aux(rem); + aux[i] = active_len == 0 ? 1 : 0; prev = (struct node *) 0; ++rem; memset(seen, 0, sizeof seen); code = in[i]; - if (active_len == 0) active_edge = node_edge_decode(active_node, out, &brother, seen, &code); else diff --git a/src/tANS.c b/src/tANS.c index 00a0999..db759f4 100644 --- a/src/tANS.c +++ b/src/tANS.c @@ -25,7 +25,7 @@ ref: https://arxiv.org/abs/1311.2540 static void usage(void) { printf( - "usage: tANS [-hcdstz] [-S .suf] [file...]\n" + "usage: tANS [-hcdstxz] [-S .suf] [file...]\n" "\n" "Compress the given files using suffix trees and tabled Asymmetric Numeral Systems\n" "\n" @@ -33,6 +33,7 @@ static void usage(void) " -c - Send output to stdout\n" " -d - Decompress\n" " -s - Perform suffix tree transformation only\n" + " -x - Output the suffix tree auxilliary data\n" " -t - Perform tANS-rl (tabled Asymmetric Numeral Systems with run-length encoding) compression only\n" " -z - Compress (default)\n" "\n" @@ -447,10 +448,45 @@ fail: return -1; } +static int stree_aux_data(FILE *input, FILE *output) +{ + uint32_t len; + uint8_t *read_buf; + uint8_t *enc_buf; + uint8_t *aux_buf; + uint32_t read_sz = STREE_READSZ; + + read_buf = malloc(read_sz); + enc_buf = malloc(read_sz); + aux_buf = malloc(read_sz); + + if (!read_buf || !enc_buf || !aux_buf) goto fail; + + do { + len = (uint32_t) fread(read_buf, 1, read_sz, input); + if (stree_encode(len, read_buf, enc_buf, aux_buf) != 0) goto fail; + if (len > 0 && fwrite(aux_buf, len, 1, output) != 1) goto fail; + } while (len > 0); + + if (ferror(input)) goto fail; + + free(read_buf); + free(enc_buf); + free(aux_buf); + return 0; + +fail: + free(read_buf); + free(enc_buf); + free(aux_buf); + return -1; +} + enum mode { mode_both, mode_stree_only, mode_tans_only, + mode_aux_data, }; static int compress_file(FILE *input, FILE *output, enum mode mode) @@ -462,6 +498,8 @@ static int compress_file(FILE *input, FILE *output, enum mode mode) return stree_compress_file(input, output); case mode_tans_only: return tANS_compress_file(input, output); + case mode_aux_data: + return stree_aux_data(input, output); default: return -1; } @@ -476,6 +514,9 @@ static int decompress_file(FILE *input, FILE *output, enum mode mode) return stree_decompress_file(input, output); case mode_tans_only: return tANS_decompress_file(input, output); + case mode_aux_data: + fprintf(stderr, "tANS: aux_data cannot be decompressed\n"); + return -1; default: return -1; } @@ -490,7 +531,7 @@ int main(int argc, char *argv[]) const char *suffix = (const char *) 0; char outpath[1024]; - while ((opt = getopt(argc, argv, "hcdstzS:")) != -1) { + while ((opt = getopt(argc, argv, "hcdstxzS:")) != -1) { switch (opt) { case 'h': usage(); @@ -509,6 +550,10 @@ int main(int argc, char *argv[]) mode = mode_tans_only; default_suffix = ".tans"; break; + case 'x': + mode = mode_aux_data; + default_suffix = ".aux"; + break; case 'z': compress = 1; break; diff --git a/test/test_stree.c b/test/test_stree.c index 5ba1575..b55e5fa 100644 --- a/test/test_stree.c +++ b/test/test_stree.c @@ -22,10 +22,6 @@ static enum test_result test_stree_encode_simple(void) ASSERT_EQ('b', out[1]); ASSERT_EQ('c', out[2]); - ASSERT_EQ(0, aux[0]); - ASSERT_EQ(0, aux[1]); - ASSERT_EQ(0, aux[2]); - return TEST_SUCCESS; } @@ -1275,10 +1271,6 @@ static enum test_result test_stree_decode_simple(void) ASSERT_EQ('b', out[1]); ASSERT_EQ('c', out[2]); - ASSERT_EQ(0, aux[0]); - ASSERT_EQ(0, aux[1]); - ASSERT_EQ(0, aux[2]); - return TEST_SUCCESS; }