From: Geoffrey Allott Date: Tue, 23 Aug 2022 19:06:19 +0000 (+0100) Subject: fix corner case with duplicated edges X-Git-Url: https://git.pointlesshacks.com/?a=commitdiff_plain;h=73a8957da39d3ccd22881119912ae76542e645fd;p=tANS.git fix corner case with duplicated edges --- diff --git a/src/stree.c b/src/stree.c index 7e5a0f8..6995689 100644 --- a/src/stree.c +++ b/src/stree.c @@ -24,7 +24,7 @@ static void node_init(struct node *self, size_t from) self->link = (struct node *) 0; } -static void node_dbg(const struct node *self, const struct node *root, size_t indent, const uint8_t *str, size_t len) +void node_dbg(const struct node *self, const struct node *root, size_t indent, const uint8_t *str, size_t len) { const struct node *node; size_t i; @@ -69,8 +69,6 @@ static struct node *node_edge_inv(struct node *self, uint8_t edge, const uint8_t { struct node *node; - printf("calling node_edge_inv\n"); - if (record_seen) { for (node = self->son; node; node = node->brother) { if (!seen[str[node->from]]) { @@ -80,7 +78,6 @@ static struct node *node_edge_inv(struct node *self, uint8_t edge, const uint8_t break; } --*code; - printf("decrementing code to %d\n", *code); } } if (!node) { @@ -159,6 +156,7 @@ int stree_encode(size_t len, const uint8_t *in, uint8_t *out, size_t *aux) size_t n; bool present; bool seen[256]; + uint8_t code; nodes = (struct node *) malloc(sizeof(struct node) * stree_max_size(len)); if (!nodes) return -1; @@ -173,8 +171,8 @@ int stree_encode(size_t len, const uint8_t *in, uint8_t *out, size_t *aux) prev = (struct node *) 0; ++rem; memset(seen, 0, sizeof seen); - out[i] = 0; - active_edge = node_edge(active_node, in[i-active_len], in, &brother, seen, out + i, active_len == 0); + code = 0; + active_edge = node_edge(active_node, in[i-active_len], in, &brother, seen, &code, active_len == 0); do { present = active_edge && in[active_edge->from+active_len] == in[i]; if (present) { @@ -186,10 +184,9 @@ int stree_encode(size_t len, const uint8_t *in, uint8_t *out, size_t *aux) new_leaf = nodes + n; ++n, --rem; } else { - printf("seen? %d\n", seen[in[active_edge->from+active_len]]); if (!seen[in[active_edge->from+active_len]]) { seen[in[active_edge->from+active_len]] = true; - ++out[i]; + ++code; } node_init(nodes + n + 1, i); node_split_son(active_node, active_edge, brother, nodes + n, nodes + n + 1, active_len); @@ -199,18 +196,25 @@ int stree_encode(size_t len, const uint8_t *in, uint8_t *out, size_t *aux) new_leaf = (struct node *) 0; } active_node = active_node->link ? active_node->link : (active_len = rem - 1, root); - active_edge = node_edge(active_node, in[i-active_len], in, &brother, seen, out + i, active_len == 0 && active_node->son != new_leaf); + if (active_len == 0 && active_node->son == new_leaf) + brother = active_edge = (struct node *) 0; + else + active_edge = node_edge(active_node, in[i-active_len], in, &brother, seen, &code, active_len == 0 && active_node->son != new_leaf); } while (active_edge && active_edge->from + active_len >= active_edge->to) { active_node = active_edge; active_len -= active_node->to - active_node->from; - active_edge = node_edge(active_node, in[i-active_len], in, &brother, seen, out + i, active_len == 0 && !present); + if (active_len == 0 && present) + brother = active_edge = (struct node *) 0; + else + active_edge = node_edge(active_node, in[i-active_len], in, &brother, seen, &code, active_len == 0 && !present); } } while (rem > 0 && !present); if (!present) for (int a = 0; a < in[i]; ++a) if (!seen[a]) - ++out[i]; + ++code; + out[i] = code; } #ifndef NDEBUG @@ -256,15 +260,12 @@ int stree_decode(size_t len, const uint8_t *in, uint8_t *out, size_t *aux) ++rem; memset(seen, 0, sizeof seen); code = in[i]; - printf("i = %d - decoding %d\n", (int) i, code); active_edge = node_edge_inv(active_node, out[i-active_len], out, &brother, seen, &code, active_len == 0); do { - printf("loop iteration: active_node = %d, active_edge = %d, active_len = %d\n", (int) (active_node - root), (int) (active_edge - root), (int) active_len); - present = active_edge && code == 0; + present = active_edge && code == 0 && (active_len == 0 || !seen[out[active_edge->from+active_len]]); if (present) { out[i] = out[active_edge->from+active_len]; - printf("setting out[%d] = %c\n", (int) i, (char) out[active_edge->from+active_len]); ++active_len; } else { if (!active_edge) { @@ -273,7 +274,6 @@ int stree_decode(size_t len, const uint8_t *in, uint8_t *out, size_t *aux) new_leaf = nodes + n; ++n, --rem; } else { - printf("seen? %d\n", seen[out[active_edge->from+active_len]]); if (!seen[out[active_edge->from+active_len]]) { seen[out[active_edge->from+active_len]] = true; --code; @@ -286,13 +286,18 @@ int stree_decode(size_t len, const uint8_t *in, uint8_t *out, size_t *aux) new_leaf = (struct node *) 0; } active_node = active_node->link ? active_node->link : (active_len = rem - 1, root); - active_edge = node_edge_inv(active_node, out[i-active_len], out, &brother, seen, &code, active_len == 0 && active_node->son != new_leaf); - printf("!setting active_edge = %d, active_len = %d\n", (int) (active_edge - root), (int) active_len); + if (active_len == 0 && active_node->son == new_leaf) + brother = active_edge = (struct node *) 0; + else + active_edge = node_edge_inv(active_node, out[i-active_len], out, &brother, seen, &code, active_len == 0); } while (active_edge && active_edge->from + active_len >= active_edge->to) { active_node = active_edge; active_len -= active_node->to - active_node->from; - active_edge = node_edge_inv(active_node, out[i-active_len], out, &brother, seen, &code, active_len == 0 && !present); + if (active_len == 0 && present) + brother = active_edge = (struct node *) 0; + else + active_edge = node_edge_inv(active_node, out[i-active_len], out, &brother, seen, &code, active_len == 0); } } while (rem > 0 && !present); if (!present) @@ -309,7 +314,7 @@ int stree_decode(size_t len, const uint8_t *in, uint8_t *out, size_t *aux) return -1; } - node_dbg(root, root, 0, out, len); + //node_dbg(root, root, 0, out, len); #endif free(nodes); diff --git a/test/test_stree.c b/test/test_stree.c index 893b3c0..e47347e 100644 --- a/test/test_stree.c +++ b/test/test_stree.c @@ -192,6 +192,26 @@ static enum test_result test_stree_minimal_5(void) return TEST_SUCCESS; } +static enum test_result test_stree_minimal_6(void) +{ + const uint8_t *in = (const uint8_t *) "ccdebdaabddaeeadeeccacbecdddaaddcccdebbddaeccbdaeebbdcaaeaadadda"; + uint8_t enc[64]; + uint8_t dec[64]; + size_t aux1[64]; + size_t aux2[64]; + size_t i; + + ASSERT_EQ(0, stree_encode(44, in, enc, aux1)); + ASSERT_EQ(0, stree_decode(44, enc, dec, aux2)); + + for (i = 0; i < 44; ++i) { + ASSERT_EQ(in[i], dec[i]); + ASSERT_EQ(aux1[i], aux2[i]); + } + + return TEST_SUCCESS; +} + static enum test_result test_stree_repeating(void) { const uint8_t *in = (const uint8_t *) "abcabcabcabcabcdabcabcabcabcabcdababababab"; @@ -1258,6 +1278,11 @@ static enum test_result test_stree_very_long(void) ASSERT_EQ(0, stree_encode(65536, in, enc, aux1)); ASSERT_EQ(0, stree_decode(65536, enc, dec, aux2)); + for (i = 0; i < 65536; ++i) { + ASSERT_EQ(in[i], dec[i]); + ASSERT_EQ(aux1[i], aux2[i]); + } + return TEST_SUCCESS; } @@ -1312,7 +1337,6 @@ static enum test_result test_stree_decode_nontrivial(void) static enum test_result test_stree_roundtrip_so_example(void) { - int i; const uint8_t *in = (const uint8_t *) "abcabxabcd"; uint8_t enc[10]; uint8_t dec[10]; @@ -1321,10 +1345,6 @@ static enum test_result test_stree_roundtrip_so_example(void) ASSERT_EQ(0, stree_encode(10, in, enc, aux1)); ASSERT_EQ(0, stree_decode(10, enc, dec, aux2)); - for (i = 0; i < 10; ++i) - printf("%d ", enc[i]); - printf("\n"); - ASSERT_EQ(in[0], dec[0]); ASSERT_EQ(in[1], dec[1]); ASSERT_EQ(in[2], dec[2]); @@ -1362,6 +1382,7 @@ int main(void) RUN_TEST(test_stree_minimal_3); RUN_TEST(test_stree_minimal_4); RUN_TEST(test_stree_minimal_5); + RUN_TEST(test_stree_minimal_6); RUN_TEST(test_stree_repeating); RUN_TEST(test_stree_long); RUN_TEST(test_stree_very_long);