#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
struct node;
son->brother = brother;
}
-static void node_split_son(struct node *self, uint8_t edge, size_t len, struct node *split, struct node *son, const uint8_t *str)
+/*
+ * Put `split` where `edge` used to hang below `self`.
+ *
+ * `split` takes over the range [edge->from, edge->from + len), inherits
+ * edge's old brother, gets `edge` as its first son and has no suffix link.
+ * `edge` is shortened by `len` at the front and `son` becomes its brother.
+ * `brother` is the sibling preceding `edge` in self's son list, or null when
+ * `edge` is the first son; it decides which pointer is redirected to `split`.
+ */
+static void node_split_son(struct node *self, struct node *edge, struct node *brother, struct node *split, struct node *son, size_t len)
{
- struct node *node, *prev;
-
- for (prev = (struct node *) 0, node = self->son; node; prev = node, node = node->brother) {
- if (str[node->from] == edge) {
- split->from = node->from;
- split->to = node->from + len;
- split->brother = node->brother;
- split->son = node;
- split->link = (struct node *) 0;
-
- node->from = node->from + len;
- node->brother = son;
-
- if (prev)
- prev->brother = split;
- else
- self->son = split;
-
- return;
- }
- }
-}
+	const size_t start = edge->from;
+
+	/* Must read edge->brother before it is overwritten below. */
+	split->from = start;
+	split->to = start + len;
+	split->brother = edge->brother;
+	split->son = edge;
+	split->link = (struct node *) 0;
-static bool node_edge_present(const struct node *self, const uint8_t *str, size_t i)
-{
- const struct node *node;
+	/* The old node keeps the remainder of the range, followed by `son`. */
+	edge->from = start + len;
+	edge->brother = son;
- for (node = self->son; node; node = node->brother)
- if (str[node->from] == str[i])
- return true;
- return false;
+	/* Redirect whichever pointer used to reach `edge`. */
+	if (!brother)
+		self->son = split;
+	else
+		brother->brother = split;
}
-static bool node_edge_next_present(const struct node *self, size_t len, const uint8_t *str, size_t i)
+/*
+ * Decoder-side son lookup.
+ *
+ * When `record_seen` is true the symbol to follow is not known yet: self's
+ * sons are walked in list order, every son whose first symbol is not yet in
+ * `seen` is marked and consumes one unit of `*code`, and the son reached
+ * when `*code` is 0 determines `edge`.  If the sons run out first, null is
+ * returned and `*code` holds what is left.
+ *
+ * In either mode the function then returns the son whose first symbol equals
+ * `edge` (null if there is none) and stores the son preceding it in
+ * `*brother` (null when it is the first son).  `*brother` is left untouched
+ * on the early null return above.
+ */
+static struct node *node_edge_inv(struct node *self, uint8_t edge, const uint8_t *str, struct node **brother, bool seen[], uint8_t *code, bool record_seen)
{
- return str[self->from+len] == str[i];
-}
+	struct node *node;
-static bool node_edge_end(const struct node *self, size_t len)
-{
- return self && self->from + len >= self->to;
+
+	if (record_seen) {
+		for (node = self->son; node; node = node->brother) {
+			if (seen[str[node->from]])
+				continue;
+			seen[str[node->from]] = true;
+			if (*code == 0) {
+				edge = str[node->from];
+				break;
+			}
+			--*code;
+		}
+		if (!node)
+			return (struct node *) 0;
+	}
+
+	for (*brother = (struct node *) 0, node = self->son; node; *brother = node, node = node->brother)
+		if (str[node->from] == edge)
+			return node;
+
+	return (struct node *) 0;
}
-static struct node *node_edge(struct node *self, uint8_t edge, const uint8_t *str)
+static struct node *node_edge(struct node *self, uint8_t edge, const uint8_t *str, struct node **brother, bool seen[], uint8_t *code, bool record_seen)
{
struct node *node;
- for (node = self->son; node; node = node->brother)
+ if (record_seen) {
+ for (node = self->son; node; node = node->brother) {
+ if (str[node->from] == edge)
+ break;
+ if (!seen[str[node->from]]) {
+ seen[str[node->from]] = true;
+ ++*code;
+ }
+ }
+ }
+
+ for (*brother = (struct node *) 0, node = self->son; node; *brother = node, node = node->brother)
if (str[node->from] == edge)
return node;
static bool node_validate_suffix(struct node *self, size_t len, const uint8_t *str, size_t i)
{
- struct node *node;
+ struct node *node, *brother;
size_t j;
if (len == 0) return true;
- if (!node_edge_present(self, str, i)) return false;
-
- node = node_edge(self, str[i], str);
+ node = node_edge(self, str[i], str, &brother, (bool *) 0, (uint8_t *) 0, false);
+ if (!node) return false;
for (j = node->from; j < node->to && len > 0; --len, ++j, ++i) {
if (str[j] != str[i]) return false;
}
+/*
+ * Run node_validate_suffix() for (len, i), (len - 1, i + 1), ... down to a
+ * length of 1, stopping with false at the first check that fails.
+ */
static bool node_validate_suffixes(struct node *self, size_t len, const uint8_t *str, size_t i)
{
if (len == 0) return true;
- if (!node_validate_suffixes(self, len - 1, str, i + 1)) return false;
- return node_validate_suffix(self, len, str, i);
+	while (len != 0) {
+		if (!node_validate_suffix(self, len, str, i))
+			return false;
+		++i;
+		--len;
+	}
+	return true;
}
static size_t stree_max_size(size_t len)
struct node *prev;
struct node *active_node;
struct node *active_edge = (struct node *) 0;
+ struct node *brother = (struct node *) 0;
+ struct node *new_leaf;
size_t active_len = 0;
size_t rem = 0;
size_t i;
size_t n;
- bool cont;
+ bool present;
+ bool seen[256];
nodes = (struct node *) malloc(sizeof(struct node) * stree_max_size(len));
if (!nodes) return -1;
root = nodes + 0;
node_init(root, (size_t) -1);
+ root->link = root;
n = 1;
active_node = root;
aux[i] = (size_t) (active_node - root);
prev = (struct node *) 0;
++rem;
- cont = false;
- while (rem > 0 && !cont) {
- if (active_len == 0) {
- if (node_edge_present(active_node, in, i))
- cont = true;
- else {
+ memset(seen, 0, sizeof seen);
+ out[i] = 0;
+ active_edge = node_edge(active_node, in[i-active_len], in, &brother, seen, out + i, active_len == 0);
+ do {
+ present = active_edge && in[active_edge->from+active_len] == in[i];
+ if (present) {
+ ++active_len;
+ } else {
+ if (!active_edge) {
node_init(nodes + n, i);
node_add_son(active_node, nodes + n);
- ++n;
- --rem;
- }
- } else if (active_len > 0) {
- if (node_edge_next_present(active_edge, active_len, in, i))
- cont = true;
- else {
+ new_leaf = nodes + n;
+ ++n, --rem;
+ } else {
+ printf("seen? %d\n", seen[in[active_edge->from+active_len]]);
+ if (!seen[in[active_edge->from+active_len]]) {
+ seen[in[active_edge->from+active_len]] = true;
+ ++out[i];
+ }
node_init(nodes + n + 1, i);
- node_split_son(active_node, in[active_edge->from], active_len, nodes + n, nodes + n + 1, in);
+ node_split_son(active_node, active_edge, brother, nodes + n, nodes + n + 1, active_len);
if (prev) prev->link = nodes + n;
- prev = nodes + n;
- n += 2;
- --rem;
- if (active_node == root) {
- --active_len;
- }
+ prev = nodes + n, n += 2, --rem;
+ if (active_node == root) --active_len;
+ new_leaf = (struct node *) 0;
}
- }
- if (!cont && active_node != root) {
active_node = active_node->link ? active_node->link : (active_len = rem - 1, root);
+ active_edge = node_edge(active_node, in[i-active_len], in, &brother, seen, out + i, active_len == 0 && active_node->son != new_leaf);
}
- active_edge = node_edge(active_node, in[i-active_len], in);
- if (cont) ++active_len;
- while (node_edge_end(active_edge, active_len)) {
+ while (active_edge && active_edge->from + active_len >= active_edge->to) {
active_node = active_edge;
active_len -= active_node->to - active_node->from;
- active_edge = node_edge(active_node, in[i-active_len], in);
+ active_edge = node_edge(active_node, in[i-active_len], in, &brother, seen, out + i, active_len == 0 && !present);
}
- }
+ } while (rem > 0 && !present);
+ if (!present)
+ for (int a = 0; a < in[i]; ++a)
+ if (!seen[a])
+ ++out[i];
}
#ifndef NDEBUG
free(nodes);
return -1;
}
+
+ //node_dbg(root, root, 0, in, len);
+#endif
+
+ free(nodes);
+ return 0;
+}
+
+/*
+ * Decode `len` codes from `in` into `out`, building the suffix tree of the
+ * decoded text as it goes.
+ *
+ * Each code counts how many distinct candidate symbols are skipped before
+ * the real one: first the not-yet-seen edge symbols met while walking from
+ * the active point (tracked in `seen`), then, if no edge matches, the
+ * remaining unseen byte values in ascending order.
+ *
+ * aux[i] receives the index, relative to the root, of the active node at the
+ * start of step i.
+ *
+ * Returns 0 on success.  Returns -1 if the node array cannot be allocated,
+ * if a code does not select any symbol, or (when NDEBUG is not defined) if
+ * the finished tree fails node_validate_suffixes().
+ */
+int stree_decode(size_t len, const uint8_t *in, uint8_t *out, size_t *aux)
+{
+	struct node *nodes;
+	struct node *root;
+	struct node *prev;
+	struct node *active_node;
+	struct node *active_edge = (struct node *) 0;
+	struct node *brother = (struct node *) 0;
+	struct node *new_leaf = (struct node *) 0;
+	size_t active_len = 0;
+	size_t rem = 0;
+	size_t i, n;
+	unsigned sym;
+	bool present;
+	bool seen[256];
+	uint8_t code;
+
+	nodes = (struct node *) malloc(sizeof(struct node) * stree_max_size(len));
+	if (!nodes) return -1;
+	root = nodes + 0;
+	node_init(root, (size_t) -1);
+	root->link = root;
+	n = 1;
+	active_node = root;
+
+	for (i = 0; i < len; ++i) {
+		aux[i] = (size_t) (active_node - root);
+		prev = (struct node *) 0;
+		++rem;
+		memset(seen, 0, sizeof seen);
+		code = in[i];
+
+		active_edge = node_edge_inv(active_node, out[i-active_len], out, &brother, seen, &code, active_len == 0);
+		do {
+			/* The symbol is known once an edge is in hand and the code is used up. */
+			present = active_edge && code == 0;
+			if (present) {
+				out[i] = out[active_edge->from+active_len];
+				++active_len;
+			} else {
+				if (!active_edge) {
+					node_init(nodes + n, i);
+					node_add_son(active_node, nodes + n);
+					new_leaf = nodes + n;
+					++n, --rem;
+				} else {
+					/* The symbol on the edge is a rejected candidate; count it once. */
+					if (!seen[out[active_edge->from+active_len]]) {
+						seen[out[active_edge->from+active_len]] = true;
+						--code;
+					}
+					node_init(nodes + n + 1, i);
+					node_split_son(active_node, active_edge, brother, nodes + n, nodes + n + 1, active_len);
+					if (prev) prev->link = nodes + n;
+					prev = nodes + n, n += 2, --rem;
+					if (active_node == root) --active_len;
+					new_leaf = (struct node *) 0;
+				}
+				active_node = active_node->link ? active_node->link : (active_len = rem - 1, root);
+				active_edge = node_edge_inv(active_node, out[i-active_len], out, &brother, seen, &code, active_len == 0 && active_node->son != new_leaf);
+			}
+			while (active_edge && active_edge->from + active_len >= active_edge->to) {
+				active_node = active_edge;
+				active_len -= active_node->to - active_node->from;
+				active_edge = node_edge_inv(active_node, out[i-active_len], out, &brother, seen, &code, active_len == 0 && !present);
+			}
+		} while (rem > 0 && !present);
+		if (!present) {
+			/* No edge matched: take the code-th byte value not seen so far. */
+			for (sym = 0; sym < 256; ++sym)
+				if (!seen[sym] && code-- == 0)
+					break;
+			if (sym == 256) {
+				/* The code selects nothing: the input is malformed. */
+				free(nodes);
+				return -1;
+			}
+			out[i] = (uint8_t) sym;
+		}
+	}
+
+#ifndef NDEBUG
+	if (!node_validate_suffixes(root, len, out, 0)) {
+		free(nodes);
+		return -1;
+	}
+
+	node_dbg(root, root, 0, out, len);
#endif
- (void) out; // TODO
free(nodes);
return 0;
}
#include "stree.h"
-enum test_result test_stree_encode_empty(void)
+static enum test_result test_stree_encode_empty(void)
{
const uint8_t *in = (const uint8_t *) "";
uint8_t out[1];
return TEST_SUCCESS;
}
-enum test_result test_stree_encode_simple(void)
+static enum test_result test_stree_encode_simple(void)
{
const uint8_t *in = (const uint8_t *) "abc";
uint8_t out[3];
size_t aux[3];
ASSERT_EQ(0, stree_encode(3, in, out, aux));
- /*
- ASSERT_EQ(0, out[0]);
- ASSERT_EQ(1, out[1]);
- ASSERT_EQ(2, out[2]);
- */
+ ASSERT_EQ('a', out[0]);
+ ASSERT_EQ('b', out[1]);
+ ASSERT_EQ('c', out[2]);
ASSERT_EQ(0, aux[0]);
ASSERT_EQ(0, aux[1]);
return TEST_SUCCESS;
}
-enum test_result test_stree_encode_nontrivial(void)
+static enum test_result test_stree_encode_nontrivial(void)
{
const uint8_t *in = (const uint8_t *) "abaaa";
uint8_t out[5];
return TEST_SUCCESS;
}
-enum test_result test_stree_encode_so_example(void)
+static enum test_result test_stree_encode_so_example(void)
{
const uint8_t *in = (const uint8_t *) "abcabxabcd";
uint8_t out[10];
return TEST_SUCCESS;
}
-enum test_result test_stree_tricky_suffix_link(void)
+static enum test_result test_stree_tricky_suffix_link(void)
{
const uint8_t *in = (const uint8_t *) "cdddcdc";
uint8_t out[7];
return TEST_SUCCESS;
}
-enum test_result test_stree_minimal(void)
+static enum test_result test_stree_minimal(void)
{
const uint8_t *in = (const uint8_t *) "abcdeabacacabb";
uint8_t out[14];
return TEST_SUCCESS;
}
-enum test_result test_stree_minimal_2(void)
+static enum test_result test_stree_minimal_2(void)
{
const uint8_t *in = (const uint8_t *) "abcdeabacacabbabcdcccacc";
uint8_t out[24];
return TEST_SUCCESS;
}
-enum test_result test_stree_minimal_3(void)
+static enum test_result test_stree_minimal_3(void)
{
const uint8_t *in = (const uint8_t *) "abcdeabacacabbabcdcccaccccaa";
uint8_t out[28];
return TEST_SUCCESS;
}
-enum test_result test_stree_minimal_4(void)
+static enum test_result test_stree_minimal_4(void)
{
const uint8_t *in = (const uint8_t *) "dbbcaccbdbdbde";
uint8_t out[14];
return TEST_SUCCESS;
}
-enum test_result test_stree_long(void)
+/*
+ * Round trip of an 11-byte input: decoding the encoder's output must give
+ * back the input, and both passes must record the same aux values.
+ */
+static enum test_result test_stree_minimal_5(void)
+{
+	enum { LEN = 11 };
+	const uint8_t *in = (const uint8_t *) "acaaeabdecd";
+	uint8_t enc[LEN];
+	uint8_t dec[LEN];
+	size_t aux1[LEN];
+	size_t aux2[LEN];
+	size_t j;
+
+	ASSERT_EQ(0, stree_encode(LEN, in, enc, aux1));
+	ASSERT_EQ(0, stree_decode(LEN, enc, dec, aux2));
+
+	for (j = 0; j < LEN; ++j) {
+		ASSERT_EQ(in[j], dec[j]);
+	}
+
+	for (j = 0; j < LEN; ++j) {
+		ASSERT_EQ(aux1[j], aux2[j]);
+	}
+
+	return TEST_SUCCESS;
+}
+
+/* Encoding a 42-byte input made of repeated "abc"/"ab" runs must succeed. */
+static enum test_result test_stree_repeating(void)
+{
+	static const char text[] = "abcabcabcabcabcdabcabcabcabcabcdababababab";
+	enum { TEXT_LEN = sizeof text - 1 };
+	uint8_t encoded[TEXT_LEN];
+	size_t states[TEXT_LEN];
+
+	ASSERT_EQ(0, stree_encode(TEXT_LEN, (const uint8_t *) text, encoded, states));
+
+	return TEST_SUCCESS;
+}
+
+static enum test_result test_stree_long(void)
{
const uint8_t *in = (const uint8_t *) "abcdeabacacabbabcdcccaccccaabbbaababdadbaccabbdadbadbabaccacbbbcbadddbdababddddddabdabddddddabbbbccc";
uint8_t out[100];
return TEST_SUCCESS;
}
-enum test_result test_stree_very_long(void)
+static enum test_result test_stree_very_long(void)
{
const uint8_t *in = (const uint8_t *)
"acaaeabdecdcebcadbebddeaebeceacbbbaeeedbddeedcbdaaeddbadabaedeae"
"ceddaeabcbbbadcbdbeaaedcceaeddbbbebeabaedebccceeebeeabacdabcddcd"
"dcddcbddcdadaadbbadeedabadbaaeeeeacbaeacdecabcbdccecdededdadddec"
;
- uint8_t out[65536];
- size_t aux[65536];
- size_t i;
+ uint8_t enc[65536];
+ uint8_t dec[65536];
+ size_t aux1[65536];
+ size_t aux2[65536];
+ size_t i, j;
for (i = 0; i < 1024; ++i) {
- ASSERT_EQ(0, stree_encode(64, in + i * 64, out, aux));
+ ASSERT_EQ(0, stree_encode(64, in + i * 64, enc, aux1));
+ ASSERT_EQ(0, stree_decode(64, enc, dec, aux2));
+ for (j = 0; j < 64; ++j) {
+ ASSERT_EQ(in[i*64+j], dec[j]);
+ ASSERT_EQ(aux1[j], aux2[j]);
+ }
}
- ASSERT_EQ(0, stree_encode(65536, in, out, aux));
+ ASSERT_EQ(0, stree_encode(65536, in, enc, aux1));
+ ASSERT_EQ(0, stree_decode(65536, enc, dec, aux2));
+
+ return TEST_SUCCESS;
+}
+
+/* Decoding zero bytes must succeed. */
+static enum test_result test_stree_decode_empty(void)
+{
+	static const uint8_t encoded[1] = { 0 };
+	uint8_t decoded[1];
+	size_t states[1];
+
+	ASSERT_EQ(0, stree_decode(0, encoded, decoded, states));
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Decoding the codes 'a', 'b', 'c' must yield "abc" with every aux entry 0.
+ */
+static enum test_result test_stree_decode_simple(void)
+{
+	static const char want[] = "abc";
+	const uint8_t *encoded = (const uint8_t *) "abc";
+	uint8_t decoded[3];
+	size_t states[3];
+	size_t k;
+
+	ASSERT_EQ(0, stree_decode(3, encoded, decoded, states));
+
+	for (k = 0; k < 3; ++k) {
+		ASSERT_EQ(want[k], decoded[k]);
+	}
+
+	for (k = 0; k < 3; ++k) {
+		ASSERT_EQ(0, states[k]);
+	}
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Decoding 'a', 'b', 1, 1, 1 must yield "abaaa"; the aux values must be
+ * 0, 0, 0, 0, 3.
+ */
+static enum test_result test_stree_decode_nontrivial(void)
+{
+	static const char want_text[] = "abaaa";
+	static const int want_aux[5] = { 0, 0, 0, 0, 3 };
+	const uint8_t *encoded = (const uint8_t *) "ab\1\1\1";
+	uint8_t decoded[5];
+	size_t states[5];
+	size_t k;
+
+	ASSERT_EQ(0, stree_decode(5, encoded, decoded, states));
+
+	for (k = 0; k < 5; ++k) {
+		ASSERT_EQ(want_text[k], decoded[k]);
+	}
+
+	for (k = 0; k < 5; ++k) {
+		ASSERT_EQ(want_aux[k], states[k]);
+	}
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Round trip of "abcabxabcd": decoding the encoder's output must give back
+ * the input, and both passes must record the same aux values.
+ */
+static enum test_result test_stree_roundtrip_so_example(void)
+{
+	enum { LEN = 10 };
+	const uint8_t *in = (const uint8_t *) "abcabxabcd";
+	uint8_t enc[LEN];
+	uint8_t dec[LEN];
+	size_t aux1[LEN];
+	size_t aux2[LEN];
+	size_t i;
+
+	ASSERT_EQ(0, stree_encode(LEN, in, enc, aux1));
+	ASSERT_EQ(0, stree_decode(LEN, enc, dec, aux2));
+
+	for (i = 0; i < LEN; ++i) {
+		ASSERT_EQ(in[i], dec[i]);
+	}
+
+	for (i = 0; i < LEN; ++i) {
+		ASSERT_EQ(aux1[i], aux2[i]);
+	}
return TEST_SUCCESS;
}
RUN_TEST(test_stree_minimal_2);
RUN_TEST(test_stree_minimal_3);
RUN_TEST(test_stree_minimal_4);
+ RUN_TEST(test_stree_minimal_5);
+ RUN_TEST(test_stree_repeating);
RUN_TEST(test_stree_long);
RUN_TEST(test_stree_very_long);
+ RUN_TEST(test_stree_decode_empty);
+ RUN_TEST(test_stree_decode_simple);
+ RUN_TEST(test_stree_decode_nontrivial);
+ RUN_TEST(test_stree_roundtrip_so_example);
}