--- /dev/null
+#include "stree.h"
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct node;
+
+struct node {
+ size_t from;
+ size_t to;
+ struct node *brother;
+ struct node *son;
+ struct node *father;
+ struct node *link;
+};
+
+static void node_init(struct node *self, size_t from)
+{
+ self->from = from;
+ self->to = (size_t) -1;
+ self->brother = (struct node *) 0;
+ self->son = (struct node *) 0;
+ self->father = (struct node *) 0;
+ self->link = (struct node *) 0;
+}
+
+static void node_dbg(const struct node *self, size_t indent, const uint8_t *str, size_t len)
+{
+ const struct node *node;
+ size_t i;
+
+ for (i = 0; i < indent; ++i)
+ printf(" ");
+ printf("%d->%d \"", (int) self->from, (int) self->to);
+ for (i = self->from; i < self->to && i < len; ++i)
+ printf("%c", (char) str[i]);
+ printf("\"\n");
+ for (node = self->son; node; node = node->brother)
+ node_dbg(node, indent + 2, str, len);
+}
+
+static void node_add_son(struct node *self, struct node *son)
+{
+ struct node *brother;
+
+ brother = self->son;
+ self->son = son;
+ son->brother = brother;
+ son->father = self;
+}
+
+static void node_split_son(struct node *self, uint8_t edge, size_t len, struct node *split, struct node *son, const uint8_t *str)
+{
+ struct node *node, *prev;
+
+ for (prev = (struct node *) 0, node = self->son; node; prev = node, node = node->brother) {
+ if (str[node->from] == edge) {
+ split->from = node->from;
+ split->to = node->from + len;
+ split->brother = node->brother;
+ split->son = node;
+ split->father = self;
+ split->link = (struct node *) 0;
+
+ node->from = node->from + len;
+ node->father = split;
+ node->brother = son;
+
+ son->father = split;
+
+ if (prev)
+ prev->brother = split;
+ else
+ self->son = split;
+
+ return;
+ }
+ }
+}
+
+static bool node_edge_present(const struct node *self, const uint8_t *str, size_t i)
+{
+ const struct node *node;
+
+ for (node = self->son; node; node = node->brother)
+ if (str[node->from] == str[i])
+ return true;
+ return false;
+}
+
+static bool node_edge_present_2(const struct node *self, uint8_t edge, size_t len, const uint8_t *str, size_t i)
+{
+ const struct node *node;
+
+ for (node = self->son; node; node = node->brother)
+ if (str[node->from] == edge)
+ return str[node->from+len] == str[i];
+ return false;
+}
+
+static bool node_edge_end(const struct node *self, uint8_t edge, size_t len, const uint8_t *str)
+{
+ const struct node *node;
+
+ for (node = self->son; node; node = node->brother)
+ if (str[node->from] == edge)
+ return node->from + len >= node->to;
+
+ return false;
+}
+
+static struct node *node_edge(struct node *self, uint8_t edge, const uint8_t *str)
+{
+ struct node *node;
+
+ for (node = self->son; node; node = node->brother)
+ if (str[node->from] == edge)
+ return node;
+
+ return (struct node *) 0;
+}
+
+static bool node_validate_suffix(struct node *self, size_t len, const uint8_t *str, size_t i)
+{
+ struct node *node;
+ size_t j;
+
+ if (len == 0) return true;
+ if (!node_edge_present(self, str, i)) return false;
+
+ node = node_edge(self, str[i], str);
+ for (j = node->from; j < node->to && len > 0; --len, ++j, ++i) {
+ if (str[j] != str[i]) return false;
+ }
+ return node_validate_suffix(node, len, str, i);
+}
+
+static bool node_validate_suffixes(struct node *self, size_t len, const uint8_t *str, size_t i)
+{
+ if (len == 0) return true;
+ if (!node_validate_suffixes(self, len - 1, str, i + 1)) return false;
+ return node_validate_suffix(self, len, str, i);
+}
+
+static size_t stree_max_size(size_t len)
+{
+ return len * 2 + 1;
+}
+
+int stree_encode(size_t len, const uint8_t *in, uint8_t *out, size_t *aux)
+{
+ struct node *nodes;
+ struct node *root;
+ struct node *prev;
+ struct node *active_node;
+ uint8_t active_edge = '^';
+ size_t active_len = 0;
+ size_t rem = 0;
+ size_t i;
+ size_t n;
+
+ nodes = (struct node *) malloc(sizeof(struct node) * stree_max_size(len));
+ if (!nodes) return -1;
+ root = nodes + 0;
+ node_init(root, (size_t) -1);
+ n = 1;
+ active_node = root;
+
+ for (i = 0; i < len; ++i) {
+ if (!node_validate_suffixes(root, i, in, 0)) {
+ free(nodes);
+ return -1;
+ }
+ aux[i] = (size_t) (active_node - root);
+ prev = (struct node *) 0;
+ ++rem;
+ while (rem > 0) {
+ printf("i = %d, active_node = %d, active_edge = %c, active_len = %d, rem = %d\n", (int) i, (int) (active_node - root), (char) active_edge, (int) active_len, (int) (rem - 1));
+ node_dbg(root, 0, in, i);
+ if (active_len == 0) {
+ if (!node_edge_present(active_node, in, i)) {
+ printf("creating node %d as child of %d\n", (int) n, (int) (active_node - root));
+ printf("remainder is currently %d\n", (int) rem);
+ printf("i is currently %d\n", (int) i);
+ node_init(nodes + n, i);
+ node_add_son(active_node, nodes + n);
+ ++n;
+ --rem;
+ if (active_node != root) {
+ if (active_node->link) {
+ printf("following link from node %d to %d\n", (int) (active_node - root), (int) (active_node->link - root));
+ }
+ active_node = active_node->link ? active_node->link : root;
+ if (active_node == root) {
+ active_len = rem - 1;
+ active_edge = in[i-active_len];
+ while (node_edge_end(active_node, active_edge, active_len, in)) {
+ active_node = node_edge(active_node, active_edge, in);
+ active_edge = in[i-active_len+1];
+ printf("setting active_edge in first loop to %c\n", (char) active_edge);
+ active_len -= active_node->to - active_node->from;
+ if (active_len == 0) active_edge = '^';
+ }
+ }
+ }
+ } else {
+ printf("active_edge is currently %c\n", (char) active_edge);
+ active_edge = in[i];
+ printf("setting active_edge in first branch to %c\n", (char) active_edge);
+ printf("i is currently %d\n", (int) i);
+ ++active_len;
+ if (node_edge_end(active_node, active_edge, active_len, in)) {
+ active_node = node_edge(active_node, active_edge, in);
+ active_edge = '^';
+ active_len = 0;
+ }
+ break;
+ }
+ } else if (active_len > 0) {
+ if (!node_edge_present_2(active_node, active_edge, active_len, in, i)) {
+ printf("creating nodes %d and %d\n", (int) n, (int) n + 1);
+ node_init(nodes + n + 1, i);
+ node_split_son(active_node, active_edge, active_len, nodes + n, nodes + n + 1, in);
+ if (prev) {
+ prev->link = nodes + n;
+ printf("creating link from node %d to node %d\n", (int) (prev - root), (int) n);
+ }
+ prev = nodes + n;
+ n += 2;
+ --rem;
+ printf("active_edge is currently %c\n", (char) active_edge);
+ if (active_node == root) {
+ --active_len;
+ printf("setting active_edge in second branch\n");
+ active_edge = in[i-active_len];
+ if (active_len == 0) active_edge = '^';
+ } else {
+ if (active_node->link) {
+ printf("following link from node %d to %d\n", (int) (active_node - root), (int) (active_node->link - root));
+ }
+ active_node = active_node->link ? active_node->link : root;
+ if (active_node == root) {
+ active_len = rem - 1;
+ active_edge = in[i-active_len];
+ }
+ }
+ printf("active_edge is currently %c\n", (char) active_edge);
+ printf("remainder is currently %d\n", (int) rem);
+ printf("active_node is currently %d\n", (int) (active_node - root));
+ while (node_edge_end(active_node, active_edge, active_len, in)) {
+ active_node = node_edge(active_node, active_edge, in);
+ active_edge = in[i-active_len+1];
+ printf("setting active_edge in second loop to %c\n", (char) active_edge);
+ active_len -= active_node->to - active_node->from;
+ if (active_len == 0) active_edge = '^';
+ }
+ } else {
+ ++active_len;
+ if (node_edge_end(active_node, active_edge, active_len, in)) {
+ active_node = node_edge(active_node, active_edge, in);
+ active_edge = '^';
+ active_len = 0;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ node_dbg(root, 0, in, len);
+
+ if (!node_validate_suffixes(root, len, in, 0)) {
+ free(nodes);
+ return -1;
+ }
+
+ (void) out; // TODO
+ free(nodes);
+ return 0;
+}
--- /dev/null
+#include "test.h"
+
+#include "stree.h"
+
+enum test_result test_stree_encode_empty(void)
+{
+ const uint8_t *in = (const uint8_t *) "";
+ uint8_t out[1];
+ size_t aux[1];
+ ASSERT_EQ(0, stree_encode(0, in, out, aux));
+ return TEST_SUCCESS;
+}
+
+enum test_result test_stree_encode_simple(void)
+{
+ const uint8_t *in = (const uint8_t *) "abc";
+ uint8_t out[3];
+ size_t aux[3];
+ ASSERT_EQ(0, stree_encode(3, in, out, aux));
+
+ /*
+ ASSERT_EQ(0, out[0]);
+ ASSERT_EQ(1, out[1]);
+ ASSERT_EQ(2, out[2]);
+ */
+
+ ASSERT_EQ(0, aux[0]);
+ ASSERT_EQ(0, aux[1]);
+ ASSERT_EQ(0, aux[2]);
+
+ return TEST_SUCCESS;
+}
+
+enum test_result test_stree_encode_nontrivial(void)
+{
+ const uint8_t *in = (const uint8_t *) "abaaa";
+ uint8_t out[5];
+ size_t aux[5];
+ ASSERT_EQ(0, stree_encode(5, in, out, aux));
+
+ /*
+ ASSERT_EQ(0, out[0]);
+ ASSERT_EQ(1, out[1]);
+ ASSERT_EQ(0, out[2]);
+ ASSERT_EQ(1, out[3]);
+ ASSERT_EQ(0, out[4]);
+ */
+
+ ASSERT_EQ(0, aux[0]);
+ ASSERT_EQ(0, aux[1]);
+ ASSERT_EQ(0, aux[2]);
+ ASSERT_EQ(0, aux[3]);
+ ASSERT_EQ(3, aux[4]);
+
+ return TEST_SUCCESS;
+}
+
+enum test_result test_stree_encode_so_example(void)
+{
+ const uint8_t *in = (const uint8_t *) "abcabxabcd";
+ uint8_t out[10];
+ size_t aux[10];
+ ASSERT_EQ(0, stree_encode(10, in, out, aux));
+
+ /*
+ ASSERT_EQ(0, out[0]);
+ ASSERT_EQ(1, out[1]);
+ ASSERT_EQ(2, out[2]);
+ ASSERT_EQ(0, out[3]);
+ ASSERT_EQ(0, out[4]);
+ ASSERT_EQ(3, out[5]);
+ ASSERT_EQ(0, out[6]);
+ ASSERT_EQ(0, out[7]);
+ ASSERT_EQ(1, out[8]);
+ ASSERT_EQ(3, out[9]);
+ */
+
+ ASSERT_EQ(0, aux[0]);
+ ASSERT_EQ(0, aux[1]);
+ ASSERT_EQ(0, aux[2]);
+ ASSERT_EQ(0, aux[3]);
+ ASSERT_EQ(0, aux[4]);
+ ASSERT_EQ(0, aux[5]);
+ ASSERT_EQ(0, aux[6]);
+ ASSERT_EQ(0, aux[7]);
+ ASSERT_EQ(4, aux[8]);
+ ASSERT_EQ(4, aux[9]);
+
+ return TEST_SUCCESS;
+}
+
+enum test_result test_stree_tricky_suffix_link(void)
+{
+ const uint8_t *in = (const uint8_t *) "cdddcdc";
+ uint8_t out[7];
+ size_t aux[7];
+ ASSERT_EQ(0, stree_encode(7, in, out, aux));
+
+ /*
+ ASSERT_EQ(2, out[0]);
+ ASSERT_EQ(3, out[1]);
+ ASSERT_EQ(1, out[2]);
+ ASSERT_EQ(0, out[3]);
+ ASSERT_EQ(1, out[4]);
+ ASSERT_EQ(0, out[5]);
+ ASSERT_EQ(1, out[6]);
+ */
+
+ ASSERT_EQ(0, aux[0]);
+ ASSERT_EQ(0, aux[1]);
+ ASSERT_EQ(0, aux[2]);
+ ASSERT_EQ(0, aux[3]);
+ ASSERT_EQ(0, aux[4]);
+ ASSERT_EQ(0, aux[5]);
+ ASSERT_EQ(0, aux[6]);
+
+ return TEST_SUCCESS;
+}
+
+enum test_result test_stree_minimal(void)
+{
+ const uint8_t *in = (const uint8_t *) "abcdeabacacabb";
+ uint8_t out[14];
+ size_t aux[14];
+ ASSERT_EQ(0, stree_encode(14, in, out, aux));
+
+ return TEST_SUCCESS;
+}
+
+enum test_result test_stree_minimal_2(void)
+{
+ const uint8_t *in = (const uint8_t *) "abcdeabacacabbabcdcccacc";
+ uint8_t out[24];
+ size_t aux[24];
+ ASSERT_EQ(0, stree_encode(24, in, out, aux));
+
+ return TEST_SUCCESS;
+}
+
+enum test_result test_stree_minimal_3(void)
+{
+ const uint8_t *in = (const uint8_t *) "abcdeabacacabbabcdcccaccccaa";
+ uint8_t out[28];
+ size_t aux[28];
+ ASSERT_EQ(0, stree_encode(28, in, out, aux));
+
+ return TEST_SUCCESS;
+}
+
+enum test_result test_stree_long(void)
+{
+ const uint8_t *in = (const uint8_t *) "abcdeabacacabbabcdcccaccccaabbbaababdadbaccabbdadbadbabaccacbbbcbadddbdababddddddabdabddddddabbbbccc";
+ uint8_t out[100];
+ size_t aux[100];
+ ASSERT_EQ(0, stree_encode(100, in, out, aux));
+
+ return TEST_SUCCESS;
+}
+
+enum test_result test_stree_very_long(void)
+{
+ const uint8_t *in = (const uint8_t *) "abcdeabacacabbabcdcccaccccaabbbaababdadbaccabbdadbadbabaccacbbbcbadddbdababddddddabdabddddddabbbbccc";
+ uint8_t out[100];
+ size_t aux[100];
+ ASSERT_EQ(0, stree_encode(100, in, out, aux));
+
+ return TEST_SUCCESS;
+}
+
+int main(void)
+{
+ RUN_TEST(test_stree_encode_empty);
+ RUN_TEST(test_stree_encode_simple);
+ RUN_TEST(test_stree_encode_nontrivial);
+ RUN_TEST(test_stree_encode_so_example);
+ RUN_TEST(test_stree_tricky_suffix_link);
+ RUN_TEST(test_stree_minimal);
+ RUN_TEST(test_stree_minimal_2);
+ RUN_TEST(test_stree_minimal_3);
+ RUN_TEST(test_stree_long);
+ RUN_TEST(test_stree_very_long);
+}