From 22b9eddb8617d639a9a9e70e8a8951ae6d24b77d Mon Sep 17 00:00:00 2001 From: Geoffrey Allott Date: Fri, 6 Jan 2023 19:56:47 +0000 Subject: [PATCH] change logic to push nodes to front as they are selected --- src/stree.c | 143 +++++++++++++++++++++++----------------------- test/test_stree.c | 2 +- 2 files changed, 71 insertions(+), 74 deletions(-) diff --git a/src/stree.c b/src/stree.c index 66116aa..05b262c 100644 --- a/src/stree.c +++ b/src/stree.c @@ -48,8 +48,18 @@ static void node_add_son(struct node *self, struct node *son) son->brother = brother; } -static void node_split_son(struct node *self, struct node *edge, struct node *brother, struct node *split, struct node *son, size_t len) +static void node_split_son(struct node *self, struct node *edge, struct node *split, struct node *son, size_t len) { + struct node *brother; + + if (edge != self->son) { + brother = self->son; + while (brother->brother != edge) + brother = brother->brother; + brother->brother = split; + } else + self->son = split; + split->from = edge->from; split->to = edge->from + len; split->brother = edge->brother; @@ -58,53 +68,29 @@ static void node_split_son(struct node *self, struct node *edge, struct node *br edge->from = edge->from + len; edge->brother = son; - - if (brother) - brother->brother = split; - else - self->son = split; } -static size_t node_count_sons(const struct node *self) +static void node_push_to_front(struct node *self, struct node *brother, struct node *node) { - size_t count = 0; - const struct node *node; - - for (node = self->son; node; node = node->brother) - ++count; - - return count; -} - -static void node_sort_sons(struct node *self) -{ - struct node *node, *brother, *tmp; - - for (brother = (struct node *) 0, node = self->son; node && node->brother; brother = node, node = node->brother) { - if (node->to > node->brother->to) { - tmp = node->brother; - node->brother = node->brother->brother; - tmp->brother = node; - if (brother) - brother->brother = tmp; - else - self->son = tmp; - brother = (struct node *) 0; - node = self->son; - } - } + if (brother) + brother->brother = node->brother; + else + self->son = node->brother; + node->brother = self->son; + self->son = node; } -static struct node *node_edge_decode(struct node *self, const uint8_t *str, struct node **brother, bool seen[], uint8_t *code) +static struct node *node_edge_decode(struct node *self, const uint8_t *str, bool seen[], uint8_t *code) { - struct node *node; - - node_sort_sons(self); + struct node *node, *brother; - for (*brother = (struct node *) 0, node = self->son; node; *brother = node, node = node->brother) { + for (brother = (struct node *) 0, node = self->son; node; brother = node, node = node->brother) { if (!seen[str[node->from]]) { seen[str[node->from]] = true; - if (*code == 0) return node; + if (*code == 0) { + node_push_to_front(self, brother, node); + return node; + } --*code; } } @@ -112,14 +98,15 @@ static struct node *node_edge_decode(struct node *self, const uint8_t *str, stru return (struct node *) 0; } -static struct node *node_edge_encode(struct node *self, const uint8_t *str, size_t i, struct node **brother, bool seen[], uint8_t *code) +static struct node *node_edge_encode(struct node *self, const uint8_t *str, size_t i, bool seen[], uint8_t *code) { - struct node *node; - - node_sort_sons(self); + struct node *node, *brother; - for (*brother = (struct node *) 0, node = self->son; node; *brother = node, node = node->brother) { - if (str[node->from] == str[i]) return node; + for (brother = (struct node *) 0, node = self->son; node; brother = node, node = node->brother) { + if (str[node->from] == str[i]) { + node_push_to_front(self, brother, node); + return node; + } if (!seen[str[node->from]]) { seen[str[node->from]] = true; ++*code; @@ -129,24 +116,27 @@ static struct node *node_edge_encode(struct node *self, const uint8_t *str, size return (struct node *) 0; } -static struct node *node_edge(struct node *self, const uint8_t *str, size_t i, struct node **brother) +static struct node *node_edge(struct node *self, const uint8_t *str, size_t i) { - struct node *node; + struct node *node, *brother; - for (*brother = (struct node *) 0, node = self->son; node; *brother = node, node = node->brother) - if (str[node->from] == str[i]) + for (brother = (struct node *) 0, node = self->son; node; brother = node, node = node->brother) + if (str[node->from] == str[i]) { + node_push_to_front(self, brother, node); return node; + } return (struct node *) 0; } +#ifndef NDEBUG static bool node_validate_suffix(struct node *self, size_t len, const uint8_t *str, size_t i) { - struct node *node, *brother; + struct node *node; size_t j; if (len == 0) return true; - node = node_edge(self, str, i, &brother); + node = node_edge(self, str, i); if (!node) return false; for (j = node->from; j < node->to && len > 0; --len, ++j, ++i) { if (str[j] != str[i]) return false; @@ -161,6 +151,7 @@ static bool node_validate_suffixes(struct node *self, size_t len, const uint8_t if (!node_validate_suffix(self, len, str, i)) return false; return true; } +#endif static size_t stree_max_size(size_t len) { @@ -173,8 +164,8 @@ int stree_encode(size_t len, const uint8_t *in, uint8_t *out, uint8_t *aux) struct node *root; struct node *prev; struct node *active_node; + struct node *updated_node; struct node *active_edge = (struct node *) 0; - struct node *brother = (struct node *) 0; size_t active_len = 0; size_t rem = 0; size_t i; @@ -198,9 +189,9 @@ int stree_encode(size_t len, const uint8_t *in, uint8_t *out, uint8_t *aux) memset(seen, 0, sizeof seen); code = 0; if (active_len == 0) - active_edge = node_edge_encode(active_node, in, i, &brother, seen, &code); + active_edge = node_edge_encode(active_node, in, i, seen, &code); else - active_edge = node_edge(active_node, in, i - active_len, &brother); + active_edge = node_edge(active_node, in, i - active_len); do { present = active_edge && in[active_edge->from+active_len] == in[i]; if (present) { @@ -209,35 +200,38 @@ int stree_encode(size_t len, const uint8_t *in, uint8_t *out, uint8_t *aux) if (!active_edge) { node_init(nodes + n, i); node_add_son(active_node, nodes + n); + updated_node = active_node; ++n, --rem; } else { if (!seen[in[active_edge->from+active_len]]) { seen[in[active_edge->from+active_len]] = true; ++code; } + node_init(nodes + n, active_edge->from); node_init(nodes + n + 1, i); - node_split_son(active_node, active_edge, brother, nodes + n, nodes + n + 1, active_len); + node_split_son(active_node, active_edge, nodes + n, nodes + n + 1, active_len); + updated_node = nodes + n; if (prev) prev->link = nodes + n; prev = nodes + n, n += 2, --rem; if (active_node == root) --active_len; } active_node = active_node->link ? active_node->link : (active_len = rem - 1, root); - if (active_len == 0 && active_node->son == nodes + n - 1) - brother = active_edge = (struct node *) 0; + if (active_len == 0 && active_node == updated_node) + active_edge = (struct node *) 0; else if (active_len == 0) - active_edge = node_edge_encode(active_node, in, i, &brother, seen, &code); + active_edge = node_edge_encode(active_node, in, i, seen, &code); else - active_edge = node_edge(active_node, in, i - active_len, &brother); + active_edge = node_edge(active_node, in, i - active_len); } while (active_edge && active_edge->from + active_len >= active_edge->to) { active_node = active_edge; active_len -= active_node->to - active_node->from; if (active_len == 0 && present) - brother = active_edge = (struct node *) 0; + active_edge = (struct node *) 0; else if (active_len == 0) - active_edge = node_edge_encode(active_node, in, i, &brother, seen, &code); + active_edge = node_edge_encode(active_node, in, i, seen, &code); else - active_edge = node_edge(active_node, in, i - active_len, &brother); + active_edge = node_edge(active_node, in, i - active_len); } } while (rem > 0 && !present); if (!present) @@ -266,8 +260,8 @@ int stree_decode(size_t len, const uint8_t *in, uint8_t *out, uint8_t *aux) struct node *root; struct node *prev; struct node *active_node; + struct node *updated_node; struct node *active_edge = (struct node *) 0; - struct node *brother = (struct node *) 0; size_t active_len = 0; size_t rem = 0; size_t i, n; @@ -290,9 +284,9 @@ int stree_decode(size_t len, const uint8_t *in, uint8_t *out, uint8_t *aux) memset(seen, 0, sizeof seen); code = in[i]; if (active_len == 0) - active_edge = node_edge_decode(active_node, out, &brother, seen, &code); + active_edge = node_edge_decode(active_node, out, seen, &code); else - active_edge = node_edge(active_node, out, i - active_len, &brother); + active_edge = node_edge(active_node, out, i - active_len); do { present = active_edge && code == 0 && (active_len == 0 || !seen[out[active_edge->from+active_len]]); if (present) { @@ -302,35 +296,38 @@ int stree_decode(size_t len, const uint8_t *in, uint8_t *out, uint8_t *aux) if (!active_edge) { node_init(nodes + n, i); node_add_son(active_node, nodes + n); + updated_node = active_node; ++n, --rem; } else { if (!seen[out[active_edge->from+active_len]]) { seen[out[active_edge->from+active_len]] = true; --code; } + node_init(nodes + n, active_edge->from); node_init(nodes + n + 1, i); - node_split_son(active_node, active_edge, brother, nodes + n, nodes + n + 1, active_len); + node_split_son(active_node, active_edge, nodes + n, nodes + n + 1, active_len); + updated_node = nodes + n; if (prev) prev->link = nodes + n; prev = nodes + n, n += 2, --rem; if (active_node == root) --active_len; } active_node = active_node->link ? active_node->link : (active_len = rem - 1, root); - if (active_len == 0 && active_node->son == nodes + n - 1) - brother = active_edge = (struct node *) 0; + if (active_len == 0 && active_node == updated_node) + active_edge = (struct node *) 0; else if (active_len == 0) - active_edge = node_edge_decode(active_node, out, &brother, seen, &code); + active_edge = node_edge_decode(active_node, out, seen, &code); else - active_edge = node_edge(active_node, out, i - active_len, &brother); + active_edge = node_edge(active_node, out, i - active_len); } while (active_edge && active_edge->from + active_len >= active_edge->to) { active_node = active_edge; active_len -= active_node->to - active_node->from; if (active_len == 0 && present) - brother = active_edge = (struct node *) 0; + active_edge = (struct node *) 0; else if (active_len == 0) - active_edge = node_edge_decode(active_node, out, &brother, seen, &code); + active_edge = node_edge_decode(active_node, out, seen, &code); else - active_edge = node_edge(active_node, out, i - active_len, &brother); + active_edge = node_edge(active_node, out, i - active_len); } } while (rem > 0 && !present); if (!present) diff --git a/test/test_stree.c b/test/test_stree.c index b55e5fa..db40721 100644 --- a/test/test_stree.c +++ b/test/test_stree.c @@ -54,7 +54,7 @@ static enum test_result test_stree_encode_so_example(void) ASSERT_EQ(2, out[3]); ASSERT_EQ(0, out[4]); ASSERT_EQ('x', out[5]); - ASSERT_EQ(1, out[6]); + ASSERT_EQ(2, out[6]); ASSERT_EQ(0, out[7]); ASSERT_EQ(0, out[8]); ASSERT_EQ('e', out[9]); -- 2.34.1