From: Geoffrey Allott Date: Wed, 24 Aug 2022 19:08:35 +0000 (+0100) Subject: calculate optimal entropy with/without aux X-Git-Url: https://git.pointlesshacks.com/?a=commitdiff_plain;h=64453fff92efb9840afcd1b205a80a305bcde102;p=tANS.git calculate optimal entropy with/without aux --- diff --git a/src/tANS.c b/src/tANS.c index 003a866..89916cf 100644 --- a/src/tANS.c +++ b/src/tANS.c @@ -15,6 +15,10 @@ ref: https://arxiv.org/abs/1311.2540 #include #include + +//FIXME +#include <math.h> + #define MAX_BUFSZ 1048576 static void usage(void) @@ -40,12 +44,17 @@ static int tANS_compress_file(FILE* input, FILE *output) size_t *aux_buf; uint8_t *write_buf; double p[256] = {0}; + double p_aux[256][256] = {0}; struct tANS_freq_tbl freq_tbl; struct tANS_symbol_tbl symbol_tbl; struct tANS_encode_st st; const uint16_t log2_tblsz = 10; uint32_t read_sz = 8; + double total_len = 0; + double compressed_bits = 0; + double compressed_len = 0; + read_buf = malloc(MAX_BUFSZ); enc_buf = malloc(MAX_BUFSZ); aux_buf = malloc(MAX_BUFSZ * sizeof(size_t)); @@ -61,6 +70,7 @@ static int tANS_compress_file(FILE* input, FILE *output) len = (uint32_t) fread(read_buf, 1, read_sz, input); if (stree_encode(len, read_buf, enc_buf, aux_buf) != 0) goto fail; for (i = 0; i < len; ++i) ++p[enc_buf[i]]; + for (i = 0; i < len; ++i) ++p_aux[aux_buf[i] > 255 ? 
255 : aux_buf[i]][enc_buf[i]]; if (fwrite(&len, sizeof len, 1, output) != 1) goto fail; bits = tANS_encode(&st, enc_buf, len, write_buf); if (fwrite(&bits, sizeof bits, 1, output) != 1) goto fail; @@ -71,7 +81,41 @@ static int tANS_compress_file(FILE* input, FILE *output) read_sz *= 2; if (read_sz > MAX_BUFSZ) read_sz = MAX_BUFSZ; + + total_len += len; + compressed_bits += bits; + compressed_len += sizeof len; + compressed_len += sizeof bits; + compressed_len += sizeof st.x; + compressed_len += (bits + 7) / 8; + } + + double true_bits_req = 0.0; + + for (i = 0; i < 256; ++i) + fprintf(stderr, "p[%u] = %f\n", i, p[i]); + for (size_t j = 0; j < 8; ++j) { + double H = 0.0; + double p_total = 0.0; + for (i = 0; i < 256; ++i) + p_total += p_aux[j][i]; + for (i = 0; i < 256; ++i) + if (p_aux[j][i] > 0) + H -= log2(p_aux[j][i]/p_total) * (p_aux[j][i]/p_total); + fprintf(stderr, "Η = %12.08f\n", H); + true_bits_req += H * p_total; } + double H = 0.0; + double p_total = 0.0; + for (i = 0; i < 256; ++i) + p_total += p[i]; + fprintf(stderr, "\n"); + H = true_bits_req / p_total; + fprintf(stderr, "Η = %12.08f\n", H); + fprintf(stderr, "ΔΗ_tANS = %12.08f\n", compressed_bits / total_len - H); + fprintf(stderr, "ΔΗ_total = %12.08f\n", compressed_len * 8 / total_len - H); + fprintf(stderr, "compression_ratio = %12.08f\n", compressed_len / total_len); + fprintf(stderr, "Η-optimal ratio = %12.08f\n", H / 8); free(read_buf); free(enc_buf);