// Copyright 2016 The Draco Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #ifndef DRACO_CORE_ANS_H_ #define DRACO_CORE_ANS_H_ // An implementation of Asymmetric Numeral Systems (rANS). // See http://arxiv.org/abs/1311.2540v2 for more informaiton on rANS. // This file is based off libvpx's ans.h. #include #include #define ANS_DIVIDE_BY_MULTIPLY 1 #if ANS_DIVIDE_BY_MULTIPLY #include "core/divide.h" #endif namespace draco { #if ANS_DIVIDE_BY_MULTIPLY #define ANS_DIVREM(quotient, remainder, dividend, divisor) \ do { \ quotient = fastdiv(dividend, divisor); \ remainder = dividend - quotient * divisor; \ } while (0) #define ANS_DIV(dividend, divisor) fastdiv(dividend, divisor) #else #define ANS_DIVREM(quotient, remainder, dividend, divisor) \ do { \ quotient = dividend / divisor; \ remainder = dividend % divisor; \ } while (0) #define ANS_DIV(dividend, divisor) ((dividend) / (divisor)) #endif struct AnsCoder { uint8_t *buf; int buf_offset; uint32_t state; }; struct AnsDecoder { const uint8_t *buf; int buf_offset; uint32_t state; }; typedef uint8_t AnsP8; #define ans_p8_precision 256u #define ans_p8_shift 8 #define ans_p10_precision 1024u #define l_base (ans_p10_precision * 4) // l_base % precision must be 0 #define io_base 256 // Range I = { l_base, l_base + 1, ..., l_base * io_base - 1 } static uint32_t mem_get_le16(const void *vmem) { uint32_t val; const uint8_t *mem = (const uint8_t *)vmem; val = mem[1] << 8; val |= mem[0]; return val; } static uint32_t mem_get_le24(const void *vmem) { uint32_t val; const uint8_t *mem = (const uint8_t *)vmem; val = mem[2] << 16; val |= mem[1] << 8; val |= mem[0]; return val; } static uint32_t mem_get_le32(const void *vmem) { uint32_t val; const uint8_t *mem = (const uint8_t *)vmem; val = mem[3] << 24; val |= mem[2] << 16; val |= mem[1] << 8; val |= mem[0]; return val; } static inline void mem_put_le16(void *vmem, uint32_t val) { uint8_t *mem = (uint8_t *)vmem; mem[0] = (val >> 0) & 0xff; mem[1] = (val >> 8) & 0xff; } static inline void mem_put_le24(void *vmem, uint32_t val) { uint8_t *mem = (uint8_t *)vmem; mem[0] = (val >> 0) & 0xff; mem[1] = (val >> 8) & 0xff; mem[2] = (val >> 16) & 0xff; } static inline void mem_put_le32(void *vmem, uint32_t val) { uint8_t *mem = (uint8_t *)vmem; mem[0] = (val >> 0) & 0xff; mem[1] = (val >> 8) & 0xff; mem[2] = (val >> 16) & 0xff; mem[3] = (val >> 24) & 0xff; } static inline void ans_write_init(struct AnsCoder *const ans, uint8_t *const buf) { ans->buf = buf; ans->buf_offset = 0; ans->state = l_base; } static inline int ans_write_end(struct AnsCoder *const ans) { uint32_t state; assert(ans->state >= l_base); assert(ans->state < l_base * io_base); state = ans->state - l_base; if (state < (1 << 6)) { ans->buf[ans->buf_offset] = (0x00 << 6) + state; return ans->buf_offset + 1; } else if (state < (1 << 14)) { mem_put_le16(ans->buf + ans->buf_offset, (0x01 << 14) + state); return ans->buf_offset + 2; } else if (state < (1 << 22)) { mem_put_le24(ans->buf + ans->buf_offset, (0x02 << 22) + state); return ans->buf_offset + 3; } else { assert(0 && "State is too large to be serialized"); return ans->buf_offset; } } // rABS with descending spread // p or p0 takes the place of l_s from the paper // ans_p8_precision is m static inline void rabs_desc_write(struct AnsCoder *ans, int val, AnsP8 p0) { const AnsP8 p = ans_p8_precision - p0; const unsigned l_s = val ? p : p0; unsigned quot, rem; if (ans->state >= l_base / ans_p8_precision * io_base * l_s) { ans->buf[ans->buf_offset++] = ans->state % io_base; ans->state /= io_base; } ANS_DIVREM(quot, rem, ans->state, l_s); ans->state = quot * ans_p8_precision + rem + (val ? 0 : p); } #define ANS_IMPL1 0 #define UNPREDICTABLE(x) x static inline int rabs_desc_read(struct AnsDecoder *ans, AnsP8 p0) { int val; #if ANS_IMPL1 unsigned l_s; #else unsigned quot, rem, x, xn; #endif const AnsP8 p = ans_p8_precision - p0; if (ans->state < l_base) { ans->state = ans->state * io_base + ans->buf[--ans->buf_offset]; } #if ANS_IMPL1 val = ans->state % ans_p8_precision < p; l_s = val ? p : p0; ans->state = (ans->state / ans_p8_precision) * l_s + ans->state % ans_p8_precision - (!val * p); #else x = ans->state; quot = x / ans_p8_precision; rem = x % ans_p8_precision; xn = quot * p; val = rem < p; if (UNPREDICTABLE(val)) { ans->state = xn + rem; } else { // ans->state = quot * p0 + rem - p; ans->state = x - xn - p; } #endif return val; } // rABS with ascending spread // p or p0 takes the place of l_s from the paper // ans_p8_precision is m static inline void rabs_asc_write(struct AnsCoder *ans, int val, AnsP8 p0) { const AnsP8 p = ans_p8_precision - p0; const unsigned l_s = val ? p : p0; unsigned quot, rem; if (ans->state >= l_base / ans_p8_precision * io_base * l_s) { ans->buf[ans->buf_offset++] = ans->state % io_base; ans->state /= io_base; } ANS_DIVREM(quot, rem, ans->state, l_s); ans->state = quot * ans_p8_precision + rem + (val ? p0 : 0); } static inline int rabs_asc_read(struct AnsDecoder *ans, AnsP8 p0) { int val; #if ANS_IMPL1 unsigned l_s; #else unsigned quot, rem, x, xn; #endif const AnsP8 p = ans_p8_precision - p0; if (ans->state < l_base) { ans->state = ans->state * io_base + ans->buf[--ans->buf_offset]; } #if ANS_IMPL1 val = ans->state % ans_p8_precision < p; l_s = val ? p : p0; ans->state = (ans->state / ans_p8_precision) * l_s + ans->state % ans_p8_precision - (!val * p); #else x = ans->state; quot = x / ans_p8_precision; rem = x % ans_p8_precision; xn = quot * p; val = rem >= p0; if (UNPREDICTABLE(val)) { ans->state = xn + rem - p0; } else { // ans->state = quot * p0 + rem - p0; ans->state = x - xn; } #endif return val; } #define rabs_read rabs_desc_read #define rabs_write rabs_desc_write // uABS with normalization static inline void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) { AnsP8 p = ans_p8_precision - p0; const unsigned l_s = val ? p : p0; while (ans->state >= l_base / ans_p8_precision * io_base * l_s) { ans->buf[ans->buf_offset++] = ans->state % io_base; ans->state /= io_base; } if (!val) ans->state = ANS_DIV(ans->state * ans_p8_precision, p0); else ans->state = ANS_DIV((ans->state + 1) * ans_p8_precision + p - 1, p) - 1; } static inline int uabs_read(struct AnsDecoder *ans, AnsP8 p0) { AnsP8 p = ans_p8_precision - p0; int s; // unsigned int xp1; unsigned xp, sp; unsigned state = ans->state; while (state < l_base && ans->buf_offset > 0) { state = state * io_base + ans->buf[--ans->buf_offset]; } sp = state * p; // xp1 = (sp + p) / ans_p8_precision; xp = sp / ans_p8_precision; // s = xp1 - xp; s = (sp & 0xFF) >= p0; if (UNPREDICTABLE(s)) ans->state = xp; else ans->state = state - xp; return s; } static inline int uabs_read_bit(struct AnsDecoder *ans) { int s; unsigned state = ans->state; while (state < l_base && ans->buf_offset > 0) { state = state * io_base + ans->buf[--ans->buf_offset]; } s = (int)(state & 1); ans->state = state >> 1; return s; } static inline int ans_read_init(struct AnsDecoder *const ans, const uint8_t *const buf, int offset) { unsigned x; if (offset < 1) return 1; ans->buf = buf; x = buf[offset - 1] >> 6; if (x == 0) { ans->buf_offset = offset - 1; ans->state = buf[offset - 1] & 0x3F; } else if (x == 1) { if (offset < 2) return 1; ans->buf_offset = offset - 2; ans->state = mem_get_le16(buf + offset - 2) & 0x3FFF; } else if (x == 2) { if (offset < 3) return 1; ans->buf_offset = offset - 3; ans->state = mem_get_le24(buf + offset - 3) & 0x3FFFFF; } else { // x == 3 implies this byte is a superframe marker return 1; } ans->state += l_base; if (ans->state >= l_base * io_base) return 1; return 0; } static inline int ans_read_end(struct AnsDecoder *const ans) { return ans->state == l_base; } static inline int ans_reader_has_error(const struct AnsDecoder *const ans) { return ans->state < l_base && ans->buf_offset == 0; } struct rans_sym { uint32_t prob; uint32_t cum_prob; // not-inclusive }; // Class for performing rANS encoding using a desired number of precision bits. // The max number of precision bits is currently 19. The actual number of // symbols in the input alphabet should be (much) smaller than that, otherwise // the compression rate may suffer. template class RAnsEncoder { public: RAnsEncoder() {} // Provides the input buffer where the data is going to be stored. inline void write_init(uint8_t *const buf) { ans_.buf = buf; ans_.buf_offset = 0; ans_.state = l_rans_base; } // Needs to be called after all symbols are encoded. inline int write_end() { uint32_t state; assert(ans_.state >= l_rans_base); assert(ans_.state < l_rans_base * io_base); state = ans_.state - l_rans_base; if (state < (1 << 6)) { ans_.buf[ans_.buf_offset] = (0x00 << 6) + state; return ans_.buf_offset + 1; } else if (state < (1 << 14)) { mem_put_le16(ans_.buf + ans_.buf_offset, (0x01 << 14) + state); return ans_.buf_offset + 2; } else if (state < (1 << 22)) { mem_put_le24(ans_.buf + ans_.buf_offset, (0x02 << 22) + state); return ans_.buf_offset + 3; } else if (state < (1 << 30)) { mem_put_le32(ans_.buf + ans_.buf_offset, (0x03 << 30) + state); return ans_.buf_offset + 4; } else { assert(0 && "State is too large to be serialized"); return ans_.buf_offset; } } // rANS with normalization // sym->prob takes the place of l_s from the paper // rans_precision is m inline void rans_write(const struct rans_sym *const sym) { const uint32_t p = sym->prob; while (ans_.state >= l_rans_base / rans_precision * io_base * p) { ans_.buf[ans_.buf_offset++] = ans_.state % io_base; ans_.state /= io_base; } // TODO(ostava): The division and multiplication should be optimized. ans_.state = (ans_.state / p) * rans_precision + ans_.state % p + sym->cum_prob; } private: static constexpr int rans_precision = 1 << rans_precision_bits_t; static constexpr int l_rans_base = rans_precision * 4; AnsCoder ans_; }; struct rans_dec_sym { uint32_t val; uint32_t prob; uint32_t cum_prob; // not-inclusive }; // Class for performing rANS decoding using a desired number of precision bits. // The number of precision bits needs to be the same as with the RAnsEncoder // that was used to encode the input data. template class RAnsDecoder { public: RAnsDecoder() {} // Initializes the decoder from the input buffer. The |offset| specifies the // number of bytes encoded by the encoder. inline int read_init(const uint8_t *const buf, int offset) { unsigned x; if (offset < 1) return 1; ans_.buf = buf; x = buf[offset - 1] >> 6; if (x == 0) { ans_.buf_offset = offset - 1; ans_.state = buf[offset - 1] & 0x3F; } else if (x == 1) { if (offset < 2) return 1; ans_.buf_offset = offset - 2; ans_.state = mem_get_le16(buf + offset - 2) & 0x3FFF; } else if (x == 2) { if (offset < 3) return 1; ans_.buf_offset = offset - 3; ans_.state = mem_get_le24(buf + offset - 3) & 0x3FFFFF; } else if (x == 3) { ans_.buf_offset = offset - 4; ans_.state = mem_get_le32(buf + offset - 4) & 0x3FFFFFFF; } else { return 1; } ans_.state += l_rans_base; if (ans_.state >= l_rans_base * io_base) return 1; return 0; } inline int read_end() { return ans_.state == l_rans_base; } inline int reader_has_error() { return ans_.state < l_rans_base && ans_.buf_offset == 0; } inline int rans_read() { unsigned rem; unsigned quo; struct rans_dec_sym sym; while (ans_.state < l_rans_base && ans_.buf_offset > 0) { ans_.state = ans_.state * io_base + ans_.buf[--ans_.buf_offset]; } // |rans_precision| is a power of two compile time constant, and the below // division and modulo are going to be optimized by the compiler. quo = ans_.state / rans_precision; rem = ans_.state % rans_precision; fetch_sym(&sym, rem); ans_.state = quo * sym.prob + rem - sym.cum_prob; return sym.val; } // Construct a look up table with |rans_precision| number of entries. inline void rans_build_look_up_table(const uint32_t token_probs[], uint32_t num_symbols) { lut_table_.resize(rans_precision); probability_table_.resize(num_symbols); uint32_t cum_prob = 0; uint32_t act_prob = 0; for (int i = 0; i < num_symbols; ++i) { probability_table_[i].prob = token_probs[i]; probability_table_[i].cum_prob = cum_prob; cum_prob += token_probs[i]; for (int j = act_prob; j < cum_prob; ++j) { lut_table_[j] = i; } act_prob = cum_prob; } assert(cum_prob == rans_precision); } private: inline void fetch_sym(struct rans_dec_sym *out, uint32_t rem) { uint32_t symbol = lut_table_[rem]; out->val = symbol; out->prob = probability_table_[symbol].prob; out->cum_prob = probability_table_[symbol].cum_prob; } static constexpr int rans_precision = 1 << rans_precision_bits_t; static constexpr int l_rans_base = rans_precision * 4; std::vector lut_table_; std::vector probability_table_; AnsDecoder ans_; }; #undef ANS_DIVREM } // namespace draco #endif // DRACO_CORE_ANS_H_