Merge pull request #1405 from nghttp2/huffman

Faster Huffman encoding/decoding
This commit is contained in:
Tatsuhiro Tsujikawa 2019-10-12 18:48:21 +09:00 committed by GitHub
commit d08c43951f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 4317 additions and 4345 deletions

View File

@ -29,114 +29,7 @@
#include <stdio.h>
#include "nghttp2_hd.h"
/*
 * Appends the huffman code |sym| to |bufs|.  The byte currently being
 * filled still has its least significant |rembits| bits unfilled;
 * |rembits| must be in range [1, 8], inclusive.  |*avail_ptr| is the
 * caller's count of bytes available in the current buffer; it is
 * decremented or refreshed from nghttp2_bufs_cur_avail() as bytes
 * are completed.
 *
 * On success, returns the number of unfilled bits in the byte where
 * the next output should be placed (8 if the code ended exactly on a
 * byte boundary).  If nghttp2_bufs_addb() or
 * nghttp2_bufs_addb_hold() fails, its nonzero return value is
 * returned instead.
 */
static ssize_t huff_encode_sym(nghttp2_bufs *bufs, size_t *avail_ptr,
                               size_t rembits, const nghttp2_huff_sym *sym) {
  uint32_t bits = sym->code;
  size_t left = sym->nbits;
  size_t shift;
  int err;

  /* We assume that sym->nbits <= 32 */
  if (left <= rembits) {
    if (left == rembits) {
      /* the code exactly completes the current byte */
      nghttp2_bufs_fast_orb(bufs, (uint8_t)bits);
      --*avail_ptr;
      return 8;
    }

    /* the code fits in the current byte and leaves room */
    nghttp2_bufs_fast_orb_hold(bufs, (uint8_t)(bits << (rembits - left)));
    return (ssize_t)(rembits - left);
  }

  /* the code spills over: its top |rembits| bits complete the current
     byte */
  nghttp2_bufs_fast_orb(bufs, (uint8_t)(bits >> (left - rembits)));
  --*avail_ptr;
  left -= rembits;

  if ((left & 0x7) != 0) {
    /* align code to MSB byte boundary */
    bits <<= 8 - (left & 0x7);
  }

  if (*avail_ptr >= (left + 7) / 8) {
    /* fast path, since most code is less than 8 */
    if (left >= 8) {
      /* handle longer code path: emit whole leading bytes */
      for (shift = 24; shift >= 8; shift -= 8) {
        if (left > shift) {
          nghttp2_bufs_fast_addb(bufs, (uint8_t)(bits >> shift));
          left -= 8;
        }
      }
    }

    if (left == 8) {
      nghttp2_bufs_fast_addb(bufs, (uint8_t)bits);
      *avail_ptr = nghttp2_bufs_cur_avail(bufs);
      return 8;
    }

    nghttp2_bufs_fast_addb_hold(bufs, (uint8_t)bits);
    *avail_ptr = nghttp2_bufs_cur_avail(bufs);
    return (ssize_t)(8 - left);
  }

  /* slow path: the current buffer cannot take the remaining bytes, so
     every byte goes through the checked append functions */
  for (shift = 24; shift >= 8; shift -= 8) {
    if (left > shift) {
      err = nghttp2_bufs_addb(bufs, (uint8_t)(bits >> shift));
      if (err != 0) {
        return err;
      }
      left -= 8;
    }
  }

  if (left == 8) {
    err = nghttp2_bufs_addb(bufs, (uint8_t)bits);
    if (err != 0) {
      return err;
    }
    *avail_ptr = nghttp2_bufs_cur_avail(bufs);
    return 8;
  }

  err = nghttp2_bufs_addb_hold(bufs, (uint8_t)bits);
  if (err != 0) {
    return err;
  }
  *avail_ptr = nghttp2_bufs_cur_avail(bufs);
  return (ssize_t)(8 - left);
}
#include "nghttp2_net.h"
size_t nghttp2_hd_huff_encode_count(const uint8_t *src, size_t len) {
size_t i;
@ -151,81 +44,97 @@ size_t nghttp2_hd_huff_encode_count(const uint8_t *src, size_t len) {
int nghttp2_hd_huff_encode(nghttp2_bufs *bufs, const uint8_t *src,
size_t srclen) {
int rv;
ssize_t rembits = 8;
size_t i;
const nghttp2_huff_sym *sym;
const uint8_t *end = src + srclen;
uint64_t code = 0;
uint32_t x;
size_t nbits = 0;
size_t avail;
int rv;
avail = nghttp2_bufs_cur_avail(bufs);
for (i = 0; i < srclen; ++i) {
const nghttp2_huff_sym *sym = &huff_sym_table[src[i]];
if (rembits == 8) {
if (avail) {
nghttp2_bufs_fast_addb_hold(bufs, 0);
} else {
rv = nghttp2_bufs_addb_hold(bufs, 0);
if (rv != 0) {
return rv;
}
avail = nghttp2_bufs_cur_avail(bufs);
for (; src != end;) {
sym = &huff_sym_table[*src++];
code |= (uint64_t)sym->code << (32 - nbits);
nbits += sym->nbits;
if (nbits < 32) {
continue;
}
if (avail >= 4) {
x = htonl((uint32_t)(code >> 32));
memcpy(bufs->cur->buf.last, &x, 4);
bufs->cur->buf.last += 4;
avail -= 4;
code <<= 32;
nbits -= 32;
continue;
}
for (; nbits >= 8;) {
rv = nghttp2_bufs_addb(bufs, (uint8_t)(code >> 56));
if (rv != 0) {
return rv;
}
code <<= 8;
nbits -= 8;
}
rembits = huff_encode_sym(bufs, &avail, (size_t)rembits, sym);
if (rembits < 0) {
return (int)rembits;
}
avail = nghttp2_bufs_cur_avail(bufs);
}
/* 256 is special terminal symbol, pad with its prefix */
if (rembits < 8) {
/* if rembits < 8, we should have at least 1 buffer space
available */
const nghttp2_huff_sym *sym = &huff_sym_table[256];
assert(avail);
/* Caution we no longer adjust avail here */
nghttp2_bufs_fast_orb(
bufs, (uint8_t)(sym->code >> (sym->nbits - (size_t)rembits)));
for (; nbits >= 8;) {
rv = nghttp2_bufs_addb(bufs, (uint8_t)(code >> 56));
if (rv != 0) {
return rv;
}
code <<= 8;
nbits -= 8;
}
if (nbits) {
rv = nghttp2_bufs_addb(
bufs, (uint8_t)((uint8_t)(code >> 56) | ((1 << (8 - nbits)) - 1)));
if (rv != 0) {
return rv;
}
}
return 0;
}
void nghttp2_hd_huff_decode_context_init(nghttp2_hd_huff_decode_context *ctx) {
ctx->state = 0;
ctx->accept = 1;
ctx->fstate = NGHTTP2_HUFF_ACCEPTED;
}
ssize_t nghttp2_hd_huff_decode(nghttp2_hd_huff_decode_context *ctx,
nghttp2_buf *buf, const uint8_t *src,
size_t srclen, int final) {
size_t i;
const uint8_t *end = src + srclen;
nghttp2_huff_decode node = {ctx->fstate, 0};
const nghttp2_huff_decode *t = &node;
uint8_t c;
/* We use the decoding algorithm described in
http://graphics.ics.uci.edu/pub/Prefix.pdf */
for (i = 0; i < srclen; ++i) {
const nghttp2_huff_decode *t;
t = &huff_decode_table[ctx->state][src[i] >> 4];
if (t->flags & NGHTTP2_HUFF_FAIL) {
return NGHTTP2_ERR_HEADER_COMP;
}
if (t->flags & NGHTTP2_HUFF_SYM) {
for (; src != end;) {
c = *src++;
t = &huff_decode_table[t->fstate & 0x1ff][c >> 4];
if (t->fstate & NGHTTP2_HUFF_SYM) {
*buf->last++ = t->sym;
}
t = &huff_decode_table[t->state][src[i] & 0xf];
if (t->flags & NGHTTP2_HUFF_FAIL) {
return NGHTTP2_ERR_HEADER_COMP;
}
if (t->flags & NGHTTP2_HUFF_SYM) {
t = &huff_decode_table[t->fstate & 0x1ff][c & 0xf];
if (t->fstate & NGHTTP2_HUFF_SYM) {
*buf->last++ = t->sym;
}
ctx->state = t->state;
ctx->accept = (t->flags & NGHTTP2_HUFF_ACCEPTED) != 0;
}
if (final && !ctx->accept) {
ctx->fstate = t->fstate;
if (final && !(ctx->fstate & NGHTTP2_HUFF_ACCEPTED)) {
return NGHTTP2_ERR_HEADER_COMP;
}
return (ssize_t)i;
return (ssize_t)srclen;
}

View File

@ -34,21 +34,20 @@
typedef enum {
/* FSA accepts this state as the end of huffman encoding
sequence. */
NGHTTP2_HUFF_ACCEPTED = 1,
NGHTTP2_HUFF_ACCEPTED = 1 << 14,
/* This state emits symbol */
NGHTTP2_HUFF_SYM = (1 << 1),
/* If state machine reaches this state, decoding fails. */
NGHTTP2_HUFF_FAIL = (1 << 2)
NGHTTP2_HUFF_SYM = 1 << 15,
} nghttp2_huff_decode_flag;
typedef struct {
/* huffman decoding state, which is actually the node ID of internal
huffman tree. We have 257 leaf nodes, but they are identical to
root node other than emitting a symbol, so we have 256 internal
nodes [1..255], inclusive. */
uint8_t state;
/* bitwise OR of zero or more of the nghttp2_huff_decode_flag */
uint8_t flags;
/* fstate is the current huffman decoding state, which is actually
the node ID of internal huffman tree with
nghttp2_huff_decode_flag OR-ed. We have 257 leaf nodes, but they
are identical to root node other than emitting a symbol, so we
have 256 internal nodes [1..255], inclusive. The node ID 256 is
a special node and it is a terminal state that means decoding
failed. */
uint16_t fstate;
/* symbol if NGHTTP2_HUFF_SYM flag set */
uint8_t sym;
} nghttp2_huff_decode;
@ -56,12 +55,8 @@ typedef struct {
typedef nghttp2_huff_decode huff_decode_table_type[16];
typedef struct {
/* Current huffman decoding state. We stripped leaf nodes, so the
value range is [0..255], inclusive. */
uint8_t state;
/* nonzero if we can say that the decoding process succeeds at this
state */
uint8_t accept;
/* fstate is the current huffman decoding state. */
uint16_t fstate;
} nghttp2_hd_huff_decode_context;
typedef struct {

File diff suppressed because it is too large Load Diff

View File

@ -357,9 +357,8 @@ def _build_transition_table(ctx, node):
def huffman_tree_build_transition_table(ctx):
    """Build the transition table for the whole tree held by ctx.

    Delegates to _build_transition_table(), starting from ctx.root.
    """
    root = ctx.root
    _build_transition_table(ctx, root)
NGHTTP2_HUFF_ACCEPTED = 1
NGHTTP2_HUFF_SYM = 1 << 1
NGHTTP2_HUFF_FAIL = 1 << 2
NGHTTP2_HUFF_ACCEPTED = 1 << 14
NGHTTP2_HUFF_SYM = 1 << 15
def _print_transition_table(node):
if node.term is not None:
@ -374,8 +373,7 @@ def _print_transition_table(node):
out = sym
flags |= NGHTTP2_HUFF_SYM
if nd is None:
id = 0
flags |= NGHTTP2_HUFF_FAIL
id = 256
else:
id = nd.id
if id is None:
@ -384,13 +382,32 @@ def _print_transition_table(node):
flags |= NGHTTP2_HUFF_ACCEPTED
elif nd.accept:
flags |= NGHTTP2_HUFF_ACCEPTED
print ' {{{}, 0x{:02x}, {}}},'.format(id, flags, out)
print ' {{0x{:02x}, {}}},'.format(id | flags, out)
print '},'
_print_transition_table(node.left)
_print_transition_table(node.right)
def huffman_tree_print_transition_table(ctx):
_print_transition_table(ctx.root)
print '/* 256 */'
print '{'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print '},'
if __name__ == '__main__':
ctx = Context()
@ -423,9 +440,12 @@ typedef struct {
print '''\
const nghttp2_huff_sym huff_sym_table[] = {'''
for i in range(257):
nbits = symbol_tbl[i][0]
k = int(symbol_tbl[i][1], 16)
k = k << (32 - nbits)
print '''\
{{ {}, 0x{}u }}{}\
'''.format(symbol_tbl[i][0], symbol_tbl[i][1], ',' if i < 256 else '')
'''.format(symbol_tbl[i][0], hex(k)[2:], ',' if i < 256 else '')
print '};'
print ''
@ -433,14 +453,12 @@ const nghttp2_huff_sym huff_sym_table[] = {'''
enum {{
NGHTTP2_HUFF_ACCEPTED = {},
NGHTTP2_HUFF_SYM = {},
NGHTTP2_HUFF_FAIL = {},
}} nghttp2_huff_decode_flag;
'''.format(NGHTTP2_HUFF_ACCEPTED, NGHTTP2_HUFF_SYM, NGHTTP2_HUFF_FAIL)
'''.format(NGHTTP2_HUFF_ACCEPTED, NGHTTP2_HUFF_SYM)
print '''\
typedef struct {
uint8_t state;
uint8_t flags;
uint16_t fstate;
uint8_t sym;
} nghttp2_huff_decode;
'''

View File

@ -402,6 +402,7 @@ int main() {
test_nghttp2_hd_deflate_hd_vec) ||
!CU_add_test(pSuite, "hd_decode_length", test_nghttp2_hd_decode_length) ||
!CU_add_test(pSuite, "hd_huff_encode", test_nghttp2_hd_huff_encode) ||
!CU_add_test(pSuite, "hd_huff_decode", test_nghttp2_hd_huff_decode) ||
!CU_add_test(pSuite, "adjust_local_window_size",
test_nghttp2_adjust_local_window_size) ||
!CU_add_test(pSuite, "check_header_name",

View File

@ -1538,3 +1538,32 @@ void test_nghttp2_hd_huff_encode(void) {
nghttp2_bufs_free(&bufs);
}
/*
 * Exercises nghttp2_hd_huff_decode() with a fixed input |e| whose
 * first byte decodes to "a" and whose remaining bytes are all 0xff.
 * Three cases are covered: a valid 1-byte sequence, a 2-byte sequence
 * that must be rejected when flagged as final, and the complete input,
 * which must be rejected as well.
 */
void test_nghttp2_hd_huff_decode(void) {
  const uint8_t e[] = {0x1f, 0xff, 0xff, 0xff, 0xff, 0xff};
  nghttp2_hd_huff_decode_context ctx;
  nghttp2_buf outbuf;
  uint8_t b[256];
  ssize_t len;

  /* A single byte decodes to "a" and is accepted as final input */
  nghttp2_buf_wrap_init(&outbuf, b, sizeof(b));
  nghttp2_hd_huff_decode_context_init(&ctx);
  len = nghttp2_hd_huff_decode(&ctx, &outbuf, e, 1, 1);

  CU_ASSERT(1 == len);
  CU_ASSERT(0 == memcmp("a", outbuf.pos, 1));

  /* Premature sequence must elicit decoding error */
  nghttp2_buf_wrap_init(&outbuf, b, sizeof(b));
  nghttp2_hd_huff_decode_context_init(&ctx);
  len = nghttp2_hd_huff_decode(&ctx, &outbuf, e, 2, 1);

  CU_ASSERT(NGHTTP2_ERR_HEADER_COMP == len);

  /* Fully decoding EOS is error.  The whole of |e| has to be fed to
     the decoder for this; the 4th argument is the input length and
     the 5th is the final flag. */
  nghttp2_buf_wrap_init(&outbuf, b, sizeof(b));
  nghttp2_hd_huff_decode_context_init(&ctx);
  len = nghttp2_hd_huff_decode(&ctx, &outbuf, e, sizeof(e), 1);

  CU_ASSERT(NGHTTP2_ERR_HEADER_COMP == len);
}

View File

@ -50,5 +50,6 @@ void test_nghttp2_hd_public_api(void);
void test_nghttp2_hd_deflate_hd_vec(void);
void test_nghttp2_hd_decode_length(void);
void test_nghttp2_hd_huff_encode(void);
void test_nghttp2_hd_huff_decode(void);
#endif /* NGHTTP2_HD_TEST_H */