Merge pull request #1405 from nghttp2/huffman

Faster Huffman encoding/decoding
This commit is contained in:
Tatsuhiro Tsujikawa 2019-10-12 18:48:21 +09:00 committed by GitHub
commit d08c43951f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 4317 additions and 4345 deletions

View File

@ -29,114 +29,7 @@
#include <stdio.h> #include <stdio.h>
#include "nghttp2_hd.h" #include "nghttp2_hd.h"
#include "nghttp2_net.h"
/*
 * Appends the huffman code of |sym| to |bufs|.  The byte at the
 * current write position of |bufs| still has its least significant
 * |rembits| bits unfilled; |rembits| must be in range [1, 8],
 * inclusive.
 *
 * |*avail_ptr| is the caller's cached count of writable bytes.  It is
 * decremented each time the partially filled byte is completed, and
 * re-read with nghttp2_bufs_cur_avail() after further bytes have been
 * written.
 *
 * Returns the number of unfilled bits in the byte at the new write
 * position (8 when the code ended exactly on a byte boundary).  If
 * nghttp2_bufs_addb() or nghttp2_bufs_addb_hold() reports failure,
 * its nonzero return value is returned instead.
 */
static ssize_t huff_encode_sym(nghttp2_bufs *bufs, size_t *avail_ptr,
                               size_t rembits, const nghttp2_huff_sym *sym) {
  uint32_t bits = sym->code;
  size_t left = sym->nbits;
  size_t shift;
  int err;

  /* We assume that sym->nbits <= 32 */

  if (left < rembits) {
    /* The whole code fits into the pending byte with room to spare;
       keep the write position on that byte. */
    nghttp2_bufs_fast_orb_hold(bufs, (uint8_t)(bits << (rembits - left)));
    return (ssize_t)(rembits - left);
  }

  if (left == rembits) {
    /* The code exactly completes the pending byte. */
    nghttp2_bufs_fast_orb(bufs, (uint8_t)bits);
    --*avail_ptr;
    return 8;
  }

  /* Complete the pending byte with the leading |rembits| bits of the
     code; |left| bits (at least 1) remain afterwards. */
  nghttp2_bufs_fast_orb(bufs, (uint8_t)(bits >> (left - rembits)));
  --*avail_ptr;
  left -= rembits;

  if ((left & 0x7) != 0) {
    /* align code to MSB byte boundary */
    bits <<= 8 - (left & 0x7);
  }

  if (*avail_ptr >= (left + 7) / 8) {
    /* fast path: enough room for every remaining byte, so the
       unchecked writers can be used.  Most codes are shorter than 8
       bits and skip straight to the final partial byte. */
    if (left >= 8) {
      for (shift = 24; shift != 0; shift -= 8) {
        if (left > shift) {
          nghttp2_bufs_fast_addb(bufs, (uint8_t)(bits >> shift));
          left -= 8;
        }
      }

      if (left == 8) {
        nghttp2_bufs_fast_addb(bufs, (uint8_t)bits);
        *avail_ptr = nghttp2_bufs_cur_avail(bufs);
        return 8;
      }
    }

    nghttp2_bufs_fast_addb_hold(bufs, (uint8_t)bits);
    *avail_ptr = nghttp2_bufs_cur_avail(bufs);
    return (ssize_t)(8 - left);
  }

  /* slow path: not enough cached room, so every byte goes through the
     checked writers, which may fail. */
  for (shift = 24; shift != 0; shift -= 8) {
    if (left > shift) {
      err = nghttp2_bufs_addb(bufs, (uint8_t)(bits >> shift));
      if (err != 0) {
        return err;
      }
      left -= 8;
    }
  }

  if (left == 8) {
    err = nghttp2_bufs_addb(bufs, (uint8_t)bits);
    if (err != 0) {
      return err;
    }
    *avail_ptr = nghttp2_bufs_cur_avail(bufs);
    return 8;
  }

  err = nghttp2_bufs_addb_hold(bufs, (uint8_t)bits);
  if (err != 0) {
    return err;
  }
  *avail_ptr = nghttp2_bufs_cur_avail(bufs);
  return (ssize_t)(8 - left);
}
size_t nghttp2_hd_huff_encode_count(const uint8_t *src, size_t len) { size_t nghttp2_hd_huff_encode_count(const uint8_t *src, size_t len) {
size_t i; size_t i;
@ -151,81 +44,97 @@ size_t nghttp2_hd_huff_encode_count(const uint8_t *src, size_t len) {
int nghttp2_hd_huff_encode(nghttp2_bufs *bufs, const uint8_t *src, int nghttp2_hd_huff_encode(nghttp2_bufs *bufs, const uint8_t *src,
size_t srclen) { size_t srclen) {
int rv; const nghttp2_huff_sym *sym;
ssize_t rembits = 8; const uint8_t *end = src + srclen;
size_t i; uint64_t code = 0;
uint32_t x;
size_t nbits = 0;
size_t avail; size_t avail;
int rv;
avail = nghttp2_bufs_cur_avail(bufs); avail = nghttp2_bufs_cur_avail(bufs);
for (i = 0; i < srclen; ++i) { for (; src != end;) {
const nghttp2_huff_sym *sym = &huff_sym_table[src[i]]; sym = &huff_sym_table[*src++];
if (rembits == 8) { code |= (uint64_t)sym->code << (32 - nbits);
if (avail) { nbits += sym->nbits;
nghttp2_bufs_fast_addb_hold(bufs, 0); if (nbits < 32) {
} else { continue;
rv = nghttp2_bufs_addb_hold(bufs, 0); }
if (avail >= 4) {
x = htonl((uint32_t)(code >> 32));
memcpy(bufs->cur->buf.last, &x, 4);
bufs->cur->buf.last += 4;
avail -= 4;
code <<= 32;
nbits -= 32;
continue;
}
for (; nbits >= 8;) {
rv = nghttp2_bufs_addb(bufs, (uint8_t)(code >> 56));
if (rv != 0) { if (rv != 0) {
return rv; return rv;
} }
code <<= 8;
nbits -= 8;
}
avail = nghttp2_bufs_cur_avail(bufs); avail = nghttp2_bufs_cur_avail(bufs);
} }
for (; nbits >= 8;) {
rv = nghttp2_bufs_addb(bufs, (uint8_t)(code >> 56));
if (rv != 0) {
return rv;
} }
rembits = huff_encode_sym(bufs, &avail, (size_t)rembits, sym); code <<= 8;
if (rembits < 0) { nbits -= 8;
return (int)rembits;
} }
if (nbits) {
rv = nghttp2_bufs_addb(
bufs, (uint8_t)((uint8_t)(code >> 56) | ((1 << (8 - nbits)) - 1)));
if (rv != 0) {
return rv;
} }
/* 256 is special terminal symbol, pad with its prefix */
if (rembits < 8) {
/* if rembits < 8, we should have at least 1 buffer space
available */
const nghttp2_huff_sym *sym = &huff_sym_table[256];
assert(avail);
/* Caution we no longer adjust avail here */
nghttp2_bufs_fast_orb(
bufs, (uint8_t)(sym->code >> (sym->nbits - (size_t)rembits)));
} }
return 0; return 0;
} }
void nghttp2_hd_huff_decode_context_init(nghttp2_hd_huff_decode_context *ctx) { void nghttp2_hd_huff_decode_context_init(nghttp2_hd_huff_decode_context *ctx) {
ctx->state = 0; ctx->fstate = NGHTTP2_HUFF_ACCEPTED;
ctx->accept = 1;
} }
ssize_t nghttp2_hd_huff_decode(nghttp2_hd_huff_decode_context *ctx, ssize_t nghttp2_hd_huff_decode(nghttp2_hd_huff_decode_context *ctx,
nghttp2_buf *buf, const uint8_t *src, nghttp2_buf *buf, const uint8_t *src,
size_t srclen, int final) { size_t srclen, int final) {
size_t i; const uint8_t *end = src + srclen;
nghttp2_huff_decode node = {ctx->fstate, 0};
const nghttp2_huff_decode *t = &node;
uint8_t c;
/* We use the decoding algorithm described in /* We use the decoding algorithm described in
http://graphics.ics.uci.edu/pub/Prefix.pdf */ http://graphics.ics.uci.edu/pub/Prefix.pdf */
for (i = 0; i < srclen; ++i) { for (; src != end;) {
const nghttp2_huff_decode *t; c = *src++;
t = &huff_decode_table[t->fstate & 0x1ff][c >> 4];
t = &huff_decode_table[ctx->state][src[i] >> 4]; if (t->fstate & NGHTTP2_HUFF_SYM) {
if (t->flags & NGHTTP2_HUFF_FAIL) {
return NGHTTP2_ERR_HEADER_COMP;
}
if (t->flags & NGHTTP2_HUFF_SYM) {
*buf->last++ = t->sym; *buf->last++ = t->sym;
} }
t = &huff_decode_table[t->state][src[i] & 0xf]; t = &huff_decode_table[t->fstate & 0x1ff][c & 0xf];
if (t->flags & NGHTTP2_HUFF_FAIL) { if (t->fstate & NGHTTP2_HUFF_SYM) {
return NGHTTP2_ERR_HEADER_COMP;
}
if (t->flags & NGHTTP2_HUFF_SYM) {
*buf->last++ = t->sym; *buf->last++ = t->sym;
} }
ctx->state = t->state;
ctx->accept = (t->flags & NGHTTP2_HUFF_ACCEPTED) != 0;
} }
if (final && !ctx->accept) {
ctx->fstate = t->fstate;
if (final && !(ctx->fstate & NGHTTP2_HUFF_ACCEPTED)) {
return NGHTTP2_ERR_HEADER_COMP; return NGHTTP2_ERR_HEADER_COMP;
} }
return (ssize_t)i;
return (ssize_t)srclen;
} }

View File

@ -34,21 +34,20 @@
typedef enum { typedef enum {
/* FSA accepts this state as the end of huffman encoding /* FSA accepts this state as the end of huffman encoding
sequence. */ sequence. */
NGHTTP2_HUFF_ACCEPTED = 1, NGHTTP2_HUFF_ACCEPTED = 1 << 14,
/* This state emits symbol */ /* This state emits symbol */
NGHTTP2_HUFF_SYM = (1 << 1), NGHTTP2_HUFF_SYM = 1 << 15,
/* If state machine reaches this state, decoding fails. */
NGHTTP2_HUFF_FAIL = (1 << 2)
} nghttp2_huff_decode_flag; } nghttp2_huff_decode_flag;
typedef struct { typedef struct {
/* huffman decoding state, which is actually the node ID of internal /* fstate is the current huffman decoding state, which is actually
huffman tree. We have 257 leaf nodes, but they are identical to the node ID of internal huffman tree with
root node other than emitting a symbol, so we have 256 internal nghttp2_huff_decode_flag OR-ed. We have 257 leaf nodes, but they
nodes [1..255], inclusive. */ are identical to root node other than emitting a symbol, so we
uint8_t state; have 256 internal nodes [1..255], inclusive. The node ID 256 is
/* bitwise OR of zero or more of the nghttp2_huff_decode_flag */ a special node and it is a terminal state that means decoding
uint8_t flags; failed. */
uint16_t fstate;
/* symbol if NGHTTP2_HUFF_SYM flag set */ /* symbol if NGHTTP2_HUFF_SYM flag set */
uint8_t sym; uint8_t sym;
} nghttp2_huff_decode; } nghttp2_huff_decode;
@ -56,12 +55,8 @@ typedef struct {
typedef nghttp2_huff_decode huff_decode_table_type[16]; typedef nghttp2_huff_decode huff_decode_table_type[16];
typedef struct { typedef struct {
/* Current huffman decoding state. We stripped leaf nodes, so the /* fstate is the current huffman decoding state. */
value range is [0..255], inclusive. */ uint16_t fstate;
uint8_t state;
/* nonzero if we can say that the decoding process succeeds at this
state */
uint8_t accept;
} nghttp2_hd_huff_decode_context; } nghttp2_hd_huff_decode_context;
typedef struct { typedef struct {

File diff suppressed because it is too large Load Diff

View File

@ -357,9 +357,8 @@ def _build_transition_table(ctx, node):
def huffman_tree_build_transition_table(ctx): def huffman_tree_build_transition_table(ctx):
_build_transition_table(ctx, ctx.root) _build_transition_table(ctx, ctx.root)
NGHTTP2_HUFF_ACCEPTED = 1 NGHTTP2_HUFF_ACCEPTED = 1 << 14
NGHTTP2_HUFF_SYM = 1 << 1 NGHTTP2_HUFF_SYM = 1 << 15
NGHTTP2_HUFF_FAIL = 1 << 2
def _print_transition_table(node): def _print_transition_table(node):
if node.term is not None: if node.term is not None:
@ -374,8 +373,7 @@ def _print_transition_table(node):
out = sym out = sym
flags |= NGHTTP2_HUFF_SYM flags |= NGHTTP2_HUFF_SYM
if nd is None: if nd is None:
id = 0 id = 256
flags |= NGHTTP2_HUFF_FAIL
else: else:
id = nd.id id = nd.id
if id is None: if id is None:
@ -384,13 +382,32 @@ def _print_transition_table(node):
flags |= NGHTTP2_HUFF_ACCEPTED flags |= NGHTTP2_HUFF_ACCEPTED
elif nd.accept: elif nd.accept:
flags |= NGHTTP2_HUFF_ACCEPTED flags |= NGHTTP2_HUFF_ACCEPTED
print ' {{{}, 0x{:02x}, {}}},'.format(id, flags, out) print ' {{0x{:02x}, {}}},'.format(id | flags, out)
print '},' print '},'
_print_transition_table(node.left) _print_transition_table(node.left)
_print_transition_table(node.right) _print_transition_table(node.right)
def huffman_tree_print_transition_table(ctx): def huffman_tree_print_transition_table(ctx):
_print_transition_table(ctx.root) _print_transition_table(ctx.root)
print '/* 256 */'
print '{'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print ' {0x100, 0},'
print '},'
if __name__ == '__main__': if __name__ == '__main__':
ctx = Context() ctx = Context()
@ -423,9 +440,12 @@ typedef struct {
print '''\ print '''\
const nghttp2_huff_sym huff_sym_table[] = {''' const nghttp2_huff_sym huff_sym_table[] = {'''
for i in range(257): for i in range(257):
nbits = symbol_tbl[i][0]
k = int(symbol_tbl[i][1], 16)
k = k << (32 - nbits)
print '''\ print '''\
{{ {}, 0x{}u }}{}\ {{ {}, 0x{}u }}{}\
'''.format(symbol_tbl[i][0], symbol_tbl[i][1], ',' if i < 256 else '') '''.format(symbol_tbl[i][0], hex(k)[2:], ',' if i < 256 else '')
print '};' print '};'
print '' print ''
@ -433,14 +453,12 @@ const nghttp2_huff_sym huff_sym_table[] = {'''
enum {{ enum {{
NGHTTP2_HUFF_ACCEPTED = {}, NGHTTP2_HUFF_ACCEPTED = {},
NGHTTP2_HUFF_SYM = {}, NGHTTP2_HUFF_SYM = {},
NGHTTP2_HUFF_FAIL = {},
}} nghttp2_huff_decode_flag; }} nghttp2_huff_decode_flag;
'''.format(NGHTTP2_HUFF_ACCEPTED, NGHTTP2_HUFF_SYM, NGHTTP2_HUFF_FAIL) '''.format(NGHTTP2_HUFF_ACCEPTED, NGHTTP2_HUFF_SYM)
print '''\ print '''\
typedef struct { typedef struct {
uint8_t state; uint16_t fstate;
uint8_t flags;
uint8_t sym; uint8_t sym;
} nghttp2_huff_decode; } nghttp2_huff_decode;
''' '''

View File

@ -402,6 +402,7 @@ int main() {
test_nghttp2_hd_deflate_hd_vec) || test_nghttp2_hd_deflate_hd_vec) ||
!CU_add_test(pSuite, "hd_decode_length", test_nghttp2_hd_decode_length) || !CU_add_test(pSuite, "hd_decode_length", test_nghttp2_hd_decode_length) ||
!CU_add_test(pSuite, "hd_huff_encode", test_nghttp2_hd_huff_encode) || !CU_add_test(pSuite, "hd_huff_encode", test_nghttp2_hd_huff_encode) ||
!CU_add_test(pSuite, "hd_huff_decode", test_nghttp2_hd_huff_decode) ||
!CU_add_test(pSuite, "adjust_local_window_size", !CU_add_test(pSuite, "adjust_local_window_size",
test_nghttp2_adjust_local_window_size) || test_nghttp2_adjust_local_window_size) ||
!CU_add_test(pSuite, "check_header_name", !CU_add_test(pSuite, "check_header_name",

View File

@ -1538,3 +1538,32 @@ void test_nghttp2_hd_huff_encode(void) {
nghttp2_bufs_free(&bufs); nghttp2_bufs_free(&bufs);
} }
/*
 * Exercises nghttp2_hd_huff_decode() on a single symbol, on an input
 * that ends in over-long padding, and on an input that contains a
 * complete EOS code.
 */
void test_nghttp2_hd_huff_decode(void) {
  /* Per the RFC 7541 Appendix B code table, 0x1f is the 5-bit code
     for 'a' followed by three 1 bits; every following byte is all 1
     bits, so the full array holds more than the 30 consecutive 1 bits
     that make up EOS. */
  const uint8_t e[] = {0x1f, 0xff, 0xff, 0xff, 0xff, 0xff};
  nghttp2_hd_huff_decode_context ctx;
  nghttp2_buf outbuf;
  uint8_t b[256];
  ssize_t len;

  /* One byte: 'a' plus valid padding decodes successfully */
  nghttp2_buf_wrap_init(&outbuf, b, sizeof(b));
  nghttp2_hd_huff_decode_context_init(&ctx);
  len = nghttp2_hd_huff_decode(&ctx, &outbuf, e, 1, 1);

  CU_ASSERT(1 == len);
  CU_ASSERT(0 == memcmp("a", outbuf.pos, 1));

  /* Premature sequence must elicit decoding error */
  nghttp2_buf_wrap_init(&outbuf, b, sizeof(b));
  nghttp2_hd_huff_decode_context_init(&ctx);
  len = nghttp2_hd_huff_decode(&ctx, &outbuf, e, 2, 1);

  CU_ASSERT(NGHTTP2_ERR_HEADER_COMP == len);

  /* Fully decoding EOS is error.  The whole of |e| must be fed with
     |final| set; passing (2, 6) here would swap srclen and final and
     merely repeat the premature-sequence case above. */
  nghttp2_buf_wrap_init(&outbuf, b, sizeof(b));
  nghttp2_hd_huff_decode_context_init(&ctx);
  len = nghttp2_hd_huff_decode(&ctx, &outbuf, e, sizeof(e), 1);

  CU_ASSERT(NGHTTP2_ERR_HEADER_COMP == len);
}

View File

@ -50,5 +50,6 @@ void test_nghttp2_hd_public_api(void);
void test_nghttp2_hd_deflate_hd_vec(void); void test_nghttp2_hd_deflate_hd_vec(void);
void test_nghttp2_hd_decode_length(void); void test_nghttp2_hd_decode_length(void);
void test_nghttp2_hd_huff_encode(void); void test_nghttp2_hd_huff_encode(void);
void test_nghttp2_hd_huff_decode(void);
#endif /* NGHTTP2_HD_TEST_H */ #endif /* NGHTTP2_HD_TEST_H */