nghttp2_hd: Rewrite huffman decoding

This commit is contained in:
Tatsuhiro Tsujikawa 2014-01-24 21:56:19 +09:00
parent a8e4da8058
commit f8a446fbeb
5 changed files with 9762 additions and 3240 deletions

View File

@ -371,19 +371,6 @@ ssize_t nghttp2_hd_huff_encode(uint8_t *dest, size_t destlen,
const uint8_t *src, size_t srclen, const uint8_t *src, size_t srclen,
nghttp2_hd_side side); nghttp2_hd_side side);
/*
 * Counts the number of bytes required to decode |src| with length
 * |srclen|. The given input must be padded with a prefix of the
 * terminal code. If |side| is NGHTTP2_HD_SIDE_REQUEST, the request
 * huffman code table is used. Otherwise, the response code table is
 * used.
 *
 * This function returns the number of bytes required to decode the
 * given data if it succeeds, or -1 if it fails.
 */
ssize_t nghttp2_hd_huff_decode_count(const uint8_t *src, size_t srclen,
nghttp2_hd_side side);
/* /*
* Decodes the given data |src| with length |srclen|. This function * Decodes the given data |src| with length |srclen|. This function
* allocates memory to store the result and assigns the its pointer to * allocates memory to store the result and assigns the its pointer to

View File

@ -31,64 +31,11 @@
#include "nghttp2_hd.h" #include "nghttp2_hd.h"
extern const nghttp2_huff_sym req_huff_sym_table[]; extern const nghttp2_huff_sym req_huff_sym_table[];
extern const int16_t req_huff_decode_table[][256]; extern const nghttp2_huff_decode req_huff_decode_table[][16];
extern const nghttp2_huff_sym res_huff_sym_table[]; extern const nghttp2_huff_sym res_huff_sym_table[];
extern const int16_t res_huff_decode_table[][256]; extern const nghttp2_huff_decode res_huff_decode_table[][16];
/*
 * Extracts an 8-bit window from |in| that begins |bitoff| bits into
 * the first byte.  The low bits of the window are taken from the
 * second byte when |len| says one is available; otherwise they are
 * left as 0.  The |bitoff| must be strictly less than 8 and |in| must
 * point to at least one readable byte.
 */
static uint8_t get_prefix_byte(const uint8_t *in, size_t len, size_t bitoff)
{
  unsigned int window = in[0];

  if(bitoff != 0) {
    window <<= bitoff;
    if(len > 1) {
      /* Borrow the top |bitoff| bits of the following byte. */
      window |= (unsigned int)in[1] >> (8 - bitoff);
    }
  }
  /* Truncation discards the bits shifted past the byte boundary. */
  return (uint8_t)window;
}
/*
 * Decodes a single symbol from |in| (|len| bytes long), where the
 * code starts |bitoff| bits into the first byte.
 *
 * Lookup starts at table 0 of |huff_decode_table|.  A non-negative
 * entry is the decoded symbol number; a negative entry means the code
 * continues in the next input byte and its negation is the index of
 * the table to consult for that byte.
 *
 * Returns the symbol number (0-255, or 256 for the special terminal
 * symbol) on success.  Returns -1 when the input is empty, when it
 * ends before a symbol is resolved, or when the resolved symbol's bit
 * length in |huff_sym_table| does not fit in the |len| bytes given.
 */
static int huff_decode(const uint8_t *in, size_t len, size_t bitoff,
                       const nghttp2_huff_sym *huff_sym_table,
                       const huff_decode_table_type *huff_decode_table)
{
  const size_t avail_bits = len * 8;
  size_t remaining = len;
  int table = 0;

  while(remaining > 0) {
    int entry = huff_decode_table[table][get_prefix_byte(in, remaining,
                                                         bitoff)];
    if(entry >= 0) {
      /* Resolved a symbol; make sure its code really fits in the
         bytes that were handed to us. */
      if(bitoff + huff_sym_table[entry].nbits > avail_bits) {
        return -1;
      }
      return entry;
    }
    /* Negative entry: continue with the referenced table on the next
       input byte. */
    table = -entry;
    ++in;
    --remaining;
  }
  return -1;
}
/* /*
* Encodes huffman code |sym| into |*dest_ptr|, whose least |rembits| * Encodes huffman code |sym| into |*dest_ptr|, whose least |rembits|
* bits are not filled yet. The |rembits| must be in range [1, 8], * bits are not filled yet. The |rembits| must be in range [1, 8],
@ -167,89 +114,36 @@ ssize_t nghttp2_hd_huff_encode(uint8_t *dest, size_t destlen,
return dest - dest_first; return dest - dest_first;
} }
/*
 * Tells whether decoding stopped inside valid trailing padding.
 *
 * Returns nonzero only if |idx| is the index of the final byte of
 * |src| (which is |srclen| bytes long), |bitoff| is greater than 0,
 * and the low (8 - |bitoff|) bits of that byte are all set.
 * Otherwise returns 0.
 */
static int check_last_byte(const uint8_t *src, size_t srclen, size_t idx,
                           size_t bitoff)
{
  unsigned int pad_mask;

  if(idx + 1 != srclen) {
    return 0;
  }
  if(bitoff == 0) {
    return 0;
  }
  /* Mask covering the bits of the last byte not yet consumed. */
  pad_mask = (1u << (8 - bitoff)) - 1;
  return (src[idx] & pad_mask) == pad_mask;
}
/*
 * Counts the symbols encoded in the huffman-coded |src| of |srclen|
 * bytes without producing any decoded output.  The request tables
 * are used when |side| is NGHTTP2_HD_SIDE_REQUEST, the response
 * tables otherwise.
 *
 * Returns the number of decoded symbols on success.  Returns -1 if a
 * symbol cannot be decoded and the remaining bits are not accepted by
 * check_last_byte() as trailing padding, or if the special terminal
 * symbol (256) appears in the input.
 */
ssize_t nghttp2_hd_huff_decode_count(const uint8_t *src, size_t srclen,
                                     nghttp2_hd_side side)
{
  const int use_request = side == NGHTTP2_HD_SIDE_REQUEST;
  const nghttp2_huff_sym *syms =
    use_request ? req_huff_sym_table : res_huff_sym_table;
  const huff_decode_table_type *tables =
    use_request ? req_huff_decode_table : res_huff_decode_table;
  size_t pos = 0;
  size_t bits = 0;
  size_t count = 0;

  while(pos < srclen) {
    int sym = huff_decode(src + pos, srclen - pos, bits, syms, tables);
    if(sym == -1) {
      /* A failure is tolerated only when we are looking at the
         padding bits of the last byte. */
      if(!check_last_byte(src, srclen, pos, bits)) {
        return -1;
      }
      break;
    }
    if(sym == 256) {
      /* 256 is the special terminal symbol and it must not be encoded
         in the byte string. */
      return -1;
    }
    ++count;
    /* Advance the byte position and keep the bit offset below 8. */
    bits += syms[sym].nbits;
    pos += bits / 8;
    bits %= 8;
  }
  return count;
}
ssize_t nghttp2_hd_huff_decode(uint8_t **dest_ptr, ssize_t nghttp2_hd_huff_decode(uint8_t **dest_ptr,
const uint8_t *src, size_t srclen, const uint8_t *src, size_t srclen,
nghttp2_hd_side side) nghttp2_hd_side side)
{ {
size_t bitoff = 0; size_t i, j, k;
size_t i, j;
const nghttp2_huff_sym *huff_sym_table;
const huff_decode_table_type *huff_decode_table; const huff_decode_table_type *huff_decode_table;
uint8_t *dest = NULL; uint8_t *dest = NULL;
size_t destlen = 0; size_t destlen = 0;
int rv; int rv;
int16_t state = 0;
const nghttp2_huff_decode *t = NULL;
/* We use the decoding algorithm described in
http://graphics.ics.uci.edu/pub/Prefix.pdf */
if(side == NGHTTP2_HD_SIDE_REQUEST) { if(side == NGHTTP2_HD_SIDE_REQUEST) {
huff_sym_table = req_huff_sym_table;
huff_decode_table = req_huff_decode_table; huff_decode_table = req_huff_decode_table;
} else { } else {
huff_sym_table = res_huff_sym_table;
huff_decode_table = res_huff_decode_table; huff_decode_table = res_huff_decode_table;
} }
j = 0; j = 0;
for(i = 0; i < srclen;) { for(i = 0; i < srclen; ++i) {
rv = huff_decode(src + i, srclen - i, bitoff, uint8_t in = src[i] >> 4;
huff_sym_table, huff_decode_table); for(k = 0; k < 2; ++k) {
if(rv == -1) { t = &huff_decode_table[state][in];
if(check_last_byte(src, srclen, i, bitoff)) { if(t->state == -1) {
break;
}
rv = NGHTTP2_ERR_HEADER_COMP; rv = NGHTTP2_ERR_HEADER_COMP;
goto fail; goto fail;
} }
if(rv == 256) { if(t->flags & NGHTTP2_HUFF_SYM) {
/* 256 is special terminal symbol and it should not encoded in if(destlen == j) {
byte string. */
rv = NGHTTP2_ERR_HEADER_COMP;
goto fail;
}
if(j == destlen) {
size_t new_len = j == 0 ? 32 : j * 2; size_t new_len = j == 0 ? 32 : j * 2;
uint8_t *new_dest = realloc(dest, new_len); uint8_t *new_dest = realloc(dest, new_len);
if(new_dest == NULL) { if(new_dest == NULL) {
@ -259,10 +153,15 @@ ssize_t nghttp2_hd_huff_decode(uint8_t **dest_ptr,
dest = new_dest; dest = new_dest;
destlen = new_len; destlen = new_len;
} }
dest[j++] = rv; dest[j++] = t->sym;
bitoff += huff_sym_table[rv].nbits; }
i += bitoff / 8; state = t->state;
bitoff &= 0x7; in = src[i] & 0xf;
}
}
if(srclen && (t->flags & NGHTTP2_HUFF_ACCEPTED) == 0) {
rv = NGHTTP2_ERR_HEADER_COMP;
goto fail;
} }
*dest_ptr = dest; *dest_ptr = dest;
return j; return j;

View File

@ -31,7 +31,21 @@
#include <nghttp2/nghttp2.h> #include <nghttp2/nghttp2.h>
typedef int16_t huff_decode_table_type[256]; enum {
/* FSA accepts this state as the end of huffman encoding
sequence. */
NGHTTP2_HUFF_ACCEPTED = 1,
/* This state emits symbol */
NGHTTP2_HUFF_SYM = (1 << 1)
} nghttp2_huff_decode_flag;
/* One entry of the huffman decoding state table.  The decoder indexes
   the table by the current state and the next 4 bits of input. */
typedef struct {
/* State to move to after consuming the input.  The decoder treats
   -1 as a decoding failure. */
int16_t state;
/* Bitwise OR of zero or more of NGHTTP2_HUFF_ACCEPTED and
   NGHTTP2_HUFF_SYM. */
uint8_t flags;
/* Symbol to emit; the decoder reads it only when NGHTTP2_HUFF_SYM is
   set in |flags|. */
uint8_t sym;
} nghttp2_huff_decode;
typedef nghttp2_huff_decode huff_decode_table_type[16];
typedef struct { typedef struct {
/* The number of bits in this code */ /* The number of bits in this code */

File diff suppressed because it is too large Load Diff

View File

@ -3,9 +3,13 @@ import re
import sys import sys
class Node: class Node:
def __init__(self, depth): def __init__(self, term = None):
self.depth = depth self.term = term
self.children = {} self.left = None
self.right = None
self.trans = []
self.id = None
self.accept = False
def to_bin(s): def to_bin(s):
res = [] res = []
@ -21,27 +25,99 @@ def to_bin(s):
nodes = [] nodes = []
def insert(node, sym, binpat, nbits, pidx): def insert(node, sym, bits):
if pidx == len(binpat) - 1: if len(bits) == 0:
#assert(binpat[pidx] not in node.children) node.term = sym
mx = (8 - (nbits & 0x7)) & 0x7; return
#print "last", bin(binpat[pidx]), mx
for i in range(1 << mx):
node.children[binpat[pidx] + i] = sym
else: else:
if binpat[pidx] not in node.children: if bits[0] == '0':
node.children[binpat[pidx]] = -len(nodes) if node.left is None:
nextnode = Node(pidx + 1) node.left = Node()
nodes.append(nextnode) child = node.left
else: else:
nextnode = nodes[-node.children[binpat[pidx]]] if node.right is None:
insert(nextnode, sym, binpat, nbits, pidx + 1) node.right = Node()
child = node.right
insert(child, sym, bits[1:])
def traverse(node, bits, syms, start_node, root, depth):
    """Record on start_node every transition reachable with 4 input bits.

    Walks the code tree one bit at a time, beginning at `node`.  `bits`
    holds the bits consumed so far and `syms` the symbols of the leaves
    passed on the way.  Reaching a leaf restarts the walk from `root`.
    Once `depth` reaches 4 a tuple (node, bits, syms) is appended to
    start_node.trans; if symbol 256 was collected on the path, the tuple
    is (None, bits, []) instead.
    """
    if depth == 4:
        if 256 in syms:
            start_node.trans.append((None, bits, []))
        else:
            start_node.trans.append((node, bits, syms))
        return

    # A completed code means the next bit starts a new code at the root.
    current = root if node.term is not None else node

    for bit, child in ((0, current.left), (1, current.right)):
        next_bits = list(bits) + [bit]
        next_syms = list(syms)
        if child.term is not None:
            next_syms.append(child.term)
        traverse(child, next_bits, next_syms, start_node, root, depth + 1)
# Next state id to hand out; advanced by dfs_setid().
idseed = 0

def dfs_setid(node, prefix):
    """Number the internal nodes in depth-first order (left before right).

    Leaf nodes (node.term set) are skipped and keep their id unchanged.
    `prefix` is the list of bits leading from the root to `node`.  A node
    is marked as accepting when its prefix is at most 7 bits long and
    consists only of 1 bits (this includes the empty prefix).
    """
    global idseed
    if node.term is not None:
        return

    only_ones = all(bit == 1 for bit in prefix)
    if only_ones and len(prefix) <= 7:
        node.accept = True

    node.id, idseed = idseed, idseed + 1

    for bit, child in ((0, node.left), (1, node.right)):
        dfs_setid(child, prefix + [bit])
def dfs(node, root):
    """Run traverse() from every node of the tree rooted at `node`.

    Each visited node is used as its own start node, so its `trans` list
    is filled in.  Missing children (None) are ignored.
    """
    if node is not None:
        traverse(node, [], [], node, root, 0)
        for child in (node.left, node.right):
            dfs(child, root)
NGHTTP2_HUFF_ACCEPTED = 1
NGHTTP2_HUFF_SYM = 1 << 1
def dfs_print(node):
if node.term is not None:
return
print '/* {} */'.format(node.id)
print '{'
for nd, bits, syms in node.trans:
outlen = len(syms)
flags = 0
if outlen == 0:
out = 0
else:
assert(outlen == 1)
out = syms[0]
flags |= NGHTTP2_HUFF_SYM
if nd is None:
id = -1
else:
id = nd.id
if id is None:
# if nd.id is None, it is a leaf node
id = 0
flags |= NGHTTP2_HUFF_ACCEPTED
elif nd.accept:
flags |= NGHTTP2_HUFF_ACCEPTED
print ' {{{}, 0x{:02x}, {}}},'.format(id, flags, out)
print '},'
dfs_print(node.left)
dfs_print(node.right)
symbol_tbl = [(None, 0) for i in range(257)] symbol_tbl = [(None, 0) for i in range(257)]
tables = {} tables = {}
root = Node(0) root = Node()
nodes.append(root)
for line in sys.stdin: for line in sys.stdin:
m = re.match(r'.*\(\s*(\d+)\) ([|01]+) \[(\d+)\]\s+(\S+).*', line) m = re.match(r'.*\(\s*(\d+)\) ([|01]+) \[(\d+)\]\s+(\S+).*', line)
@ -50,14 +126,17 @@ for line in sys.stdin:
if len(m.group(4)) > 8: if len(m.group(4)) > 8:
raise Error('Code is more than 4 bytes long') raise Error('Code is more than 4 bytes long')
sym = int(m.group(1)) sym = int(m.group(1))
pat = re.sub(r'\|', '', m.group(2)) bits = re.sub(r'\|', '', m.group(2))
nbits = int(m.group(3)) nbits = int(m.group(3))
assert(len(pat) == nbits) assert(len(bits) == nbits)
binpat = to_bin(pat) binpat = to_bin(bits)
assert(len(binpat) == (nbits+7)/8) assert(len(binpat) == (nbits+7)/8)
symbol_tbl[sym] = (binpat, nbits, m.group(4)) symbol_tbl[sym] = (binpat, nbits, m.group(4))
#print "Inserting", sym #print "Inserting", sym
insert(root, sym, binpat, nbits, 0) insert(root, sym, bits)
dfs_setid(root, [])
dfs(root, root)
print '''\ print '''\
typedef struct { typedef struct {
@ -67,7 +146,7 @@ typedef struct {
''' '''
print '''\ print '''\
nghttp2_huff_sym huff_sym_table[] = {''' const nghttp2_huff_sym huff_sym_table[] = {'''
for i in range(257): for i in range(257):
pat = list(symbol_tbl[i][0]) pat = list(symbol_tbl[i][0])
pat += [0]*(4 - len(pat)) pat += [0]*(4 - len(pat))
@ -77,22 +156,22 @@ for i in range(257):
print '};' print '};'
print '' print ''
print '''int16_t huff_decode_table[][256] = {''' print '''\
for j in range(len(nodes)): enum {{
node = nodes[j] NGHTTP2_HUFF_ACCEPTED = {},
print '/* {} */'.format(j) NGHTTP2_HUFF_SYM = {}
print '{' }} nghttp2_huff_decode_flag;
for i in range(256): '''.format(NGHTTP2_HUFF_ACCEPTED, NGHTTP2_HUFF_SYM)
if i in node.children:
sys.stdout.write('''\ print '''\
{}{}'''.format(node.children[i], ',' if i < 255 else '')) typedef struct {
else: int16_t state;
sys.stdout.write(''' NGHTTP2_HD_HUFF_NO_ENT,''') uint8_t flags;
if (i+1)&0x7 == 0: uint8_t sym;
print '' } nghttp2_huff_decode;
sys.stdout.write('}') '''
if j == len(nodes) - 1:
print '' print '''\
else: const nghttp2_huff_decode huff_decode_table[][16] = {'''
print ',' dfs_print(root)
print '};' print '};'