nghttp2_hd: Rewrite huffman decoding
This commit is contained in:
parent
a8e4da8058
commit
f8a446fbeb
|
@ -371,19 +371,6 @@ ssize_t nghttp2_hd_huff_encode(uint8_t *dest, size_t destlen,
|
|||
const uint8_t *src, size_t srclen,
|
||||
nghttp2_hd_side side);
|
||||
|
||||
/*
|
||||
* Counts the number of required bytes to decode |src| with length
|
||||
* |srclen|. The given input must be padded with the prefix of
|
||||
* terminal code. If |side| is NGHTTP2_HD_SIDE_REQUEST, the request
|
||||
* huffman code table is used. Otherwise, the response code table is
|
||||
* used.
|
||||
*
|
||||
* This function returns the number of required bytes to decode given
|
||||
* data if it succeeds, or -1.
|
||||
*/
|
||||
ssize_t nghttp2_hd_huff_decode_count(const uint8_t *src, size_t srclen,
|
||||
nghttp2_hd_side side);
|
||||
|
||||
/*
|
||||
* Decodes the given data |src| with length |srclen|. This function
|
||||
* allocates memory to store the result and assigns its pointer to
|
||||
|
|
|
@ -31,64 +31,11 @@
|
|||
#include "nghttp2_hd.h"
|
||||
|
||||
extern const nghttp2_huff_sym req_huff_sym_table[];
|
||||
extern const int16_t req_huff_decode_table[][256];
|
||||
extern const nghttp2_huff_decode req_huff_decode_table[][16];
|
||||
|
||||
extern const nghttp2_huff_sym res_huff_sym_table[];
|
||||
extern const int16_t res_huff_decode_table[][256];
|
||||
extern const nghttp2_huff_decode res_huff_decode_table[][16];
|
||||
|
||||
/*
|
||||
* Returns next 8 bits of data from |in|, starting |bitoff| bits
|
||||
* offset. If there are fewer bits left than |bitoff|, the left bits
|
||||
* padded with 0 are returned. The |bitoff| must be strictly less
|
||||
* than 8.
|
||||
*/
|
||||
static uint8_t get_prefix_byte(const uint8_t *in, size_t len, size_t bitoff)
|
||||
{
|
||||
uint8_t b;
|
||||
if(bitoff == 0) {
|
||||
return *in;
|
||||
}
|
||||
b = *in << bitoff;
|
||||
if(len > 1) {
|
||||
b |= *(in + 1) >> (8 - bitoff);
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decodes next byte from input |in| with length |len|, starting
|
||||
* |bitoff| bit offset.
|
||||
*
|
||||
* This function returns the decoded symbol number (0-255 and 256 for
|
||||
* special terminal symbol) if it succeeds, or -1.
|
||||
*/
|
||||
static int huff_decode(const uint8_t *in, size_t len, size_t bitoff,
|
||||
const nghttp2_huff_sym *huff_sym_table,
|
||||
const huff_decode_table_type *huff_decode_table)
|
||||
{
|
||||
int rv = 0;
|
||||
size_t len_orig = len;
|
||||
if(len == 0) {
|
||||
return -1;
|
||||
}
|
||||
for(;;) {
|
||||
rv = huff_decode_table[rv][get_prefix_byte(in, len, bitoff)];
|
||||
if(rv >= 0) {
|
||||
break;
|
||||
}
|
||||
/* Negative return value means we need to lookup next table. */
|
||||
rv = -rv;
|
||||
++in;
|
||||
--len;
|
||||
if(len == 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if(bitoff + huff_sym_table[rv].nbits > len_orig * 8) {
|
||||
return -1;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
/*
|
||||
* Encodes huffman code |sym| into |*dest_ptr|, whose least |rembits|
|
||||
* bits are not filled yet. The |rembits| must be in range [1, 8],
|
||||
|
@ -167,102 +114,54 @@ ssize_t nghttp2_hd_huff_encode(uint8_t *dest, size_t destlen,
|
|||
return dest - dest_first;
|
||||
}
|
||||
|
||||
static int check_last_byte(const uint8_t *src, size_t srclen, size_t idx,
|
||||
size_t bitoff)
|
||||
{
|
||||
uint8_t last_mask = (1 << (8 - bitoff)) - 1;
|
||||
return idx + 1 == srclen && bitoff > 0 &&
|
||||
(src[idx] & last_mask) == last_mask;
|
||||
}
|
||||
|
||||
ssize_t nghttp2_hd_huff_decode_count(const uint8_t *src, size_t srclen,
|
||||
nghttp2_hd_side side)
|
||||
{
|
||||
size_t bitoff = 0;
|
||||
size_t i, j;
|
||||
const nghttp2_huff_sym *huff_sym_table;
|
||||
const huff_decode_table_type *huff_decode_table;
|
||||
|
||||
if(side == NGHTTP2_HD_SIDE_REQUEST) {
|
||||
huff_sym_table = req_huff_sym_table;
|
||||
huff_decode_table = req_huff_decode_table;
|
||||
} else {
|
||||
huff_sym_table = res_huff_sym_table;
|
||||
huff_decode_table = res_huff_decode_table;
|
||||
}
|
||||
j = 0;
|
||||
for(i = 0; i < srclen;) {
|
||||
int rv = huff_decode(src + i, srclen - i, bitoff,
|
||||
huff_sym_table, huff_decode_table);
|
||||
if(rv == -1) {
|
||||
if(check_last_byte(src, srclen, i, bitoff)) {
|
||||
break;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
if(rv == 256) {
|
||||
/* 256 is special terminal symbol and it should not be encoded in
|
||||
byte string. */
|
||||
return -1;
|
||||
}
|
||||
j++;
|
||||
bitoff += huff_sym_table[rv].nbits;
|
||||
i += bitoff / 8;
|
||||
bitoff &= 0x7;
|
||||
}
|
||||
return j;
|
||||
}
|
||||
|
||||
ssize_t nghttp2_hd_huff_decode(uint8_t **dest_ptr,
|
||||
const uint8_t *src, size_t srclen,
|
||||
nghttp2_hd_side side)
|
||||
{
|
||||
size_t bitoff = 0;
|
||||
size_t i, j;
|
||||
const nghttp2_huff_sym *huff_sym_table;
|
||||
size_t i, j, k;
|
||||
const huff_decode_table_type *huff_decode_table;
|
||||
uint8_t *dest = NULL;
|
||||
size_t destlen = 0;
|
||||
int rv;
|
||||
int16_t state = 0;
|
||||
const nghttp2_huff_decode *t = NULL;
|
||||
|
||||
/* We use the decoding algorithm described in
|
||||
http://graphics.ics.uci.edu/pub/Prefix.pdf */
|
||||
if(side == NGHTTP2_HD_SIDE_REQUEST) {
|
||||
huff_sym_table = req_huff_sym_table;
|
||||
huff_decode_table = req_huff_decode_table;
|
||||
} else {
|
||||
huff_sym_table = res_huff_sym_table;
|
||||
huff_decode_table = res_huff_decode_table;
|
||||
}
|
||||
j = 0;
|
||||
for(i = 0; i < srclen;) {
|
||||
rv = huff_decode(src + i, srclen - i, bitoff,
|
||||
huff_sym_table, huff_decode_table);
|
||||
if(rv == -1) {
|
||||
if(check_last_byte(src, srclen, i, bitoff)) {
|
||||
break;
|
||||
}
|
||||
rv = NGHTTP2_ERR_HEADER_COMP;
|
||||
goto fail;
|
||||
}
|
||||
if(rv == 256) {
|
||||
/* 256 is special terminal symbol and it should not be encoded in
|
||||
byte string. */
|
||||
rv = NGHTTP2_ERR_HEADER_COMP;
|
||||
goto fail;
|
||||
}
|
||||
if(j == destlen) {
|
||||
size_t new_len = j == 0 ? 32 : j * 2;
|
||||
uint8_t *new_dest = realloc(dest, new_len);
|
||||
if(new_dest == NULL) {
|
||||
rv = NGHTTP2_ERR_NOMEM;
|
||||
for(i = 0; i < srclen; ++i) {
|
||||
uint8_t in = src[i] >> 4;
|
||||
for(k = 0; k < 2; ++k) {
|
||||
t = &huff_decode_table[state][in];
|
||||
if(t->state == -1) {
|
||||
rv = NGHTTP2_ERR_HEADER_COMP;
|
||||
goto fail;
|
||||
}
|
||||
dest = new_dest;
|
||||
destlen = new_len;
|
||||
if(t->flags & NGHTTP2_HUFF_SYM) {
|
||||
if(destlen == j) {
|
||||
size_t new_len = j == 0 ? 32 : j * 2;
|
||||
uint8_t *new_dest = realloc(dest, new_len);
|
||||
if(new_dest == NULL) {
|
||||
rv = NGHTTP2_ERR_NOMEM;
|
||||
goto fail;
|
||||
}
|
||||
dest = new_dest;
|
||||
destlen = new_len;
|
||||
}
|
||||
dest[j++] = t->sym;
|
||||
}
|
||||
state = t->state;
|
||||
in = src[i] & 0xf;
|
||||
}
|
||||
dest[j++] = rv;
|
||||
bitoff += huff_sym_table[rv].nbits;
|
||||
i += bitoff / 8;
|
||||
bitoff &= 0x7;
|
||||
}
|
||||
if(srclen && (t->flags & NGHTTP2_HUFF_ACCEPTED) == 0) {
|
||||
rv = NGHTTP2_ERR_HEADER_COMP;
|
||||
goto fail;
|
||||
}
|
||||
*dest_ptr = dest;
|
||||
return j;
|
||||
|
|
|
@ -31,7 +31,21 @@
|
|||
|
||||
#include <nghttp2/nghttp2.h>
|
||||
|
||||
typedef int16_t huff_decode_table_type[256];
|
||||
enum {
|
||||
/* FSA accepts this state as the end of huffman encoding
|
||||
sequence. */
|
||||
NGHTTP2_HUFF_ACCEPTED = 1,
|
||||
/* This state emits symbol */
|
||||
NGHTTP2_HUFF_SYM = (1 << 1)
|
||||
} nghttp2_huff_decode_flag;
|
||||
|
||||
typedef struct {
|
||||
int16_t state;
|
||||
uint8_t flags;
|
||||
uint8_t sym;
|
||||
} nghttp2_huff_decode;
|
||||
|
||||
typedef nghttp2_huff_decode huff_decode_table_type[16];
|
||||
|
||||
typedef struct {
|
||||
/* The number of bits in this code */
|
||||
|
|
File diff suppressed because it is too large
Load Diff
161
mkhufftbl.py
161
mkhufftbl.py
|
@ -3,9 +3,13 @@ import re
|
|||
import sys
|
||||
|
||||
class Node:
|
||||
def __init__(self, depth):
|
||||
self.depth = depth
|
||||
self.children = {}
|
||||
def __init__(self, term = None):
|
||||
self.term = term
|
||||
self.left = None
|
||||
self.right = None
|
||||
self.trans = []
|
||||
self.id = None
|
||||
self.accept = False
|
||||
|
||||
def to_bin(s):
|
||||
res = []
|
||||
|
@ -21,27 +25,99 @@ def to_bin(s):
|
|||
|
||||
nodes = []
|
||||
|
||||
def insert(node, sym, binpat, nbits, pidx):
|
||||
if pidx == len(binpat) - 1:
|
||||
#assert(binpat[pidx] not in node.children)
|
||||
mx = (8 - (nbits & 0x7)) & 0x7;
|
||||
#print "last", bin(binpat[pidx]), mx
|
||||
for i in range(1 << mx):
|
||||
node.children[binpat[pidx] + i] = sym
|
||||
def insert(node, sym, bits):
|
||||
if len(bits) == 0:
|
||||
node.term = sym
|
||||
return
|
||||
else:
|
||||
if binpat[pidx] not in node.children:
|
||||
node.children[binpat[pidx]] = -len(nodes)
|
||||
nextnode = Node(pidx + 1)
|
||||
nodes.append(nextnode)
|
||||
if bits[0] == '0':
|
||||
if node.left is None:
|
||||
node.left = Node()
|
||||
child = node.left
|
||||
else:
|
||||
nextnode = nodes[-node.children[binpat[pidx]]]
|
||||
insert(nextnode, sym, binpat, nbits, pidx + 1)
|
||||
if node.right is None:
|
||||
node.right = Node()
|
||||
child = node.right
|
||||
insert(child, sym, bits[1:])
|
||||
|
||||
def traverse(node, bits, syms, start_node, root, depth):
|
||||
if depth == 4:
|
||||
if 256 in syms:
|
||||
syms = []
|
||||
node = None
|
||||
start_node.trans.append((node, bits, syms))
|
||||
return
|
||||
|
||||
if node.term is not None:
|
||||
node = root
|
||||
|
||||
def go(node, bit):
|
||||
nbits = list(bits)
|
||||
nbits.append(bit)
|
||||
nsyms = list(syms)
|
||||
if node.term is not None:
|
||||
nsyms.append(node.term)
|
||||
traverse(node, nbits, nsyms, start_node, root, depth + 1)
|
||||
|
||||
go(node.left, 0)
|
||||
go(node.right, 1)
|
||||
|
||||
idseed = 0
|
||||
|
||||
def dfs_setid(node, prefix):
|
||||
if node.term is not None:
|
||||
return
|
||||
if len(prefix) <= 7 and [1] * len(prefix) == prefix:
|
||||
node.accept = True
|
||||
global idseed
|
||||
node.id = idseed
|
||||
idseed += 1
|
||||
dfs_setid(node.left, prefix + [0])
|
||||
dfs_setid(node.right, prefix + [1])
|
||||
|
||||
def dfs(node, root):
|
||||
if node is None:
|
||||
return
|
||||
traverse(node, [], [], node, root, 0)
|
||||
dfs(node.left, root)
|
||||
dfs(node.right, root)
|
||||
|
||||
NGHTTP2_HUFF_ACCEPTED = 1
|
||||
NGHTTP2_HUFF_SYM = 1 << 1
|
||||
|
||||
def dfs_print(node):
|
||||
if node.term is not None:
|
||||
return
|
||||
print '/* {} */'.format(node.id)
|
||||
print '{'
|
||||
for nd, bits, syms in node.trans:
|
||||
outlen = len(syms)
|
||||
flags = 0
|
||||
if outlen == 0:
|
||||
out = 0
|
||||
else:
|
||||
assert(outlen == 1)
|
||||
out = syms[0]
|
||||
flags |= NGHTTP2_HUFF_SYM
|
||||
if nd is None:
|
||||
id = -1
|
||||
else:
|
||||
id = nd.id
|
||||
if id is None:
|
||||
# if nd.id is None, it is a leaf node
|
||||
id = 0
|
||||
flags |= NGHTTP2_HUFF_ACCEPTED
|
||||
elif nd.accept:
|
||||
flags |= NGHTTP2_HUFF_ACCEPTED
|
||||
print ' {{{}, 0x{:02x}, {}}},'.format(id, flags, out)
|
||||
print '},'
|
||||
dfs_print(node.left)
|
||||
dfs_print(node.right)
|
||||
|
||||
symbol_tbl = [(None, 0) for i in range(257)]
|
||||
tables = {}
|
||||
|
||||
root = Node(0)
|
||||
nodes.append(root)
|
||||
root = Node()
|
||||
|
||||
for line in sys.stdin:
|
||||
m = re.match(r'.*\(\s*(\d+)\) ([|01]+) \[(\d+)\]\s+(\S+).*', line)
|
||||
|
@ -50,14 +126,17 @@ for line in sys.stdin:
|
|||
if len(m.group(4)) > 8:
|
||||
raise Error('Code is more than 4 bytes long')
|
||||
sym = int(m.group(1))
|
||||
pat = re.sub(r'\|', '', m.group(2))
|
||||
bits = re.sub(r'\|', '', m.group(2))
|
||||
nbits = int(m.group(3))
|
||||
assert(len(pat) == nbits)
|
||||
binpat = to_bin(pat)
|
||||
assert(len(bits) == nbits)
|
||||
binpat = to_bin(bits)
|
||||
assert(len(binpat) == (nbits+7)/8)
|
||||
symbol_tbl[sym] = (binpat, nbits, m.group(4))
|
||||
#print "Inserting", sym
|
||||
insert(root, sym, binpat, nbits, 0)
|
||||
insert(root, sym, bits)
|
||||
|
||||
dfs_setid(root, [])
|
||||
dfs(root, root)
|
||||
|
||||
print '''\
|
||||
typedef struct {
|
||||
|
@ -67,7 +146,7 @@ typedef struct {
|
|||
'''
|
||||
|
||||
print '''\
|
||||
nghttp2_huff_sym huff_sym_table[] = {'''
|
||||
const nghttp2_huff_sym huff_sym_table[] = {'''
|
||||
for i in range(257):
|
||||
pat = list(symbol_tbl[i][0])
|
||||
pat += [0]*(4 - len(pat))
|
||||
|
@ -77,22 +156,22 @@ for i in range(257):
|
|||
print '};'
|
||||
print ''
|
||||
|
||||
print '''int16_t huff_decode_table[][256] = {'''
|
||||
for j in range(len(nodes)):
|
||||
node = nodes[j]
|
||||
print '/* {} */'.format(j)
|
||||
print '{'
|
||||
for i in range(256):
|
||||
if i in node.children:
|
||||
sys.stdout.write('''\
|
||||
{}{}'''.format(node.children[i], ',' if i < 255 else ''))
|
||||
else:
|
||||
sys.stdout.write(''' NGHTTP2_HD_HUFF_NO_ENT,''')
|
||||
if (i+1)&0x7 == 0:
|
||||
print ''
|
||||
sys.stdout.write('}')
|
||||
if j == len(nodes) - 1:
|
||||
print ''
|
||||
else:
|
||||
print ','
|
||||
print '''\
|
||||
enum {{
|
||||
NGHTTP2_HUFF_ACCEPTED = {},
|
||||
NGHTTP2_HUFF_SYM = {}
|
||||
}} nghttp2_huff_decode_flag;
|
||||
'''.format(NGHTTP2_HUFF_ACCEPTED, NGHTTP2_HUFF_SYM)
|
||||
|
||||
print '''\
|
||||
typedef struct {
|
||||
int16_t state;
|
||||
uint8_t flags;
|
||||
uint8_t sym;
|
||||
} nghttp2_huff_decode;
|
||||
'''
|
||||
|
||||
print '''\
|
||||
const nghttp2_huff_decode huff_decode_table[][16] = {'''
|
||||
dfs_print(root)
|
||||
print '};'
|
||||
|
|
Loading…
Reference in New Issue