nghttp2_hd: Rewrite huffman decoding
This commit is contained in:
parent
a8e4da8058
commit
f8a446fbeb
|
@ -371,19 +371,6 @@ ssize_t nghttp2_hd_huff_encode(uint8_t *dest, size_t destlen,
|
||||||
const uint8_t *src, size_t srclen,
|
const uint8_t *src, size_t srclen,
|
||||||
nghttp2_hd_side side);
|
nghttp2_hd_side side);
|
||||||
|
|
||||||
/*
|
|
||||||
* Counts the number of required bytes to decode |src| with length
|
|
||||||
* |srclen|. The given input must be padded with the prefix of
|
|
||||||
* terminal code. If |side| is NGHTTP2_HD_SIDE_REQUEST, the request
|
|
||||||
* huffman code table is used. Otherwise, the response code table is
|
|
||||||
* used.
|
|
||||||
*
|
|
||||||
* This function returns the number of required bytes to decode given
|
|
||||||
* data if it succeeds, or -1.
|
|
||||||
*/
|
|
||||||
ssize_t nghttp2_hd_huff_decode_count(const uint8_t *src, size_t srclen,
|
|
||||||
nghttp2_hd_side side);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Decodes the given data |src| with length |srclen|. This function
|
* Decodes the given data |src| with length |srclen|. This function
|
||||||
* allocates memory to store the result and assigns its pointer to
|
* allocates memory to store the result and assigns its pointer to
|
||||||
|
|
|
@ -31,64 +31,11 @@
|
||||||
#include "nghttp2_hd.h"
|
#include "nghttp2_hd.h"
|
||||||
|
|
||||||
extern const nghttp2_huff_sym req_huff_sym_table[];
|
extern const nghttp2_huff_sym req_huff_sym_table[];
|
||||||
extern const int16_t req_huff_decode_table[][256];
|
extern const nghttp2_huff_decode req_huff_decode_table[][16];
|
||||||
|
|
||||||
extern const nghttp2_huff_sym res_huff_sym_table[];
|
extern const nghttp2_huff_sym res_huff_sym_table[];
|
||||||
extern const int16_t res_huff_decode_table[][256];
|
extern const nghttp2_huff_decode res_huff_decode_table[][16];
|
||||||
|
|
||||||
/*
|
|
||||||
* Returns next 8 bits of data from |in|, starting |bitoff| bits
|
|
||||||
* offset. If there are fewer bits left than |bitoff|, the left bits
|
|
||||||
* padded with 0 are returned. The |bitoff| must be strictly less
|
|
||||||
* than 8.
|
|
||||||
*/
|
|
||||||
static uint8_t get_prefix_byte(const uint8_t *in, size_t len, size_t bitoff)
|
|
||||||
{
|
|
||||||
uint8_t b;
|
|
||||||
if(bitoff == 0) {
|
|
||||||
return *in;
|
|
||||||
}
|
|
||||||
b = *in << bitoff;
|
|
||||||
if(len > 1) {
|
|
||||||
b |= *(in + 1) >> (8 - bitoff);
|
|
||||||
}
|
|
||||||
return b;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Decodes next byte from input |in| with length |len|, starting
|
|
||||||
* |bitoff| bit offset.
|
|
||||||
*
|
|
||||||
* This function returns the decoded symbol number (0-255 and 256 for
|
|
||||||
* special terminal symbol) if it succeeds, or -1.
|
|
||||||
*/
|
|
||||||
static int huff_decode(const uint8_t *in, size_t len, size_t bitoff,
|
|
||||||
const nghttp2_huff_sym *huff_sym_table,
|
|
||||||
const huff_decode_table_type *huff_decode_table)
|
|
||||||
{
|
|
||||||
int rv = 0;
|
|
||||||
size_t len_orig = len;
|
|
||||||
if(len == 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
for(;;) {
|
|
||||||
rv = huff_decode_table[rv][get_prefix_byte(in, len, bitoff)];
|
|
||||||
if(rv >= 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* Negative return value means we need to lookup next table. */
|
|
||||||
rv = -rv;
|
|
||||||
++in;
|
|
||||||
--len;
|
|
||||||
if(len == 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(bitoff + huff_sym_table[rv].nbits > len_orig * 8) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* Encodes huffman code |sym| into |*dest_ptr|, whose least |rembits|
|
* Encodes huffman code |sym| into |*dest_ptr|, whose least |rembits|
|
||||||
* bits are not filled yet. The |rembits| must be in range [1, 8],
|
* bits are not filled yet. The |rembits| must be in range [1, 8],
|
||||||
|
@ -167,102 +114,54 @@ ssize_t nghttp2_hd_huff_encode(uint8_t *dest, size_t destlen,
|
||||||
return dest - dest_first;
|
return dest - dest_first;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int check_last_byte(const uint8_t *src, size_t srclen, size_t idx,
|
|
||||||
size_t bitoff)
|
|
||||||
{
|
|
||||||
uint8_t last_mask = (1 << (8 - bitoff)) - 1;
|
|
||||||
return idx + 1 == srclen && bitoff > 0 &&
|
|
||||||
(src[idx] & last_mask) == last_mask;
|
|
||||||
}
|
|
||||||
|
|
||||||
ssize_t nghttp2_hd_huff_decode_count(const uint8_t *src, size_t srclen,
|
|
||||||
nghttp2_hd_side side)
|
|
||||||
{
|
|
||||||
size_t bitoff = 0;
|
|
||||||
size_t i, j;
|
|
||||||
const nghttp2_huff_sym *huff_sym_table;
|
|
||||||
const huff_decode_table_type *huff_decode_table;
|
|
||||||
|
|
||||||
if(side == NGHTTP2_HD_SIDE_REQUEST) {
|
|
||||||
huff_sym_table = req_huff_sym_table;
|
|
||||||
huff_decode_table = req_huff_decode_table;
|
|
||||||
} else {
|
|
||||||
huff_sym_table = res_huff_sym_table;
|
|
||||||
huff_decode_table = res_huff_decode_table;
|
|
||||||
}
|
|
||||||
j = 0;
|
|
||||||
for(i = 0; i < srclen;) {
|
|
||||||
int rv = huff_decode(src + i, srclen - i, bitoff,
|
|
||||||
huff_sym_table, huff_decode_table);
|
|
||||||
if(rv == -1) {
|
|
||||||
if(check_last_byte(src, srclen, i, bitoff)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if(rv == 256) {
|
|
||||||
/* 256 is special terminal symbol and it should not be encoded in
|
|
||||||
byte string. */
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
j++;
|
|
||||||
bitoff += huff_sym_table[rv].nbits;
|
|
||||||
i += bitoff / 8;
|
|
||||||
bitoff &= 0x7;
|
|
||||||
}
|
|
||||||
return j;
|
|
||||||
}
|
|
||||||
|
|
||||||
ssize_t nghttp2_hd_huff_decode(uint8_t **dest_ptr,
|
ssize_t nghttp2_hd_huff_decode(uint8_t **dest_ptr,
|
||||||
const uint8_t *src, size_t srclen,
|
const uint8_t *src, size_t srclen,
|
||||||
nghttp2_hd_side side)
|
nghttp2_hd_side side)
|
||||||
{
|
{
|
||||||
size_t bitoff = 0;
|
size_t i, j, k;
|
||||||
size_t i, j;
|
|
||||||
const nghttp2_huff_sym *huff_sym_table;
|
|
||||||
const huff_decode_table_type *huff_decode_table;
|
const huff_decode_table_type *huff_decode_table;
|
||||||
uint8_t *dest = NULL;
|
uint8_t *dest = NULL;
|
||||||
size_t destlen = 0;
|
size_t destlen = 0;
|
||||||
int rv;
|
int rv;
|
||||||
|
int16_t state = 0;
|
||||||
|
const nghttp2_huff_decode *t = NULL;
|
||||||
|
|
||||||
|
/* We use the decoding algorithm described in
|
||||||
|
http://graphics.ics.uci.edu/pub/Prefix.pdf */
|
||||||
if(side == NGHTTP2_HD_SIDE_REQUEST) {
|
if(side == NGHTTP2_HD_SIDE_REQUEST) {
|
||||||
huff_sym_table = req_huff_sym_table;
|
|
||||||
huff_decode_table = req_huff_decode_table;
|
huff_decode_table = req_huff_decode_table;
|
||||||
} else {
|
} else {
|
||||||
huff_sym_table = res_huff_sym_table;
|
|
||||||
huff_decode_table = res_huff_decode_table;
|
huff_decode_table = res_huff_decode_table;
|
||||||
}
|
}
|
||||||
j = 0;
|
j = 0;
|
||||||
for(i = 0; i < srclen;) {
|
for(i = 0; i < srclen; ++i) {
|
||||||
rv = huff_decode(src + i, srclen - i, bitoff,
|
uint8_t in = src[i] >> 4;
|
||||||
huff_sym_table, huff_decode_table);
|
for(k = 0; k < 2; ++k) {
|
||||||
if(rv == -1) {
|
t = &huff_decode_table[state][in];
|
||||||
if(check_last_byte(src, srclen, i, bitoff)) {
|
if(t->state == -1) {
|
||||||
break;
|
rv = NGHTTP2_ERR_HEADER_COMP;
|
||||||
}
|
|
||||||
rv = NGHTTP2_ERR_HEADER_COMP;
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
if(rv == 256) {
|
|
||||||
/* 256 is special terminal symbol and it should not be encoded in
|
|
||||||
byte string. */
|
|
||||||
rv = NGHTTP2_ERR_HEADER_COMP;
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
if(j == destlen) {
|
|
||||||
size_t new_len = j == 0 ? 32 : j * 2;
|
|
||||||
uint8_t *new_dest = realloc(dest, new_len);
|
|
||||||
if(new_dest == NULL) {
|
|
||||||
rv = NGHTTP2_ERR_NOMEM;
|
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
dest = new_dest;
|
if(t->flags & NGHTTP2_HUFF_SYM) {
|
||||||
destlen = new_len;
|
if(destlen == j) {
|
||||||
|
size_t new_len = j == 0 ? 32 : j * 2;
|
||||||
|
uint8_t *new_dest = realloc(dest, new_len);
|
||||||
|
if(new_dest == NULL) {
|
||||||
|
rv = NGHTTP2_ERR_NOMEM;
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
dest = new_dest;
|
||||||
|
destlen = new_len;
|
||||||
|
}
|
||||||
|
dest[j++] = t->sym;
|
||||||
|
}
|
||||||
|
state = t->state;
|
||||||
|
in = src[i] & 0xf;
|
||||||
}
|
}
|
||||||
dest[j++] = rv;
|
}
|
||||||
bitoff += huff_sym_table[rv].nbits;
|
if(srclen && (t->flags & NGHTTP2_HUFF_ACCEPTED) == 0) {
|
||||||
i += bitoff / 8;
|
rv = NGHTTP2_ERR_HEADER_COMP;
|
||||||
bitoff &= 0x7;
|
goto fail;
|
||||||
}
|
}
|
||||||
*dest_ptr = dest;
|
*dest_ptr = dest;
|
||||||
return j;
|
return j;
|
||||||
|
|
|
@ -31,7 +31,21 @@
|
||||||
|
|
||||||
#include <nghttp2/nghttp2.h>
|
#include <nghttp2/nghttp2.h>
|
||||||
|
|
||||||
typedef int16_t huff_decode_table_type[256];
|
enum {
|
||||||
|
/* FSA accepts this state as the end of huffman encoding
|
||||||
|
sequence. */
|
||||||
|
NGHTTP2_HUFF_ACCEPTED = 1,
|
||||||
|
/* This state emits symbol */
|
||||||
|
NGHTTP2_HUFF_SYM = (1 << 1)
|
||||||
|
} nghttp2_huff_decode_flag;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int16_t state;
|
||||||
|
uint8_t flags;
|
||||||
|
uint8_t sym;
|
||||||
|
} nghttp2_huff_decode;
|
||||||
|
|
||||||
|
typedef nghttp2_huff_decode huff_decode_table_type[16];
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
/* The number of bits in this code */
|
/* The number of bits in this code */
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
161
mkhufftbl.py
161
mkhufftbl.py
|
@ -3,9 +3,13 @@ import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
class Node:
|
class Node:
|
||||||
def __init__(self, depth):
|
def __init__(self, term = None):
|
||||||
self.depth = depth
|
self.term = term
|
||||||
self.children = {}
|
self.left = None
|
||||||
|
self.right = None
|
||||||
|
self.trans = []
|
||||||
|
self.id = None
|
||||||
|
self.accept = False
|
||||||
|
|
||||||
def to_bin(s):
|
def to_bin(s):
|
||||||
res = []
|
res = []
|
||||||
|
@ -21,27 +25,99 @@ def to_bin(s):
|
||||||
|
|
||||||
nodes = []
|
nodes = []
|
||||||
|
|
||||||
def insert(node, sym, binpat, nbits, pidx):
|
def insert(node, sym, bits):
|
||||||
if pidx == len(binpat) - 1:
|
if len(bits) == 0:
|
||||||
#assert(binpat[pidx] not in node.children)
|
node.term = sym
|
||||||
mx = (8 - (nbits & 0x7)) & 0x7;
|
return
|
||||||
#print "last", bin(binpat[pidx]), mx
|
|
||||||
for i in range(1 << mx):
|
|
||||||
node.children[binpat[pidx] + i] = sym
|
|
||||||
else:
|
else:
|
||||||
if binpat[pidx] not in node.children:
|
if bits[0] == '0':
|
||||||
node.children[binpat[pidx]] = -len(nodes)
|
if node.left is None:
|
||||||
nextnode = Node(pidx + 1)
|
node.left = Node()
|
||||||
nodes.append(nextnode)
|
child = node.left
|
||||||
else:
|
else:
|
||||||
nextnode = nodes[-node.children[binpat[pidx]]]
|
if node.right is None:
|
||||||
insert(nextnode, sym, binpat, nbits, pidx + 1)
|
node.right = Node()
|
||||||
|
child = node.right
|
||||||
|
insert(child, sym, bits[1:])
|
||||||
|
|
||||||
|
def traverse(node, bits, syms, start_node, root, depth):
|
||||||
|
if depth == 4:
|
||||||
|
if 256 in syms:
|
||||||
|
syms = []
|
||||||
|
node = None
|
||||||
|
start_node.trans.append((node, bits, syms))
|
||||||
|
return
|
||||||
|
|
||||||
|
if node.term is not None:
|
||||||
|
node = root
|
||||||
|
|
||||||
|
def go(node, bit):
|
||||||
|
nbits = list(bits)
|
||||||
|
nbits.append(bit)
|
||||||
|
nsyms = list(syms)
|
||||||
|
if node.term is not None:
|
||||||
|
nsyms.append(node.term)
|
||||||
|
traverse(node, nbits, nsyms, start_node, root, depth + 1)
|
||||||
|
|
||||||
|
go(node.left, 0)
|
||||||
|
go(node.right, 1)
|
||||||
|
|
||||||
|
idseed = 0
|
||||||
|
|
||||||
|
def dfs_setid(node, prefix):
|
||||||
|
if node.term is not None:
|
||||||
|
return
|
||||||
|
if len(prefix) <= 7 and [1] * len(prefix) == prefix:
|
||||||
|
node.accept = True
|
||||||
|
global idseed
|
||||||
|
node.id = idseed
|
||||||
|
idseed += 1
|
||||||
|
dfs_setid(node.left, prefix + [0])
|
||||||
|
dfs_setid(node.right, prefix + [1])
|
||||||
|
|
||||||
|
def dfs(node, root):
|
||||||
|
if node is None:
|
||||||
|
return
|
||||||
|
traverse(node, [], [], node, root, 0)
|
||||||
|
dfs(node.left, root)
|
||||||
|
dfs(node.right, root)
|
||||||
|
|
||||||
|
NGHTTP2_HUFF_ACCEPTED = 1
|
||||||
|
NGHTTP2_HUFF_SYM = 1 << 1
|
||||||
|
|
||||||
|
def dfs_print(node):
|
||||||
|
if node.term is not None:
|
||||||
|
return
|
||||||
|
print '/* {} */'.format(node.id)
|
||||||
|
print '{'
|
||||||
|
for nd, bits, syms in node.trans:
|
||||||
|
outlen = len(syms)
|
||||||
|
flags = 0
|
||||||
|
if outlen == 0:
|
||||||
|
out = 0
|
||||||
|
else:
|
||||||
|
assert(outlen == 1)
|
||||||
|
out = syms[0]
|
||||||
|
flags |= NGHTTP2_HUFF_SYM
|
||||||
|
if nd is None:
|
||||||
|
id = -1
|
||||||
|
else:
|
||||||
|
id = nd.id
|
||||||
|
if id is None:
|
||||||
|
# if nd.id is None, it is a leaf node
|
||||||
|
id = 0
|
||||||
|
flags |= NGHTTP2_HUFF_ACCEPTED
|
||||||
|
elif nd.accept:
|
||||||
|
flags |= NGHTTP2_HUFF_ACCEPTED
|
||||||
|
print ' {{{}, 0x{:02x}, {}}},'.format(id, flags, out)
|
||||||
|
print '},'
|
||||||
|
dfs_print(node.left)
|
||||||
|
dfs_print(node.right)
|
||||||
|
|
||||||
symbol_tbl = [(None, 0) for i in range(257)]
|
symbol_tbl = [(None, 0) for i in range(257)]
|
||||||
tables = {}
|
tables = {}
|
||||||
|
|
||||||
root = Node(0)
|
root = Node()
|
||||||
nodes.append(root)
|
|
||||||
|
|
||||||
for line in sys.stdin:
|
for line in sys.stdin:
|
||||||
m = re.match(r'.*\(\s*(\d+)\) ([|01]+) \[(\d+)\]\s+(\S+).*', line)
|
m = re.match(r'.*\(\s*(\d+)\) ([|01]+) \[(\d+)\]\s+(\S+).*', line)
|
||||||
|
@ -50,14 +126,17 @@ for line in sys.stdin:
|
||||||
if len(m.group(4)) > 8:
|
if len(m.group(4)) > 8:
|
||||||
raise Error('Code is more than 4 bytes long')
|
raise Error('Code is more than 4 bytes long')
|
||||||
sym = int(m.group(1))
|
sym = int(m.group(1))
|
||||||
pat = re.sub(r'\|', '', m.group(2))
|
bits = re.sub(r'\|', '', m.group(2))
|
||||||
nbits = int(m.group(3))
|
nbits = int(m.group(3))
|
||||||
assert(len(pat) == nbits)
|
assert(len(bits) == nbits)
|
||||||
binpat = to_bin(pat)
|
binpat = to_bin(bits)
|
||||||
assert(len(binpat) == (nbits+7)/8)
|
assert(len(binpat) == (nbits+7)/8)
|
||||||
symbol_tbl[sym] = (binpat, nbits, m.group(4))
|
symbol_tbl[sym] = (binpat, nbits, m.group(4))
|
||||||
#print "Inserting", sym
|
#print "Inserting", sym
|
||||||
insert(root, sym, binpat, nbits, 0)
|
insert(root, sym, bits)
|
||||||
|
|
||||||
|
dfs_setid(root, [])
|
||||||
|
dfs(root, root)
|
||||||
|
|
||||||
print '''\
|
print '''\
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -67,7 +146,7 @@ typedef struct {
|
||||||
'''
|
'''
|
||||||
|
|
||||||
print '''\
|
print '''\
|
||||||
nghttp2_huff_sym huff_sym_table[] = {'''
|
const nghttp2_huff_sym huff_sym_table[] = {'''
|
||||||
for i in range(257):
|
for i in range(257):
|
||||||
pat = list(symbol_tbl[i][0])
|
pat = list(symbol_tbl[i][0])
|
||||||
pat += [0]*(4 - len(pat))
|
pat += [0]*(4 - len(pat))
|
||||||
|
@ -77,22 +156,22 @@ for i in range(257):
|
||||||
print '};'
|
print '};'
|
||||||
print ''
|
print ''
|
||||||
|
|
||||||
print '''int16_t huff_decode_table[][256] = {'''
|
print '''\
|
||||||
for j in range(len(nodes)):
|
enum {{
|
||||||
node = nodes[j]
|
NGHTTP2_HUFF_ACCEPTED = {},
|
||||||
print '/* {} */'.format(j)
|
NGHTTP2_HUFF_SYM = {}
|
||||||
print '{'
|
}} nghttp2_huff_decode_flag;
|
||||||
for i in range(256):
|
'''.format(NGHTTP2_HUFF_ACCEPTED, NGHTTP2_HUFF_SYM)
|
||||||
if i in node.children:
|
|
||||||
sys.stdout.write('''\
|
print '''\
|
||||||
{}{}'''.format(node.children[i], ',' if i < 255 else ''))
|
typedef struct {
|
||||||
else:
|
int16_t state;
|
||||||
sys.stdout.write(''' NGHTTP2_HD_HUFF_NO_ENT,''')
|
uint8_t flags;
|
||||||
if (i+1)&0x7 == 0:
|
uint8_t sym;
|
||||||
print ''
|
} nghttp2_huff_decode;
|
||||||
sys.stdout.write('}')
|
'''
|
||||||
if j == len(nodes) - 1:
|
|
||||||
print ''
|
print '''\
|
||||||
else:
|
const nghttp2_huff_decode huff_decode_table[][16] = {'''
|
||||||
print ','
|
dfs_print(root)
|
||||||
print '};'
|
print '};'
|
||||||
|
|
Loading…
Reference in New Issue