Faster huffman encoding

This commit is contained in:
Tatsuhiro Tsujikawa 2019-10-08 23:13:31 +09:00
parent 6f967c6ef3
commit 0d855bfc1b
3 changed files with 117 additions and 201 deletions

View File

@ -29,114 +29,7 @@
#include <stdio.h> #include <stdio.h>
#include "nghttp2_hd.h" #include "nghttp2_hd.h"
#include "nghttp2_net.h"
/*
 * Writes the Huffman code of |sym| into |bufs|.  The byte currently
 * being filled still has its low |rembits| bits free; |rembits| must
 * be in the range [1, 8].  |*avail_ptr| is the caller's cached count
 * of available bytes: it is decremented when the partially filled
 * byte is completed and reloaded from nghttp2_bufs_cur_avail() after
 * further bytes are appended.
 *
 * Returns the number of still-unfilled bits in the byte where output
 * continues (8 if the code ended exactly on a byte boundary).  If
 * nghttp2_bufs_addb() or nghttp2_bufs_addb_hold() returns nonzero,
 * that value is returned unchanged.
 */
static ssize_t huff_encode_sym(nghttp2_bufs *bufs, size_t *avail_ptr,
                               size_t rembits, const nghttp2_huff_sym *sym) {
  int rv;
  unsigned int shift;
  size_t pending = sym->nbits;
  uint32_t bits = sym->code;

  /* We assume that sym->nbits <= 32 */
  if (pending <= rembits) {
    if (pending < rembits) {
      /* The whole code fits and the current byte is still not full. */
      nghttp2_bufs_fast_orb_hold(bufs,
                                 (uint8_t)(bits << (rembits - pending)));
      return (ssize_t)(rembits - pending);
    }
    /* The code fills the current byte exactly. */
    nghttp2_bufs_fast_orb(bufs, (uint8_t)bits);
    --*avail_ptr;
    return 8;
  }

  /* The top |rembits| bits of the code complete the current byte. */
  nghttp2_bufs_fast_orb(bufs, (uint8_t)(bits >> (pending - rembits)));
  --*avail_ptr;
  pending -= rembits;

  if (pending & 0x7) {
    /* Left-align the remaining bits on a byte boundary so that each
       output byte can be taken with a plain right shift. */
    bits <<= 8 - (pending & 0x7);
  }

  if (*avail_ptr < (pending + 7) / 8) {
    /* Slow path: the cached space is too small for the remaining
       bytes, so use the checked append functions. */
    for (shift = 24; shift != 0; shift -= 8) {
      if (pending <= shift) {
        continue;
      }
      rv = nghttp2_bufs_addb(bufs, (uint8_t)(bits >> shift));
      if (rv != 0) {
        return rv;
      }
      pending -= 8;
    }

    /* Here 1 <= pending <= 8.  A full final byte is appended normally;
       a partial one goes through the _hold variant (presumably so the
       next symbol can be OR-ed into the same byte). */
    if (pending == 8) {
      rv = nghttp2_bufs_addb(bufs, (uint8_t)bits);
    } else {
      rv = nghttp2_bufs_addb_hold(bufs, (uint8_t)bits);
    }
    if (rv != 0) {
      return rv;
    }

    *avail_ptr = nghttp2_bufs_cur_avail(bufs);
    return pending == 8 ? 8 : (ssize_t)(8 - pending);
  }

  /* Fast path: enough cached space for everything that is left.  Most
     codes leave fewer than 8 bits here and skip straight to the final
     partial byte. */
  if (pending >= 8) {
    for (shift = 24; shift != 0; shift -= 8) {
      if (pending > shift) {
        nghttp2_bufs_fast_addb(bufs, (uint8_t)(bits >> shift));
        pending -= 8;
      }
    }

    if (pending == 8) {
      nghttp2_bufs_fast_addb(bufs, (uint8_t)bits);
      *avail_ptr = nghttp2_bufs_cur_avail(bufs);
      return 8;
    }
  }

  nghttp2_bufs_fast_addb_hold(bufs, (uint8_t)bits);
  *avail_ptr = nghttp2_bufs_cur_avail(bufs);
  return (ssize_t)(8 - pending);
}
size_t nghttp2_hd_huff_encode_count(const uint8_t *src, size_t len) { size_t nghttp2_hd_huff_encode_count(const uint8_t *src, size_t len) {
size_t i; size_t i;
@ -151,40 +44,60 @@ size_t nghttp2_hd_huff_encode_count(const uint8_t *src, size_t len) {
int nghttp2_hd_huff_encode(nghttp2_bufs *bufs, const uint8_t *src, int nghttp2_hd_huff_encode(nghttp2_bufs *bufs, const uint8_t *src,
size_t srclen) { size_t srclen) {
int rv; const nghttp2_huff_sym *sym;
ssize_t rembits = 8; const uint8_t *end = src + srclen;
size_t i; uint64_t code = 0;
uint32_t x;
size_t nbits = 0;
size_t avail; size_t avail;
int rv;
avail = nghttp2_bufs_cur_avail(bufs); avail = nghttp2_bufs_cur_avail(bufs);
for (i = 0; i < srclen; ++i) { for (; src != end;) {
const nghttp2_huff_sym *sym = &huff_sym_table[src[i]]; sym = &huff_sym_table[*src++];
if (rembits == 8) { code |= (uint64_t)sym->code << (32 - nbits);
if (avail) { nbits += sym->nbits;
nghttp2_bufs_fast_addb_hold(bufs, 0); if (nbits < 32) {
} else { continue;
rv = nghttp2_bufs_addb_hold(bufs, 0); }
if (avail >= 4) {
x = htonl((uint32_t)(code >> 32));
memcpy(bufs->cur->buf.last, &x, 4);
bufs->cur->buf.last += 4;
avail -= 4;
code <<= 32;
nbits -= 32;
continue;
}
for (; nbits >= 8;) {
rv = nghttp2_bufs_addb(bufs, (uint8_t)(code >> 56));
if (rv != 0) { if (rv != 0) {
return rv; return rv;
} }
code <<= 8;
nbits -= 8;
}
avail = nghttp2_bufs_cur_avail(bufs); avail = nghttp2_bufs_cur_avail(bufs);
} }
for (; nbits >= 8;) {
rv = nghttp2_bufs_addb(bufs, (uint8_t)(code >> 56));
if (rv != 0) {
return rv;
} }
rembits = huff_encode_sym(bufs, &avail, (size_t)rembits, sym); code <<= 8;
if (rembits < 0) { nbits -= 8;
return (int)rembits;
} }
if (nbits) {
rv = nghttp2_bufs_addb(
bufs, (uint8_t)((uint8_t)(code >> 56) | ((1 << (8 - nbits)) - 1)));
if (rv != 0) {
return rv;
} }
/* 256 is special terminal symbol, pad with its prefix */
if (rembits < 8) {
/* if rembits < 8, we should have at least 1 buffer space
available */
const nghttp2_huff_sym *sym = &huff_sym_table[256];
assert(avail);
/* Caution we no longer adjust avail here */
nghttp2_bufs_fast_orb(
bufs, (uint8_t)(sym->code >> (sym->nbits - (size_t)rembits)));
} }
return 0; return 0;

View File

@ -27,71 +27,71 @@
/* Generated by mkhufftbl.py */ /* Generated by mkhufftbl.py */
const nghttp2_huff_sym huff_sym_table[] = { const nghttp2_huff_sym huff_sym_table[] = {
{13, 0x1ff8u}, {23, 0x7fffd8u}, {28, 0xfffffe2u}, {28, 0xfffffe3u}, {13, 0xffc00000u}, {23, 0xffffb000u}, {28, 0xfffffe20u}, {28, 0xfffffe30u},
{28, 0xfffffe4u}, {28, 0xfffffe5u}, {28, 0xfffffe6u}, {28, 0xfffffe7u}, {28, 0xfffffe40u}, {28, 0xfffffe50u}, {28, 0xfffffe60u}, {28, 0xfffffe70u},
{28, 0xfffffe8u}, {24, 0xffffeau}, {30, 0x3ffffffcu}, {28, 0xfffffe9u}, {28, 0xfffffe80u}, {24, 0xffffea00u}, {30, 0xfffffff0u}, {28, 0xfffffe90u},
{28, 0xfffffeau}, {30, 0x3ffffffdu}, {28, 0xfffffebu}, {28, 0xfffffecu}, {28, 0xfffffea0u}, {30, 0xfffffff4u}, {28, 0xfffffeb0u}, {28, 0xfffffec0u},
{28, 0xfffffedu}, {28, 0xfffffeeu}, {28, 0xfffffefu}, {28, 0xffffff0u}, {28, 0xfffffed0u}, {28, 0xfffffee0u}, {28, 0xfffffef0u}, {28, 0xffffff00u},
{28, 0xffffff1u}, {28, 0xffffff2u}, {30, 0x3ffffffeu}, {28, 0xffffff3u}, {28, 0xffffff10u}, {28, 0xffffff20u}, {30, 0xfffffff8u}, {28, 0xffffff30u},
{28, 0xffffff4u}, {28, 0xffffff5u}, {28, 0xffffff6u}, {28, 0xffffff7u}, {28, 0xffffff40u}, {28, 0xffffff50u}, {28, 0xffffff60u}, {28, 0xffffff70u},
{28, 0xffffff8u}, {28, 0xffffff9u}, {28, 0xffffffau}, {28, 0xffffffbu}, {28, 0xffffff80u}, {28, 0xffffff90u}, {28, 0xffffffa0u}, {28, 0xffffffb0u},
{6, 0x14u}, {10, 0x3f8u}, {10, 0x3f9u}, {12, 0xffau}, {6, 0x50000000u}, {10, 0xfe000000u}, {10, 0xfe400000u}, {12, 0xffa00000u},
{13, 0x1ff9u}, {6, 0x15u}, {8, 0xf8u}, {11, 0x7fau}, {13, 0xffc80000u}, {6, 0x54000000u}, {8, 0xf8000000u}, {11, 0xff400000u},
{10, 0x3fau}, {10, 0x3fbu}, {8, 0xf9u}, {11, 0x7fbu}, {10, 0xfe800000u}, {10, 0xfec00000u}, {8, 0xf9000000u}, {11, 0xff600000u},
{8, 0xfau}, {6, 0x16u}, {6, 0x17u}, {6, 0x18u}, {8, 0xfa000000u}, {6, 0x58000000u}, {6, 0x5c000000u}, {6, 0x60000000u},
{5, 0x0u}, {5, 0x1u}, {5, 0x2u}, {6, 0x19u}, {5, 0x0u}, {5, 0x8000000u}, {5, 0x10000000u}, {6, 0x64000000u},
{6, 0x1au}, {6, 0x1bu}, {6, 0x1cu}, {6, 0x1du}, {6, 0x68000000u}, {6, 0x6c000000u}, {6, 0x70000000u}, {6, 0x74000000u},
{6, 0x1eu}, {6, 0x1fu}, {7, 0x5cu}, {8, 0xfbu}, {6, 0x78000000u}, {6, 0x7c000000u}, {7, 0xb8000000u}, {8, 0xfb000000u},
{15, 0x7ffcu}, {6, 0x20u}, {12, 0xffbu}, {10, 0x3fcu}, {15, 0xfff80000u}, {6, 0x80000000u}, {12, 0xffb00000u}, {10, 0xff000000u},
{13, 0x1ffau}, {6, 0x21u}, {7, 0x5du}, {7, 0x5eu}, {13, 0xffd00000u}, {6, 0x84000000u}, {7, 0xba000000u}, {7, 0xbc000000u},
{7, 0x5fu}, {7, 0x60u}, {7, 0x61u}, {7, 0x62u}, {7, 0xbe000000u}, {7, 0xc0000000u}, {7, 0xc2000000u}, {7, 0xc4000000u},
{7, 0x63u}, {7, 0x64u}, {7, 0x65u}, {7, 0x66u}, {7, 0xc6000000u}, {7, 0xc8000000u}, {7, 0xca000000u}, {7, 0xcc000000u},
{7, 0x67u}, {7, 0x68u}, {7, 0x69u}, {7, 0x6au}, {7, 0xce000000u}, {7, 0xd0000000u}, {7, 0xd2000000u}, {7, 0xd4000000u},
{7, 0x6bu}, {7, 0x6cu}, {7, 0x6du}, {7, 0x6eu}, {7, 0xd6000000u}, {7, 0xd8000000u}, {7, 0xda000000u}, {7, 0xdc000000u},
{7, 0x6fu}, {7, 0x70u}, {7, 0x71u}, {7, 0x72u}, {7, 0xde000000u}, {7, 0xe0000000u}, {7, 0xe2000000u}, {7, 0xe4000000u},
{8, 0xfcu}, {7, 0x73u}, {8, 0xfdu}, {13, 0x1ffbu}, {8, 0xfc000000u}, {7, 0xe6000000u}, {8, 0xfd000000u}, {13, 0xffd80000u},
{19, 0x7fff0u}, {13, 0x1ffcu}, {14, 0x3ffcu}, {6, 0x22u}, {19, 0xfffe0000u}, {13, 0xffe00000u}, {14, 0xfff00000u}, {6, 0x88000000u},
{15, 0x7ffdu}, {5, 0x3u}, {6, 0x23u}, {5, 0x4u}, {15, 0xfffa0000u}, {5, 0x18000000u}, {6, 0x8c000000u}, {5, 0x20000000u},
{6, 0x24u}, {5, 0x5u}, {6, 0x25u}, {6, 0x26u}, {6, 0x90000000u}, {5, 0x28000000u}, {6, 0x94000000u}, {6, 0x98000000u},
{6, 0x27u}, {5, 0x6u}, {7, 0x74u}, {7, 0x75u}, {6, 0x9c000000u}, {5, 0x30000000u}, {7, 0xe8000000u}, {7, 0xea000000u},
{6, 0x28u}, {6, 0x29u}, {6, 0x2au}, {5, 0x7u}, {6, 0xa0000000u}, {6, 0xa4000000u}, {6, 0xa8000000u}, {5, 0x38000000u},
{6, 0x2bu}, {7, 0x76u}, {6, 0x2cu}, {5, 0x8u}, {6, 0xac000000u}, {7, 0xec000000u}, {6, 0xb0000000u}, {5, 0x40000000u},
{5, 0x9u}, {6, 0x2du}, {7, 0x77u}, {7, 0x78u}, {5, 0x48000000u}, {6, 0xb4000000u}, {7, 0xee000000u}, {7, 0xf0000000u},
{7, 0x79u}, {7, 0x7au}, {7, 0x7bu}, {15, 0x7ffeu}, {7, 0xf2000000u}, {7, 0xf4000000u}, {7, 0xf6000000u}, {15, 0xfffc0000u},
{11, 0x7fcu}, {14, 0x3ffdu}, {13, 0x1ffdu}, {28, 0xffffffcu}, {11, 0xff800000u}, {14, 0xfff40000u}, {13, 0xffe80000u}, {28, 0xffffffc0u},
{20, 0xfffe6u}, {22, 0x3fffd2u}, {20, 0xfffe7u}, {20, 0xfffe8u}, {20, 0xfffe6000u}, {22, 0xffff4800u}, {20, 0xfffe7000u}, {20, 0xfffe8000u},
{22, 0x3fffd3u}, {22, 0x3fffd4u}, {22, 0x3fffd5u}, {23, 0x7fffd9u}, {22, 0xffff4c00u}, {22, 0xffff5000u}, {22, 0xffff5400u}, {23, 0xffffb200u},
{22, 0x3fffd6u}, {23, 0x7fffdau}, {23, 0x7fffdbu}, {23, 0x7fffdcu}, {22, 0xffff5800u}, {23, 0xffffb400u}, {23, 0xffffb600u}, {23, 0xffffb800u},
{23, 0x7fffddu}, {23, 0x7fffdeu}, {24, 0xffffebu}, {23, 0x7fffdfu}, {23, 0xffffba00u}, {23, 0xffffbc00u}, {24, 0xffffeb00u}, {23, 0xffffbe00u},
{24, 0xffffecu}, {24, 0xffffedu}, {22, 0x3fffd7u}, {23, 0x7fffe0u}, {24, 0xffffec00u}, {24, 0xffffed00u}, {22, 0xffff5c00u}, {23, 0xffffc000u},
{24, 0xffffeeu}, {23, 0x7fffe1u}, {23, 0x7fffe2u}, {23, 0x7fffe3u}, {24, 0xffffee00u}, {23, 0xffffc200u}, {23, 0xffffc400u}, {23, 0xffffc600u},
{23, 0x7fffe4u}, {21, 0x1fffdcu}, {22, 0x3fffd8u}, {23, 0x7fffe5u}, {23, 0xffffc800u}, {21, 0xfffee000u}, {22, 0xffff6000u}, {23, 0xffffca00u},
{22, 0x3fffd9u}, {23, 0x7fffe6u}, {23, 0x7fffe7u}, {24, 0xffffefu}, {22, 0xffff6400u}, {23, 0xffffcc00u}, {23, 0xffffce00u}, {24, 0xffffef00u},
{22, 0x3fffdau}, {21, 0x1fffddu}, {20, 0xfffe9u}, {22, 0x3fffdbu}, {22, 0xffff6800u}, {21, 0xfffee800u}, {20, 0xfffe9000u}, {22, 0xffff6c00u},
{22, 0x3fffdcu}, {23, 0x7fffe8u}, {23, 0x7fffe9u}, {21, 0x1fffdeu}, {22, 0xffff7000u}, {23, 0xffffd000u}, {23, 0xffffd200u}, {21, 0xfffef000u},
{23, 0x7fffeau}, {22, 0x3fffddu}, {22, 0x3fffdeu}, {24, 0xfffff0u}, {23, 0xffffd400u}, {22, 0xffff7400u}, {22, 0xffff7800u}, {24, 0xfffff000u},
{21, 0x1fffdfu}, {22, 0x3fffdfu}, {23, 0x7fffebu}, {23, 0x7fffecu}, {21, 0xfffef800u}, {22, 0xffff7c00u}, {23, 0xffffd600u}, {23, 0xffffd800u},
{21, 0x1fffe0u}, {21, 0x1fffe1u}, {22, 0x3fffe0u}, {21, 0x1fffe2u}, {21, 0xffff0000u}, {21, 0xffff0800u}, {22, 0xffff8000u}, {21, 0xffff1000u},
{23, 0x7fffedu}, {22, 0x3fffe1u}, {23, 0x7fffeeu}, {23, 0x7fffefu}, {23, 0xffffda00u}, {22, 0xffff8400u}, {23, 0xffffdc00u}, {23, 0xffffde00u},
{20, 0xfffeau}, {22, 0x3fffe2u}, {22, 0x3fffe3u}, {22, 0x3fffe4u}, {20, 0xfffea000u}, {22, 0xffff8800u}, {22, 0xffff8c00u}, {22, 0xffff9000u},
{23, 0x7ffff0u}, {22, 0x3fffe5u}, {22, 0x3fffe6u}, {23, 0x7ffff1u}, {23, 0xffffe000u}, {22, 0xffff9400u}, {22, 0xffff9800u}, {23, 0xffffe200u},
{26, 0x3ffffe0u}, {26, 0x3ffffe1u}, {20, 0xfffebu}, {19, 0x7fff1u}, {26, 0xfffff800u}, {26, 0xfffff840u}, {20, 0xfffeb000u}, {19, 0xfffe2000u},
{22, 0x3fffe7u}, {23, 0x7ffff2u}, {22, 0x3fffe8u}, {25, 0x1ffffecu}, {22, 0xffff9c00u}, {23, 0xffffe400u}, {22, 0xffffa000u}, {25, 0xfffff600u},
{26, 0x3ffffe2u}, {26, 0x3ffffe3u}, {26, 0x3ffffe4u}, {27, 0x7ffffdeu}, {26, 0xfffff880u}, {26, 0xfffff8c0u}, {26, 0xfffff900u}, {27, 0xfffffbc0u},
{27, 0x7ffffdfu}, {26, 0x3ffffe5u}, {24, 0xfffff1u}, {25, 0x1ffffedu}, {27, 0xfffffbe0u}, {26, 0xfffff940u}, {24, 0xfffff100u}, {25, 0xfffff680u},
{19, 0x7fff2u}, {21, 0x1fffe3u}, {26, 0x3ffffe6u}, {27, 0x7ffffe0u}, {19, 0xfffe4000u}, {21, 0xffff1800u}, {26, 0xfffff980u}, {27, 0xfffffc00u},
{27, 0x7ffffe1u}, {26, 0x3ffffe7u}, {27, 0x7ffffe2u}, {24, 0xfffff2u}, {27, 0xfffffc20u}, {26, 0xfffff9c0u}, {27, 0xfffffc40u}, {24, 0xfffff200u},
{21, 0x1fffe4u}, {21, 0x1fffe5u}, {26, 0x3ffffe8u}, {26, 0x3ffffe9u}, {21, 0xffff2000u}, {21, 0xffff2800u}, {26, 0xfffffa00u}, {26, 0xfffffa40u},
{28, 0xffffffdu}, {27, 0x7ffffe3u}, {27, 0x7ffffe4u}, {27, 0x7ffffe5u}, {28, 0xffffffd0u}, {27, 0xfffffc60u}, {27, 0xfffffc80u}, {27, 0xfffffca0u},
{20, 0xfffecu}, {24, 0xfffff3u}, {20, 0xfffedu}, {21, 0x1fffe6u}, {20, 0xfffec000u}, {24, 0xfffff300u}, {20, 0xfffed000u}, {21, 0xffff3000u},
{22, 0x3fffe9u}, {21, 0x1fffe7u}, {21, 0x1fffe8u}, {23, 0x7ffff3u}, {22, 0xffffa400u}, {21, 0xffff3800u}, {21, 0xffff4000u}, {23, 0xffffe600u},
{22, 0x3fffeau}, {22, 0x3fffebu}, {25, 0x1ffffeeu}, {25, 0x1ffffefu}, {22, 0xffffa800u}, {22, 0xffffac00u}, {25, 0xfffff700u}, {25, 0xfffff780u},
{24, 0xfffff4u}, {24, 0xfffff5u}, {26, 0x3ffffeau}, {23, 0x7ffff4u}, {24, 0xfffff400u}, {24, 0xfffff500u}, {26, 0xfffffa80u}, {23, 0xffffe800u},
{26, 0x3ffffebu}, {27, 0x7ffffe6u}, {26, 0x3ffffecu}, {26, 0x3ffffedu}, {26, 0xfffffac0u}, {27, 0xfffffcc0u}, {26, 0xfffffb00u}, {26, 0xfffffb40u},
{27, 0x7ffffe7u}, {27, 0x7ffffe8u}, {27, 0x7ffffe9u}, {27, 0x7ffffeau}, {27, 0xfffffce0u}, {27, 0xfffffd00u}, {27, 0xfffffd20u}, {27, 0xfffffd40u},
{27, 0x7ffffebu}, {28, 0xffffffeu}, {27, 0x7ffffecu}, {27, 0x7ffffedu}, {27, 0xfffffd60u}, {28, 0xffffffe0u}, {27, 0xfffffd80u}, {27, 0xfffffda0u},
{27, 0x7ffffeeu}, {27, 0x7ffffefu}, {27, 0x7fffff0u}, {26, 0x3ffffeeu}, {27, 0xfffffdc0u}, {27, 0xfffffde0u}, {27, 0xfffffe00u}, {26, 0xfffffb80u},
{30, 0x3fffffffu}}; {30, 0xfffffffcu}};
const nghttp2_huff_decode huff_decode_table[][16] = { const nghttp2_huff_decode huff_decode_table[][16] = {
/* 0 */ /* 0 */

View File

@ -423,9 +423,12 @@ typedef struct {
print '''\ print '''\
const nghttp2_huff_sym huff_sym_table[] = {''' const nghttp2_huff_sym huff_sym_table[] = {'''
for i in range(257): for i in range(257):
nbits = symbol_tbl[i][0]
k = int(symbol_tbl[i][1], 16)
k = k << (32 - nbits)
print '''\ print '''\
{{ {}, 0x{}u }}{}\ {{ {}, 0x{}u }}{}\
'''.format(symbol_tbl[i][0], symbol_tbl[i][1], ',' if i < 256 else '') '''.format(symbol_tbl[i][0], hex(k)[2:], ',' if i < 256 else '')
print '};' print '};'
print '' print ''