From 0d855bfc1ba6e4201e21d7d130dbaad0907c8a3e Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Tue, 8 Oct 2019 23:13:31 +0900 Subject: [PATCH] Faster huffman encoding --- lib/nghttp2_hd_huffman.c | 183 +++++++++------------------------- lib/nghttp2_hd_huffman_data.c | 130 ++++++++++++------------ mkhufftbl.py | 5 +- 3 files changed, 117 insertions(+), 201 deletions(-) diff --git a/lib/nghttp2_hd_huffman.c b/lib/nghttp2_hd_huffman.c index 8881aacb..716c2782 100644 --- a/lib/nghttp2_hd_huffman.c +++ b/lib/nghttp2_hd_huffman.c @@ -29,114 +29,7 @@ #include #include "nghttp2_hd.h" - -/* - * Encodes huffman code |sym| into |*dest_ptr|, whose least |rembits| - * bits are not filled yet. The |rembits| must be in range [1, 8], - * inclusive. At the end of the process, the |*dest_ptr| is updated - * and points where next output should be placed. The number of - * unfilled bits in the pointed location is returned. - */ -static ssize_t huff_encode_sym(nghttp2_bufs *bufs, size_t *avail_ptr, - size_t rembits, const nghttp2_huff_sym *sym) { - int rv; - size_t nbits = sym->nbits; - uint32_t code = sym->code; - - /* We assume that sym->nbits <= 32 */ - if (rembits > nbits) { - nghttp2_bufs_fast_orb_hold(bufs, (uint8_t)(code << (rembits - nbits))); - return (ssize_t)(rembits - nbits); - } - - if (rembits == nbits) { - nghttp2_bufs_fast_orb(bufs, (uint8_t)code); - --*avail_ptr; - return 8; - } - - nghttp2_bufs_fast_orb(bufs, (uint8_t)(code >> (nbits - rembits))); - --*avail_ptr; - - nbits -= rembits; - if (nbits & 0x7) { - /* align code to MSB byte boundary */ - code <<= 8 - (nbits & 0x7); - } - - if (*avail_ptr < (nbits + 7) / 8) { - /* slow path */ - if (nbits > 24) { - rv = nghttp2_bufs_addb(bufs, (uint8_t)(code >> 24)); - if (rv != 0) { - return rv; - } - nbits -= 8; - } - if (nbits > 16) { - rv = nghttp2_bufs_addb(bufs, (uint8_t)(code >> 16)); - if (rv != 0) { - return rv; - } - nbits -= 8; - } - if (nbits > 8) { - rv = nghttp2_bufs_addb(bufs, (uint8_t)(code >> 8)); - if (rv != 0) { - return rv; - } - nbits -= 8; - } - if (nbits == 8) { - rv = nghttp2_bufs_addb(bufs, (uint8_t)code); - if (rv != 0) { - return rv; - } - *avail_ptr = nghttp2_bufs_cur_avail(bufs); - return 8; - } - - rv = nghttp2_bufs_addb_hold(bufs, (uint8_t)code); - if (rv != 0) { - return rv; - } - *avail_ptr = nghttp2_bufs_cur_avail(bufs); - return (ssize_t)(8 - nbits); - } - - /* fast path, since most code is less than 8 */ - if (nbits < 8) { - nghttp2_bufs_fast_addb_hold(bufs, (uint8_t)code); - *avail_ptr = nghttp2_bufs_cur_avail(bufs); - return (ssize_t)(8 - nbits); - } - - /* handle longer code path */ - if (nbits > 24) { - nghttp2_bufs_fast_addb(bufs, (uint8_t)(code >> 24)); - nbits -= 8; - } - - if (nbits > 16) { - nghttp2_bufs_fast_addb(bufs, (uint8_t)(code >> 16)); - nbits -= 8; - } - - if (nbits > 8) { - nghttp2_bufs_fast_addb(bufs, (uint8_t)(code >> 8)); - nbits -= 8; - } - - if (nbits == 8) { - nghttp2_bufs_fast_addb(bufs, (uint8_t)code); - *avail_ptr = nghttp2_bufs_cur_avail(bufs); - return 8; - } - - nghttp2_bufs_fast_addb_hold(bufs, (uint8_t)code); - *avail_ptr = nghttp2_bufs_cur_avail(bufs); - return (ssize_t)(8 - nbits); -} +#include "nghttp2_net.h" size_t nghttp2_hd_huff_encode_count(const uint8_t *src, size_t len) { size_t i; @@ -151,40 +44,60 @@ size_t nghttp2_hd_huff_encode_count(const uint8_t *src, size_t len) { int nghttp2_hd_huff_encode(nghttp2_bufs *bufs, const uint8_t *src, size_t srclen) { - int rv; - ssize_t rembits = 8; - size_t i; + const nghttp2_huff_sym *sym; + const uint8_t *end = src + srclen; + uint64_t code = 0; + uint32_t x; + size_t nbits = 0; size_t avail; + int rv; avail = nghttp2_bufs_cur_avail(bufs); - for (i = 0; i < srclen; ++i) { - const nghttp2_huff_sym *sym = &huff_sym_table[src[i]]; - if (rembits == 8) { - if (avail) { - nghttp2_bufs_fast_addb_hold(bufs, 0); - } else { - rv = nghttp2_bufs_addb_hold(bufs, 0); - if (rv != 0) { - return rv; - } - avail = nghttp2_bufs_cur_avail(bufs); + for (; src != end;) { + sym = &huff_sym_table[*src++]; + code |= (uint64_t)sym->code << (32 - nbits); + nbits += sym->nbits; + if (nbits < 32) { + continue; + } + if (avail >= 4) { + x = htonl((uint32_t)(code >> 32)); + memcpy(bufs->cur->buf.last, &x, 4); + bufs->cur->buf.last += 4; + avail -= 4; + code <<= 32; + nbits -= 32; + continue; + } + + for (; nbits >= 8;) { + rv = nghttp2_bufs_addb(bufs, (uint8_t)(code >> 56)); + if (rv != 0) { + return rv; } + code <<= 8; + nbits -= 8; } - rembits = huff_encode_sym(bufs, &avail, (size_t)rembits, sym); - if (rembits < 0) { - return (int)rembits; - } + + avail = nghttp2_bufs_cur_avail(bufs); } - /* 256 is special terminal symbol, pad with its prefix */ - if (rembits < 8) { - /* if rembits < 8, we should have at least 1 buffer space - available */ - const nghttp2_huff_sym *sym = &huff_sym_table[256]; - assert(avail); - /* Caution we no longer adjust avail here */ - nghttp2_bufs_fast_orb( - bufs, (uint8_t)(sym->code >> (sym->nbits - (size_t)rembits))); + + for (; nbits >= 8;) { + rv = nghttp2_bufs_addb(bufs, (uint8_t)(code >> 56)); + if (rv != 0) { + return rv; + } + code <<= 8; + nbits -= 8; + } + + if (nbits) { + rv = nghttp2_bufs_addb( + bufs, (uint8_t)((uint8_t)(code >> 56) | ((1 << (8 - nbits)) - 1))); + if (rv != 0) { + return rv; + } } return 0; diff --git a/lib/nghttp2_hd_huffman_data.c b/lib/nghttp2_hd_huffman_data.c index 5ef4a956..3bc155e9 100644 --- a/lib/nghttp2_hd_huffman_data.c +++ b/lib/nghttp2_hd_huffman_data.c @@ -27,71 +27,71 @@ /* Generated by mkhufftbl.py */ const nghttp2_huff_sym huff_sym_table[] = { - {13, 0x1ff8u}, {23, 0x7fffd8u}, {28, 0xfffffe2u}, {28, 0xfffffe3u}, - {28, 0xfffffe4u}, {28, 0xfffffe5u}, {28, 0xfffffe6u}, {28, 0xfffffe7u}, - {28, 0xfffffe8u}, {24, 0xffffeau}, {30, 0x3ffffffcu}, {28, 0xfffffe9u}, - {28, 0xfffffeau}, {30, 0x3ffffffdu}, {28, 0xfffffebu}, {28, 0xfffffecu}, - {28, 0xfffffedu}, {28, 0xfffffeeu}, {28, 0xfffffefu}, {28, 0xffffff0u}, - {28, 0xffffff1u}, {28, 0xffffff2u}, {30, 0x3ffffffeu}, {28, 0xffffff3u}, - {28, 0xffffff4u}, {28, 0xffffff5u}, {28, 0xffffff6u}, {28, 0xffffff7u}, - {28, 0xffffff8u}, {28, 0xffffff9u}, {28, 0xffffffau}, {28, 0xffffffbu}, - {6, 0x14u}, {10, 0x3f8u}, {10, 0x3f9u}, {12, 0xffau}, - {13, 0x1ff9u}, {6, 0x15u}, {8, 0xf8u}, {11, 0x7fau}, - {10, 0x3fau}, {10, 0x3fbu}, {8, 0xf9u}, {11, 0x7fbu}, - {8, 0xfau}, {6, 0x16u}, {6, 0x17u}, {6, 0x18u}, - {5, 0x0u}, {5, 0x1u}, {5, 0x2u}, {6, 0x19u}, - {6, 0x1au}, {6, 0x1bu}, {6, 0x1cu}, {6, 0x1du}, - {6, 0x1eu}, {6, 0x1fu}, {7, 0x5cu}, {8, 0xfbu}, - {15, 0x7ffcu}, {6, 0x20u}, {12, 0xffbu}, {10, 0x3fcu}, - {13, 0x1ffau}, {6, 0x21u}, {7, 0x5du}, {7, 0x5eu}, - {7, 0x5fu}, {7, 0x60u}, {7, 0x61u}, {7, 0x62u}, - {7, 0x63u}, {7, 0x64u}, {7, 0x65u}, {7, 0x66u}, - {7, 0x67u}, {7, 0x68u}, {7, 0x69u}, {7, 0x6au}, - {7, 0x6bu}, {7, 0x6cu}, {7, 0x6du}, {7, 0x6eu}, - {7, 0x6fu}, {7, 0x70u}, {7, 0x71u}, {7, 0x72u}, - {8, 0xfcu}, {7, 0x73u}, {8, 0xfdu}, {13, 0x1ffbu}, - {19, 0x7fff0u}, {13, 0x1ffcu}, {14, 0x3ffcu}, {6, 0x22u}, - {15, 0x7ffdu}, {5, 0x3u}, {6, 0x23u}, {5, 0x4u}, - {6, 0x24u}, {5, 0x5u}, {6, 0x25u}, {6, 0x26u}, - {6, 0x27u}, {5, 0x6u}, {7, 0x74u}, {7, 0x75u}, - {6, 0x28u}, {6, 0x29u}, {6, 0x2au}, {5, 0x7u}, - {6, 0x2bu}, {7, 0x76u}, {6, 0x2cu}, {5, 0x8u}, - {5, 0x9u}, {6, 0x2du}, {7, 0x77u}, {7, 0x78u}, - {7, 0x79u}, {7, 0x7au}, {7, 0x7bu}, {15, 0x7ffeu}, - {11, 0x7fcu}, {14, 0x3ffdu}, {13, 0x1ffdu}, {28, 0xffffffcu}, - {20, 0xfffe6u}, {22, 0x3fffd2u}, {20, 0xfffe7u}, {20, 0xfffe8u}, - {22, 0x3fffd3u}, {22, 0x3fffd4u}, {22, 0x3fffd5u}, {23, 0x7fffd9u}, - {22, 0x3fffd6u}, {23, 0x7fffdau}, {23, 0x7fffdbu}, {23, 0x7fffdcu}, - {23, 0x7fffddu}, {23, 0x7fffdeu}, {24, 0xffffebu}, {23, 0x7fffdfu}, - {24, 0xffffecu}, {24, 0xffffedu}, {22, 0x3fffd7u}, {23, 0x7fffe0u}, - {24, 0xffffeeu}, {23, 0x7fffe1u}, {23, 0x7fffe2u}, {23, 0x7fffe3u}, - {23, 0x7fffe4u}, {21, 0x1fffdcu}, {22, 0x3fffd8u}, {23, 0x7fffe5u}, - {22, 0x3fffd9u}, {23, 0x7fffe6u}, {23, 0x7fffe7u}, {24, 0xffffefu}, - {22, 0x3fffdau}, {21, 0x1fffddu}, {20, 0xfffe9u}, {22, 0x3fffdbu}, - {22, 0x3fffdcu}, {23, 0x7fffe8u}, {23, 0x7fffe9u}, {21, 0x1fffdeu}, - {23, 0x7fffeau}, {22, 0x3fffddu}, {22, 0x3fffdeu}, {24, 0xfffff0u}, - {21, 0x1fffdfu}, {22, 0x3fffdfu}, {23, 0x7fffebu}, {23, 0x7fffecu}, - {21, 0x1fffe0u}, {21, 0x1fffe1u}, {22, 0x3fffe0u}, {21, 0x1fffe2u}, - {23, 0x7fffedu}, {22, 0x3fffe1u}, {23, 0x7fffeeu}, {23, 0x7fffefu}, - {20, 0xfffeau}, {22, 0x3fffe2u}, {22, 0x3fffe3u}, {22, 0x3fffe4u}, - {23, 0x7ffff0u}, {22, 0x3fffe5u}, {22, 0x3fffe6u}, {23, 0x7ffff1u}, - {26, 0x3ffffe0u}, {26, 0x3ffffe1u}, {20, 0xfffebu}, {19, 0x7fff1u}, - {22, 0x3fffe7u}, {23, 0x7ffff2u}, {22, 0x3fffe8u}, {25, 0x1ffffecu}, - {26, 0x3ffffe2u}, {26, 0x3ffffe3u}, {26, 0x3ffffe4u}, {27, 0x7ffffdeu}, - {27, 0x7ffffdfu}, {26, 0x3ffffe5u}, {24, 0xfffff1u}, {25, 0x1ffffedu}, - {19, 0x7fff2u}, {21, 0x1fffe3u}, {26, 0x3ffffe6u}, {27, 0x7ffffe0u}, - {27, 0x7ffffe1u}, {26, 0x3ffffe7u}, {27, 0x7ffffe2u}, {24, 0xfffff2u}, - {21, 0x1fffe4u}, {21, 0x1fffe5u}, {26, 0x3ffffe8u}, {26, 0x3ffffe9u}, - {28, 0xffffffdu}, {27, 0x7ffffe3u}, {27, 0x7ffffe4u}, {27, 0x7ffffe5u}, - {20, 0xfffecu}, {24, 0xfffff3u}, {20, 0xfffedu}, {21, 0x1fffe6u}, - {22, 0x3fffe9u}, {21, 0x1fffe7u}, {21, 0x1fffe8u}, {23, 0x7ffff3u}, - {22, 0x3fffeau}, {22, 0x3fffebu}, {25, 0x1ffffeeu}, {25, 0x1ffffefu}, - {24, 0xfffff4u}, {24, 0xfffff5u}, {26, 0x3ffffeau}, {23, 0x7ffff4u}, - {26, 0x3ffffebu}, {27, 0x7ffffe6u}, {26, 0x3ffffecu}, {26, 0x3ffffedu}, - {27, 0x7ffffe7u}, {27, 0x7ffffe8u}, {27, 0x7ffffe9u}, {27, 0x7ffffeau}, - {27, 0x7ffffebu}, {28, 0xffffffeu}, {27, 0x7ffffecu}, {27, 0x7ffffedu}, - {27, 0x7ffffeeu}, {27, 0x7ffffefu}, {27, 0x7fffff0u}, {26, 0x3ffffeeu}, - {30, 0x3fffffffu}}; + {13, 0xffc00000u}, {23, 0xffffb000u}, {28, 0xfffffe20u}, {28, 0xfffffe30u}, + {28, 0xfffffe40u}, {28, 0xfffffe50u}, {28, 0xfffffe60u}, {28, 0xfffffe70u}, + {28, 0xfffffe80u}, {24, 0xffffea00u}, {30, 0xfffffff0u}, {28, 0xfffffe90u}, + {28, 0xfffffea0u}, {30, 0xfffffff4u}, {28, 0xfffffeb0u}, {28, 0xfffffec0u}, + {28, 0xfffffed0u}, {28, 0xfffffee0u}, {28, 0xfffffef0u}, {28, 0xffffff00u}, + {28, 0xffffff10u}, {28, 0xffffff20u}, {30, 0xfffffff8u}, {28, 0xffffff30u}, + {28, 0xffffff40u}, {28, 0xffffff50u}, {28, 0xffffff60u}, {28, 0xffffff70u}, + {28, 0xffffff80u}, {28, 0xffffff90u}, {28, 0xffffffa0u}, {28, 0xffffffb0u}, + {6, 0x50000000u}, {10, 0xfe000000u}, {10, 0xfe400000u}, {12, 0xffa00000u}, + {13, 0xffc80000u}, {6, 0x54000000u}, {8, 0xf8000000u}, {11, 0xff400000u}, + {10, 0xfe800000u}, {10, 0xfec00000u}, {8, 0xf9000000u}, {11, 0xff600000u}, + {8, 0xfa000000u}, {6, 0x58000000u}, {6, 0x5c000000u}, {6, 0x60000000u}, + {5, 0x0u}, {5, 0x8000000u}, {5, 0x10000000u}, {6, 0x64000000u}, + {6, 0x68000000u}, {6, 0x6c000000u}, {6, 0x70000000u}, {6, 0x74000000u}, + {6, 0x78000000u}, {6, 0x7c000000u}, {7, 0xb8000000u}, {8, 0xfb000000u}, + {15, 0xfff80000u}, {6, 0x80000000u}, {12, 0xffb00000u}, {10, 0xff000000u}, + {13, 0xffd00000u}, {6, 0x84000000u}, {7, 0xba000000u}, {7, 0xbc000000u}, + {7, 0xbe000000u}, {7, 0xc0000000u}, {7, 0xc2000000u}, {7, 0xc4000000u}, + {7, 0xc6000000u}, {7, 0xc8000000u}, {7, 0xca000000u}, {7, 0xcc000000u}, + {7, 0xce000000u}, {7, 0xd0000000u}, {7, 0xd2000000u}, {7, 0xd4000000u}, + {7, 0xd6000000u}, {7, 0xd8000000u}, {7, 0xda000000u}, {7, 0xdc000000u}, + {7, 0xde000000u}, {7, 0xe0000000u}, {7, 0xe2000000u}, {7, 0xe4000000u}, + {8, 0xfc000000u}, {7, 0xe6000000u}, {8, 0xfd000000u}, {13, 0xffd80000u}, + {19, 0xfffe0000u}, {13, 0xffe00000u}, {14, 0xfff00000u}, {6, 0x88000000u}, + {15, 0xfffa0000u}, {5, 0x18000000u}, {6, 0x8c000000u}, {5, 0x20000000u}, + {6, 0x90000000u}, {5, 0x28000000u}, {6, 0x94000000u}, {6, 0x98000000u}, + {6, 0x9c000000u}, {5, 0x30000000u}, {7, 0xe8000000u}, {7, 0xea000000u}, + {6, 0xa0000000u}, {6, 0xa4000000u}, {6, 0xa8000000u}, {5, 0x38000000u}, + {6, 0xac000000u}, {7, 0xec000000u}, {6, 0xb0000000u}, {5, 0x40000000u}, + {5, 0x48000000u}, {6, 0xb4000000u}, {7, 0xee000000u}, {7, 0xf0000000u}, + {7, 0xf2000000u}, {7, 0xf4000000u}, {7, 0xf6000000u}, {15, 0xfffc0000u}, + {11, 0xff800000u}, {14, 0xfff40000u}, {13, 0xffe80000u}, {28, 0xffffffc0u}, + {20, 0xfffe6000u}, {22, 0xffff4800u}, {20, 0xfffe7000u}, {20, 0xfffe8000u}, + {22, 0xffff4c00u}, {22, 0xffff5000u}, {22, 0xffff5400u}, {23, 0xffffb200u}, + {22, 0xffff5800u}, {23, 0xffffb400u}, {23, 0xffffb600u}, {23, 0xffffb800u}, + {23, 0xffffba00u}, {23, 0xffffbc00u}, {24, 0xffffeb00u}, {23, 0xffffbe00u}, + {24, 0xffffec00u}, {24, 0xffffed00u}, {22, 0xffff5c00u}, {23, 0xffffc000u}, + {24, 0xffffee00u}, {23, 0xffffc200u}, {23, 0xffffc400u}, {23, 0xffffc600u}, + {23, 0xffffc800u}, {21, 0xfffee000u}, {22, 0xffff6000u}, {23, 0xffffca00u}, + {22, 0xffff6400u}, {23, 0xffffcc00u}, {23, 0xffffce00u}, {24, 0xffffef00u}, + {22, 0xffff6800u}, {21, 0xfffee800u}, {20, 0xfffe9000u}, {22, 0xffff6c00u}, + {22, 0xffff7000u}, {23, 0xffffd000u}, {23, 0xffffd200u}, {21, 0xfffef000u}, + {23, 0xffffd400u}, {22, 0xffff7400u}, {22, 0xffff7800u}, {24, 0xfffff000u}, + {21, 0xfffef800u}, {22, 0xffff7c00u}, {23, 0xffffd600u}, {23, 0xffffd800u}, + {21, 0xffff0000u}, {21, 0xffff0800u}, {22, 0xffff8000u}, {21, 0xffff1000u}, + {23, 0xffffda00u}, {22, 0xffff8400u}, {23, 0xffffdc00u}, {23, 0xffffde00u}, + {20, 0xfffea000u}, {22, 0xffff8800u}, {22, 0xffff8c00u}, {22, 0xffff9000u}, + {23, 0xffffe000u}, {22, 0xffff9400u}, {22, 0xffff9800u}, {23, 0xffffe200u}, + {26, 0xfffff800u}, {26, 0xfffff840u}, {20, 0xfffeb000u}, {19, 0xfffe2000u}, + {22, 0xffff9c00u}, {23, 0xffffe400u}, {22, 0xffffa000u}, {25, 0xfffff600u}, + {26, 0xfffff880u}, {26, 0xfffff8c0u}, {26, 0xfffff900u}, {27, 0xfffffbc0u}, + {27, 0xfffffbe0u}, {26, 0xfffff940u}, {24, 0xfffff100u}, {25, 0xfffff680u}, + {19, 0xfffe4000u}, {21, 0xffff1800u}, {26, 0xfffff980u}, {27, 0xfffffc00u}, + {27, 0xfffffc20u}, {26, 0xfffff9c0u}, {27, 0xfffffc40u}, {24, 0xfffff200u}, + {21, 0xffff2000u}, {21, 0xffff2800u}, {26, 0xfffffa00u}, {26, 0xfffffa40u}, + {28, 0xffffffd0u}, {27, 0xfffffc60u}, {27, 0xfffffc80u}, {27, 0xfffffca0u}, + {20, 0xfffec000u}, {24, 0xfffff300u}, {20, 0xfffed000u}, {21, 0xffff3000u}, + {22, 0xffffa400u}, {21, 0xffff3800u}, {21, 0xffff4000u}, {23, 0xffffe600u}, + {22, 0xffffa800u}, {22, 0xffffac00u}, {25, 0xfffff700u}, {25, 0xfffff780u}, + {24, 0xfffff400u}, {24, 0xfffff500u}, {26, 0xfffffa80u}, {23, 0xffffe800u}, + {26, 0xfffffac0u}, {27, 0xfffffcc0u}, {26, 0xfffffb00u}, {26, 0xfffffb40u}, + {27, 0xfffffce0u}, {27, 0xfffffd00u}, {27, 0xfffffd20u}, {27, 0xfffffd40u}, + {27, 0xfffffd60u}, {28, 0xffffffe0u}, {27, 0xfffffd80u}, {27, 0xfffffda0u}, + {27, 0xfffffdc0u}, {27, 0xfffffde0u}, {27, 0xfffffe00u}, {26, 0xfffffb80u}, + {30, 0xfffffffcu}}; const nghttp2_huff_decode huff_decode_table[][16] = { /* 0 */ diff --git a/mkhufftbl.py b/mkhufftbl.py index 86960428..b5f33a48 100755 --- a/mkhufftbl.py +++ b/mkhufftbl.py @@ -423,9 +423,12 @@ typedef struct { print '''\ const nghttp2_huff_sym huff_sym_table[] = {''' for i in range(257): + nbits = symbol_tbl[i][0] + k = int(symbol_tbl[i][1], 16) + k = k << (32 - nbits) print '''\ {{ {}, 0x{}u }}{}\ -'''.format(symbol_tbl[i][0], symbol_tbl[i][1], ',' if i < 256 else '') +'''.format(symbol_tbl[i][0], hex(k)[2:], ',' if i < 256 else '') print '};' print ''