T1: loop unrolling in dec_sigpass_raw and dec_refpass_raw

This commit is contained in:
Even Rouault 2017-05-31 17:15:27 +02:00
parent 68557ff503
commit aa7a8a4398
1 changed file with 119 additions and 17 deletions

View File

@ -630,29 +630,92 @@ static void opj_t1_dec_sigpass_raw(
OPJ_INT32 bpno, OPJ_INT32 bpno,
OPJ_INT32 cblksty) OPJ_INT32 cblksty)
{ {
OPJ_INT32 one, half, oneplushalf, vsc; OPJ_INT32 one, half, oneplushalf;
OPJ_UINT32 i, j, k; OPJ_UINT32 i, j, k;
opj_flag_t *flagsp = &T1_FLAGS(0, 0); opj_flag_t *flagsp = &T1_FLAGS(0, 0);
one = 1 << bpno; one = 1 << bpno;
half = one >> 1; half = one >> 1;
oneplushalf = one | half; oneplushalf = one | half;
if ((cblksty & J2K_CCP_CBLKSTY_VSC)) {
OPJ_INT32 *data1 = t1->data;
for (k = 0; k < t1->h; k += 4) { for (k = 0; k < t1->h; k += 4) {
for (i = 0; i < t1->w; ++i) { for (i = 0; i < t1->w; ++i) {
OPJ_INT32* data2 = data1 + i;
opj_flag_t *flagsp2 = flagsp + i; opj_flag_t *flagsp2 = flagsp + i;
for (j = k; j < k + 4 && j < t1->h; ++j) { for (j = k; j < k + 4 && j < t1->h; ++j) {
vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || OPJ_INT32 vsc = (j == k + 3 || j == t1->h - 1) ? 1 : 0;
j == t1->h - 1)) ? 1 : 0;
opj_t1_dec_sigpass_step_raw( opj_t1_dec_sigpass_step_raw(
t1, t1,
flagsp2, flagsp2,
&t1->data[(j * t1->w) + i], data2,
oneplushalf, oneplushalf,
vsc, vsc,
j - k); j - k);
data2 += t1->w;
} }
} }
data1 += t1->w << 2;
flagsp += t1->flags_stride; flagsp += t1->flags_stride;
} }
} else {
OPJ_INT32 *data1 = t1->data;
for (k = 0; k < (t1->h & ~3U); k += 4) {
for (i = 0; i < t1->w; ++i) {
OPJ_INT32* data2 = data1 + i;
opj_flag_t *flagsp2 = flagsp + i;
opj_t1_dec_sigpass_step_raw(
t1,
flagsp2,
data2,
oneplushalf,
0, /* vsc */
0U);
data2 += t1->w;
opj_t1_dec_sigpass_step_raw(
t1,
flagsp2,
data2,
oneplushalf,
0, /* vsc */
1U);
data2 += t1->w;
opj_t1_dec_sigpass_step_raw(
t1,
flagsp2,
data2,
oneplushalf,
0, /* vsc */
2U);
data2 += t1->w;
opj_t1_dec_sigpass_step_raw(
t1,
flagsp2,
data2,
oneplushalf,
0, /* vsc */
3U);
data2 += t1->w;
}
data1 += t1->w << 2;
flagsp += t1->flags_stride;
}
if (k < t1->h) {
for (i = 0; i < t1->w; ++i) {
OPJ_INT32* data2 = data1 + i;
opj_flag_t *flagsp2 = flagsp + i;
for (j = 0; j < t1->h - k; ++j) {
opj_t1_dec_sigpass_step_raw(
t1,
flagsp2,
data2,
oneplushalf,
0, /* vsc */
j);
data2 += t1->w;
}
}
}
}
} }
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, w, h, flags_stride) \ #define opj_t1_dec_sigpass_mqc_internal(t1, bpno, w, h, flags_stride) \
@ -798,13 +861,14 @@ static INLINE void opj_t1_dec_refpass_step_raw(
OPJ_INT32 neghalf, OPJ_INT32 neghalf,
OPJ_UINT32 ci) OPJ_UINT32 ci)
{ {
OPJ_INT32 v, t; OPJ_UINT32 v;
OPJ_INT32 t;
opj_raw_t *raw = t1->raw; /* RAW component */ opj_raw_t *raw = t1->raw; /* RAW component */
if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
(T1_SIGMA_THIS << (ci * 3U))) { (T1_SIGMA_THIS << (ci * 3U))) {
v = (OPJ_INT32)opj_raw_decode(raw); v = opj_raw_decode(raw);
t = v ? poshalf : neghalf; t = v ? poshalf : neghalf;
*datap += *datap < 0 ? -t : t; *datap += *datap < 0 ? -t : t;
*flagsp |= T1_MU_THIS << (ci * 3U); *flagsp |= T1_MU_THIS << (ci * 3U);
@ -981,23 +1045,61 @@ static void opj_t1_dec_refpass_raw(
{ {
OPJ_INT32 one, poshalf, neghalf; OPJ_INT32 one, poshalf, neghalf;
OPJ_UINT32 i, j, k; OPJ_UINT32 i, j, k;
OPJ_INT32 *data1 = t1->data;
opj_flag_t *flagsp1 = &T1_FLAGS(0, 0); opj_flag_t *flagsp1 = &T1_FLAGS(0, 0);
one = 1 << bpno; one = 1 << bpno;
poshalf = one >> 1; poshalf = one >> 1;
neghalf = bpno > 0 ? -poshalf : -1; neghalf = bpno > 0 ? -poshalf : -1;
for (k = 0; k < t1->h; k += 4) { for (k = 0; k < (t1->h & ~3U); k += 4) {
for (i = 0; i < t1->w; ++i) { for (i = 0; i < t1->w; ++i) {
OPJ_INT32 *data2 = data1 + i;
opj_flag_t *flagsp2 = flagsp1 + i; opj_flag_t *flagsp2 = flagsp1 + i;
for (j = k; j < k + 4 && j < t1->h; ++j) {
opj_t1_dec_refpass_step_raw( opj_t1_dec_refpass_step_raw(
t1, t1,
flagsp2, flagsp2,
&t1->data[(j * t1->w) + i], data2,
poshalf,
neghalf, 0U);
data2 += t1->w;
opj_t1_dec_refpass_step_raw(
t1,
flagsp2,
data2,
poshalf,
neghalf, 1U);
data2 += t1->w;
opj_t1_dec_refpass_step_raw(
t1,
flagsp2,
data2,
poshalf,
neghalf, 2U);
data2 += t1->w;
opj_t1_dec_refpass_step_raw(
t1,
flagsp2,
data2,
poshalf,
neghalf, 3U);
data2 += t1->w;
}
data1 += t1->w << 2;
flagsp1 += t1->flags_stride;
}
if (k < t1->h) {
for (i = 0; i < t1->w; ++i) {
OPJ_INT32 *data2 = data1 + i;
opj_flag_t *flagsp2 = flagsp1 + i;
for (j = k; j < t1->h; ++j) {
opj_t1_dec_refpass_step_raw(
t1,
flagsp2,
data2,
poshalf, poshalf,
neghalf, j - k); neghalf, j - k);
data2 += t1->w;
} }
} }
flagsp1 += t1->flags_stride;
} }
} }