#include "headers.h"

/**
 * FUNCTION:
 *
 *   void fdct_8x8(uint8 *input_data, sint16 *output_data, sint32 num_fdcts)
 *
 * PURPOSE:
 *
 *   Forward 8x8 DCT over a list of 8x8 blocks.  Each block is transformed
 *   in two separable passes: first every column (input_data -> output_data),
 *   then every row (in place inside output_data).
 *
 * PARAMETERS:
 *
 *   uint8  *input_data  - Source samples, 64 bytes per block, read with a
 *                         row stride of 8.  Never written by this routine.
 *
 *   sint16 *output_data - Destination coefficients, 64 values per block.
 *                         The caller provides the storage, and it must
 *                         *not* overlap input_data.
 *
 *   sint32  num_fdcts   - Number of 8x8 blocks to transform.  When it is
 *                         zero or negative neither loop executes and
 *                         nothing is read or written.
 *
 * PROVENANCE:
 *
 *   Adapted from the Texas Instruments routine "fdct_8x8 -- 8x8 Block FDCT
 *   With Rounding, Endian Neutral" (initial handcode version 20-May-1999),
 *   which is described by TI as a Chen FDCT whose outputs are rounded for
 *   improved accuracy and which performs no saturation.
 *
 *   The TI header is only partially applicable here: it documents the
 *   prototype "void fdct_8x8(short fdct_data[], unsigned num_fdcts)"
 *   operating in place on shorts, whereas this version reads uint8 samples
 *   and writes to a separate sint16 buffer.  TI's notes state that unsigned
 *   8Q0 pixel data yields signed 12Q0 results, and that signed 11Q0 input
 *   (range [-1024,1023]) yields signed 15Q0 results with rare overflow.
 *   The cycle counts quoted by TI ("48 + 160 * num_fdcts") presumably
 *   describe TI's hand-coded implementation rather than this C code.
 *
 * NOTE:
 *
 *   The arithmetic right-shifts negative intermediate values; the C
 *   standard leaves that implementation-defined, so the code relies on the
 *   compiler performing an arithmetic (sign-preserving) shift.
 */

/*
 * One 8-point 1-D forward DCT butterfly (stages 1 to 4).
 *
 *   x - eight spatial-domain samples
 *   X - receives the eight unscaled frequency-domain terms; the caller is
 *       responsible for the final shift / rounding of each term
 *
 * The sum terms of stage 1 and stage 2 are kept in 32 bits, all difference
 * and odd-half terms are narrowed to 16 bits, exactly as in the TI
 * reference code.
 */
static void fdct_1d(const sint16 *x, sint32 *X)
{
    /* Fixed-point cosine coefficients c0..c7 (c4 is not needed).        */
    /* c0 is applied with a 16-bit shift below; the other coefficients   */
    /* are left for the caller to scale away.                            */
    const uint16 c0 = 0xB505;
    const uint16 c1 = 0x2C62, c2 = 0x29CF, c3 = 0x25A0;
    const uint16 c5 = 0x1924, c6 = 0x1151, c7 = 0x08D4;

    /* Stage 1: mirror-pair sums feed the even half, differences the odd. */
    const sint32 sum07 = x[0] + x[7];
    const sint32 sum16 = x[1] + x[6];
    const sint32 sum25 = x[2] + x[5];
    const sint32 sum34 = x[3] + x[4];
    const sint16 dif07 = (sint16)(x[0] - x[7]);
    const sint16 dif16 = (sint16)(x[1] - x[6]);
    const sint16 dif25 = (sint16)(x[2] - x[5]);
    const sint16 dif34 = (sint16)(x[3] - x[4]);

    /* Stage 2: second butterfly on the even half, c0 rotation on the odd. */
    const sint32 even_a  = sum07 + sum34;
    const sint32 even_b  = sum16 + sum25;
    const sint16 rot_a   = (sint16)(sum07 - sum34);
    const sint16 rot_b   = (sint16)(sum16 - sum25);
    const sint16 odd_add = (sint16)(dif16 + dif25);
    const sint16 odd_sub = (sint16)(dif16 - dif25);
    const sint16 odd_s   = (sint16)((odd_add * c0 + 0x7FFF) >> 16);
    const sint16 odd_q   = (sint16)((odd_sub * c0 + 0x7FFF) >> 16);

    /* Stage 3: combine the odd-half terms. */
    const sint16 q_hi = (sint16)(dif34 + odd_q);
    const sint16 q_lo = (sint16)(dif34 - odd_q);
    const sint16 s_hi = (sint16)(dif07 + odd_s);
    const sint16 s_lo = (sint16)(dif07 - odd_s);

    /* Stage 3/4, even half: X[0] and X[4] carry no multiply. */
    X[0] = even_a + even_b;
    X[4] = even_a - even_b;
    X[2] = c6 * rot_b + c2 * rot_a;
    X[6] = c6 * rot_a - c2 * rot_b;

    /* Stage 4, odd half. */
    X[1] = c7 * q_hi + c1 * s_hi;
    X[7] = c7 * s_hi - c1 * q_hi;
    X[5] = c3 * q_lo + c5 * s_lo;
    X[3] = c3 * s_lo - c5 * q_lo;
}

void fdct_8x8(uint8 *input_data, sint16 *output_data, sint32 num_fdcts)
{
    sint16       samples[8];   /* spatial-domain inputs of one 1-D DCT   */
    sint32       freq[8];      /* unscaled outputs of one 1-D DCT        */
    sint32       blk, col, row, k;
    sint32       total_rows;
    const uint8 *src;
    sint16      *dst;

    /* ------------------------------------------------------------ */
    /*  Pass 1: vertical 1-D FDCT down every column of every block.  */
    /* ------------------------------------------------------------ */
    src = input_data;
    dst = output_data;
    for (blk = 0; blk < num_fdcts; blk++) {
        for (col = 0; col < 8; col++) {
            const uint8 *col_in  = src + col;
            sint16      *col_out = dst + col;

            /* Gather the column: consecutive rows are 8 elements apart. */
            for (k = 0; k < 8; k++) {
                samples[k] = col_in[8 * k];
            }

#ifdef ENC_DEBUG
            printf("vert. i: %x ", samples[0]);
            for (k = 1; k < 8; k++) {
                printf("%x ", samples[k]);
            }
#endif

            fdct_1d(samples, freq);

            /* Terms 0 and 4 had no multiply applied and are stored     */
            /* unchanged; the others drop 13 bits by plain truncation   */
            /* (this pass does not round).                              */
            for (k = 0; k < 8; k++) {
                if (k == 0 || k == 4) {
                    col_out[8 * k] = (sint16)freq[k];
                } else {
                    col_out[8 * k] = (sint16)(freq[k] >> 13);
                }
            }

#ifdef ENC_DEBUG
            printf("o: %x ", (uint16)col_out[0]);
            for (k = 1; k < 7; k++) {
                printf("%x ", (uint16)col_out[8 * k]);
            }
            printf("%x\r\n", (uint16)col_out[56]);
#endif
        }

        /* Advance to the next 8x8 block. */
        src += 64;
        dst += 64;
    }

    /* ------------------------------------------------------------ */
    /*  Pass 2: horizontal 1-D FDCT along every row, in place in     */
    /*  output_data.  Blocks are contiguous, so the rows of all      */
    /*  blocks are simply walked one after another.                  */
    /* ------------------------------------------------------------ */
    dst        = output_data;
    total_rows = 8 * num_fdcts;
    for (row = 0; row < total_rows; row++) {
        for (k = 0; k < 8; k++) {
            samples[k] = dst[k];
        }

#ifdef ENC_DEBUG
        printf("horis. i: %x ", samples[0]);
        for (k = 1; k < 8; k++) {
            printf("%x ", samples[k]);
        }
#endif

        fdct_1d(samples, freq);

        /* Round and truncate.  Terms 0 and 4 had no multiply applied,  */
        /* so they only lose 3 bits.  Per the original TI note, term    */
        /* 0 uses a slightly larger rounding constant to offset the     */
        /* truncation of the earlier pass, which shifts without         */
        /* rounding (in this code that is the vertical pass).           */
        for (k = 0; k < 8; k++) {
            sint32 rounded;

            if (k == 0) {
                rounded = (freq[k] + 0x0006) >> 3;
            } else if (k == 4) {
                rounded = (freq[k] + 0x0004) >> 3;
            } else {
                rounded = (freq[k] + 0x7FFF) >> 16;
            }
            dst[k] = (sint16)rounded;
        }

#ifdef ENC_DEBUG
        printf("o: %x ", (uint16)dst[0]);
        for (k = 1; k < 7; k++) {
            printf("%x ", (uint16)dst[k]);
        }
        printf("%x\r\n", (uint16)dst[7]);
#endif

        /* Advance to the next row. */
        dst += 8;
    }

#ifdef ENC_DEBUG
    printf("\r\n");
#endif
}