00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00028
00029
00030
00031
00032 #include "avcodec.h"
00033 #include "dsputil.h"
00034 #include "simple_idct.h"
00035
/*
 * Fixed-point coefficients for the 8-point transform, together with the
 * right shifts applied at the end of the row pass (ROW_SHIFT) and of the
 * column pass (COL_SHIFT).
 *
 * The first set is disabled by "#if 0"; its values are the same
 * coefficients at a scale of (1 << 11) instead of (1 << 14).
 */
#if 0
#define W1        2841
#define W2        2676
#define W3        2408
#define W4        2048
#define W5        1609
#define W6        1108
#define W7         565
#define ROW_SHIFT    8
#define COL_SHIFT   17
#else
/* Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, truncated to an integer. */
#define W1       22725
#define W2       21407
#define W3       19266
/*
 * NOTE(review): the formula above yields 16384 for i = 4; the value used
 * here is one less. Presumably deliberate -- confirm before "correcting".
 */
#define W4       16383
#define W5       12873
#define W6        8867
#define W7        4520
#define ROW_SHIFT   11
#define COL_SHIFT   20 /* 6 -- NOTE(review): meaning of this annotation is not visible here */
#endif
00057
/*
 * 16x16 -> 32 bit multiply helpers.
 *
 *   MUL16(rt, ra, rb):  rt  = ra * rb
 *   MAC16(rt, ra, rb):  rt += ra * rb
 *
 * rt must be an lvalue. Always use these as a statement terminated by ';'
 * at the call site: the PowerPC variant expands to an asm statement and
 * yields no value. No trailing ';' is part of the expansion, so the macros
 * behave like ordinary statements inside if/else.
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply-accumulate (PowerPC 405 "maclhw") */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb))

/* signed 16x16 -> 32 multiply (PowerPC 405 "mullhw") */
#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb))

#else

/* Portable versions; fully parenthesized so they compose safely. */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif
00077
00078 static inline void idctRowCondDC (DCTELEM * row)
00079 {
00080 int a0, a1, a2, a3, b0, b1, b2, b3;
00081 #ifdef HAVE_FAST_64BIT
00082 uint64_t temp;
00083 #else
00084 uint32_t temp;
00085 #endif
00086
00087 #ifdef HAVE_FAST_64BIT
00088 #ifdef WORDS_BIGENDIAN
00089 #define ROW0_MASK 0xffff000000000000LL
00090 #else
00091 #define ROW0_MASK 0xffffLL
00092 #endif
00093 if(sizeof(DCTELEM)==2){
00094 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
00095 ((uint64_t *)row)[1]) == 0) {
00096 temp = (row[0] << 3) & 0xffff;
00097 temp += temp << 16;
00098 temp += temp << 32;
00099 ((uint64_t *)row)[0] = temp;
00100 ((uint64_t *)row)[1] = temp;
00101 return;
00102 }
00103 }else{
00104 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
00105 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
00106 return;
00107 }
00108 }
00109 #else
00110 if(sizeof(DCTELEM)==2){
00111 if (!(((uint32_t*)row)[1] |
00112 ((uint32_t*)row)[2] |
00113 ((uint32_t*)row)[3] |
00114 row[1])) {
00115 temp = (row[0] << 3) & 0xffff;
00116 temp += temp << 16;
00117 ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
00118 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
00119 return;
00120 }
00121 }else{
00122 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
00123 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
00124 return;
00125 }
00126 }
00127 #endif
00128
00129 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
00130 a1 = a0;
00131 a2 = a0;
00132 a3 = a0;
00133
00134
00135 a0 += W2 * row[2];
00136 a1 += W6 * row[2];
00137 a2 -= W6 * row[2];
00138 a3 -= W2 * row[2];
00139
00140 MUL16(b0, W1, row[1]);
00141 MAC16(b0, W3, row[3]);
00142 MUL16(b1, W3, row[1]);
00143 MAC16(b1, -W7, row[3]);
00144 MUL16(b2, W5, row[1]);
00145 MAC16(b2, -W1, row[3]);
00146 MUL16(b3, W7, row[1]);
00147 MAC16(b3, -W5, row[3]);
00148
00149 #ifdef HAVE_FAST_64BIT
00150 temp = ((uint64_t*)row)[1];
00151 #else
00152 temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
00153 #endif
00154 if (temp != 0) {
00155 a0 += W4*row[4] + W6*row[6];
00156 a1 += - W4*row[4] - W2*row[6];
00157 a2 += - W4*row[4] + W2*row[6];
00158 a3 += W4*row[4] - W6*row[6];
00159
00160 MAC16(b0, W5, row[5]);
00161 MAC16(b0, W7, row[7]);
00162
00163 MAC16(b1, -W1, row[5]);
00164 MAC16(b1, -W5, row[7]);
00165
00166 MAC16(b2, W7, row[5]);
00167 MAC16(b2, W3, row[7]);
00168
00169 MAC16(b3, W3, row[5]);
00170 MAC16(b3, -W1, row[7]);
00171 }
00172
00173 row[0] = (a0 + b0) >> ROW_SHIFT;
00174 row[7] = (a0 - b0) >> ROW_SHIFT;
00175 row[1] = (a1 + b1) >> ROW_SHIFT;
00176 row[6] = (a1 - b1) >> ROW_SHIFT;
00177 row[2] = (a2 + b2) >> ROW_SHIFT;
00178 row[5] = (a2 - b2) >> ROW_SHIFT;
00179 row[3] = (a3 + b3) >> ROW_SHIFT;
00180 row[4] = (a3 - b3) >> ROW_SHIFT;
00181 }
00182
00183 static inline void idctSparseColPut (uint8_t *dest, int line_size,
00184 DCTELEM * col)
00185 {
00186 int a0, a1, a2, a3, b0, b1, b2, b3;
00187 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00188
00189
00190 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
00191 a1 = a0;
00192 a2 = a0;
00193 a3 = a0;
00194
00195 a0 += + W2*col[8*2];
00196 a1 += + W6*col[8*2];
00197 a2 += - W6*col[8*2];
00198 a3 += - W2*col[8*2];
00199
00200 MUL16(b0, W1, col[8*1]);
00201 MUL16(b1, W3, col[8*1]);
00202 MUL16(b2, W5, col[8*1]);
00203 MUL16(b3, W7, col[8*1]);
00204
00205 MAC16(b0, + W3, col[8*3]);
00206 MAC16(b1, - W7, col[8*3]);
00207 MAC16(b2, - W1, col[8*3]);
00208 MAC16(b3, - W5, col[8*3]);
00209
00210 if(col[8*4]){
00211 a0 += + W4*col[8*4];
00212 a1 += - W4*col[8*4];
00213 a2 += - W4*col[8*4];
00214 a3 += + W4*col[8*4];
00215 }
00216
00217 if (col[8*5]) {
00218 MAC16(b0, + W5, col[8*5]);
00219 MAC16(b1, - W1, col[8*5]);
00220 MAC16(b2, + W7, col[8*5]);
00221 MAC16(b3, + W3, col[8*5]);
00222 }
00223
00224 if(col[8*6]){
00225 a0 += + W6*col[8*6];
00226 a1 += - W2*col[8*6];
00227 a2 += + W2*col[8*6];
00228 a3 += - W6*col[8*6];
00229 }
00230
00231 if (col[8*7]) {
00232 MAC16(b0, + W7, col[8*7]);
00233 MAC16(b1, - W5, col[8*7]);
00234 MAC16(b2, + W3, col[8*7]);
00235 MAC16(b3, - W1, col[8*7]);
00236 }
00237
00238 dest[0] = cm[(a0 + b0) >> COL_SHIFT];
00239 dest += line_size;
00240 dest[0] = cm[(a1 + b1) >> COL_SHIFT];
00241 dest += line_size;
00242 dest[0] = cm[(a2 + b2) >> COL_SHIFT];
00243 dest += line_size;
00244 dest[0] = cm[(a3 + b3) >> COL_SHIFT];
00245 dest += line_size;
00246 dest[0] = cm[(a3 - b3) >> COL_SHIFT];
00247 dest += line_size;
00248 dest[0] = cm[(a2 - b2) >> COL_SHIFT];
00249 dest += line_size;
00250 dest[0] = cm[(a1 - b1) >> COL_SHIFT];
00251 dest += line_size;
00252 dest[0] = cm[(a0 - b0) >> COL_SHIFT];
00253 }
00254
00255 static inline void idctSparseColAdd (uint8_t *dest, int line_size,
00256 DCTELEM * col)
00257 {
00258 int a0, a1, a2, a3, b0, b1, b2, b3;
00259 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00260
00261
00262 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
00263 a1 = a0;
00264 a2 = a0;
00265 a3 = a0;
00266
00267 a0 += + W2*col[8*2];
00268 a1 += + W6*col[8*2];
00269 a2 += - W6*col[8*2];
00270 a3 += - W2*col[8*2];
00271
00272 MUL16(b0, W1, col[8*1]);
00273 MUL16(b1, W3, col[8*1]);
00274 MUL16(b2, W5, col[8*1]);
00275 MUL16(b3, W7, col[8*1]);
00276
00277 MAC16(b0, + W3, col[8*3]);
00278 MAC16(b1, - W7, col[8*3]);
00279 MAC16(b2, - W1, col[8*3]);
00280 MAC16(b3, - W5, col[8*3]);
00281
00282 if(col[8*4]){
00283 a0 += + W4*col[8*4];
00284 a1 += - W4*col[8*4];
00285 a2 += - W4*col[8*4];
00286 a3 += + W4*col[8*4];
00287 }
00288
00289 if (col[8*5]) {
00290 MAC16(b0, + W5, col[8*5]);
00291 MAC16(b1, - W1, col[8*5]);
00292 MAC16(b2, + W7, col[8*5]);
00293 MAC16(b3, + W3, col[8*5]);
00294 }
00295
00296 if(col[8*6]){
00297 a0 += + W6*col[8*6];
00298 a1 += - W2*col[8*6];
00299 a2 += + W2*col[8*6];
00300 a3 += - W6*col[8*6];
00301 }
00302
00303 if (col[8*7]) {
00304 MAC16(b0, + W7, col[8*7]);
00305 MAC16(b1, - W5, col[8*7]);
00306 MAC16(b2, + W3, col[8*7]);
00307 MAC16(b3, - W1, col[8*7]);
00308 }
00309
00310 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
00311 dest += line_size;
00312 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];
00313 dest += line_size;
00314 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];
00315 dest += line_size;
00316 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];
00317 dest += line_size;
00318 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];
00319 dest += line_size;
00320 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];
00321 dest += line_size;
00322 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
00323 dest += line_size;
00324 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
00325 }
00326
00327 static inline void idctSparseCol (DCTELEM * col)
00328 {
00329 int a0, a1, a2, a3, b0, b1, b2, b3;
00330
00331
00332 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
00333 a1 = a0;
00334 a2 = a0;
00335 a3 = a0;
00336
00337 a0 += + W2*col[8*2];
00338 a1 += + W6*col[8*2];
00339 a2 += - W6*col[8*2];
00340 a3 += - W2*col[8*2];
00341
00342 MUL16(b0, W1, col[8*1]);
00343 MUL16(b1, W3, col[8*1]);
00344 MUL16(b2, W5, col[8*1]);
00345 MUL16(b3, W7, col[8*1]);
00346
00347 MAC16(b0, + W3, col[8*3]);
00348 MAC16(b1, - W7, col[8*3]);
00349 MAC16(b2, - W1, col[8*3]);
00350 MAC16(b3, - W5, col[8*3]);
00351
00352 if(col[8*4]){
00353 a0 += + W4*col[8*4];
00354 a1 += - W4*col[8*4];
00355 a2 += - W4*col[8*4];
00356 a3 += + W4*col[8*4];
00357 }
00358
00359 if (col[8*5]) {
00360 MAC16(b0, + W5, col[8*5]);
00361 MAC16(b1, - W1, col[8*5]);
00362 MAC16(b2, + W7, col[8*5]);
00363 MAC16(b3, + W3, col[8*5]);
00364 }
00365
00366 if(col[8*6]){
00367 a0 += + W6*col[8*6];
00368 a1 += - W2*col[8*6];
00369 a2 += + W2*col[8*6];
00370 a3 += - W6*col[8*6];
00371 }
00372
00373 if (col[8*7]) {
00374 MAC16(b0, + W7, col[8*7]);
00375 MAC16(b1, - W5, col[8*7]);
00376 MAC16(b2, + W3, col[8*7]);
00377 MAC16(b3, - W1, col[8*7]);
00378 }
00379
00380 col[0 ] = ((a0 + b0) >> COL_SHIFT);
00381 col[8 ] = ((a1 + b1) >> COL_SHIFT);
00382 col[16] = ((a2 + b2) >> COL_SHIFT);
00383 col[24] = ((a3 + b3) >> COL_SHIFT);
00384 col[32] = ((a3 - b3) >> COL_SHIFT);
00385 col[40] = ((a2 - b2) >> COL_SHIFT);
00386 col[48] = ((a1 - b1) >> COL_SHIFT);
00387 col[56] = ((a0 - b0) >> COL_SHIFT);
00388 }
00389
00390 void ff_simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
00391 {
00392 int i;
00393 for(i=0; i<8; i++)
00394 idctRowCondDC(block + i*8);
00395
00396 for(i=0; i<8; i++)
00397 idctSparseColPut(dest + i, line_size, block + i);
00398 }
00399
00400 void ff_simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
00401 {
00402 int i;
00403 for(i=0; i<8; i++)
00404 idctRowCondDC(block + i*8);
00405
00406 for(i=0; i<8; i++)
00407 idctSparseColAdd(dest + i, line_size, block + i);
00408 }
00409
00410 void ff_simple_idct(DCTELEM *block)
00411 {
00412 int i;
00413 for(i=0; i<8; i++)
00414 idctRowCondDC(block + i*8);
00415
00416 for(i=0; i<8; i++)
00417 idctSparseCol(block + i);
00418 }
00419
00420
00421
/*
 * Constants for the 4-point column transform used by the 2-4-8 IDCT.
 * C_FIX(x) converts x to fixed point with CN_SHIFT fractional bits,
 * rounding to nearest for positive x.
 */
#define CN_SHIFT 12
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
#define C1       C_FIX(0.6532814824)
#define C2       C_FIX(0.2705980501)

/* right shift applied to the column results before they are stored */
#define C_SHIFT  (4+1+12)
00430
00431 static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col)
00432 {
00433 int c0, c1, c2, c3, a0, a1, a2, a3;
00434 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00435
00436 a0 = col[8*0];
00437 a1 = col[8*2];
00438 a2 = col[8*4];
00439 a3 = col[8*6];
00440 c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
00441 c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
00442 c1 = a1 * C1 + a3 * C2;
00443 c3 = a1 * C2 - a3 * C1;
00444 dest[0] = cm[(c0 + c1) >> C_SHIFT];
00445 dest += line_size;
00446 dest[0] = cm[(c2 + c3) >> C_SHIFT];
00447 dest += line_size;
00448 dest[0] = cm[(c2 - c3) >> C_SHIFT];
00449 dest += line_size;
00450 dest[0] = cm[(c0 - c1) >> C_SHIFT];
00451 }
00452
/*
 * Butterfly on element k of two adjacent 8-element rows:
 *   ptr[k]     <- ptr[k] + ptr[8 + k]
 *   ptr[8 + k] <- ptr[k] - ptr[8 + k]   (using the old values)
 *
 * Relies on a pointer named "ptr" being in scope at the point of use.
 * Wrapped in do { } while (0) so that "BF(k);" is a single statement, and
 * k is parenthesized so expression arguments index correctly.
 */
#define BF(k) \
do {\
    int bf_lo, bf_hi;\
    bf_lo = ptr[(k)];\
    bf_hi = ptr[8 + (k)];\
    ptr[(k)] = bf_lo + bf_hi;\
    ptr[8 + (k)] = bf_lo - bf_hi;\
} while (0)
00461
00462
00463
00464
00465
00466
00467
00468 void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
00469 {
00470 int i;
00471 DCTELEM *ptr;
00472
00473
00474 ptr = block;
00475 for(i=0;i<4;i++) {
00476 BF(0);
00477 BF(1);
00478 BF(2);
00479 BF(3);
00480 BF(4);
00481 BF(5);
00482 BF(6);
00483 BF(7);
00484 ptr += 2 * 8;
00485 }
00486
00487
00488 for(i=0; i<8; i++) {
00489 idctRowCondDC(block + i*8);
00490 }
00491
00492
00493 for(i=0;i<8;i++) {
00494 idct4col_put(dest + i, 2 * line_size, block + i);
00495 idct4col_put(dest + line_size + i, 2 * line_size, block + 8 + i);
00496 }
00497 }
00498
00499
/*
 * Drop the previous C_* definitions and redefine them for the 4-point
 * column transform that follows. Unlike the earlier set, C_FIX() here also
 * multiplies by 1.414213562 (sqrt(2)) before converting to fixed point
 * with CN_SHIFT fractional bits.
 */
#undef CN_SHIFT
#undef C_SHIFT
#undef C_FIX
#undef C1
#undef C2

#define CN_SHIFT 12
#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
#define C1       C_FIX(0.6532814824)
#define C2       C_FIX(0.2705980501)
#define C3       C_FIX(0.5)

/* right shift applied to the column results before they are used */
#define C_SHIFT  (4+1+12)
00511 static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
00512 {
00513 int c0, c1, c2, c3, a0, a1, a2, a3;
00514 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00515
00516 a0 = col[8*0];
00517 a1 = col[8*1];
00518 a2 = col[8*2];
00519 a3 = col[8*3];
00520 c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1));
00521 c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
00522 c1 = a1 * C1 + a3 * C2;
00523 c3 = a1 * C2 - a3 * C1;
00524 dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];
00525 dest += line_size;
00526 dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];
00527 dest += line_size;
00528 dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];
00529 dest += line_size;
00530 dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
00531 }
00532
/*
 * Constants for the 4-point row transform. R_FIX(x) multiplies x by
 * 1.414213562 (sqrt(2)) and converts it to fixed point with RN_SHIFT
 * fractional bits; R_SHIFT is the right shift applied to the row results.
 */
#define RN_SHIFT 15
#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
#define R1       R_FIX(0.6532814824)
#define R2       R_FIX(0.2705980501)
#define R3       R_FIX(0.5)
#define R_SHIFT  11
00539 static inline void idct4row(DCTELEM *row)
00540 {
00541 int c0, c1, c2, c3, a0, a1, a2, a3;
00542
00543
00544 a0 = row[0];
00545 a1 = row[1];
00546 a2 = row[2];
00547 a3 = row[3];
00548 c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));
00549 c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
00550 c1 = a1 * R1 + a3 * R2;
00551 c3 = a1 * R2 - a3 * R1;
00552 row[0]= (c0 + c1) >> R_SHIFT;
00553 row[1]= (c2 + c3) >> R_SHIFT;
00554 row[2]= (c2 - c3) >> R_SHIFT;
00555 row[3]= (c0 - c1) >> R_SHIFT;
00556 }
00557
00558 void ff_simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
00559 {
00560 int i;
00561
00562
00563 for(i=0; i<4; i++) {
00564 idctRowCondDC(block + i*8);
00565 }
00566
00567
00568 for(i=0;i<8;i++) {
00569 idct4col_add(dest + i, line_size, block + i);
00570 }
00571 }
00572
00573 void ff_simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
00574 {
00575 int i;
00576
00577
00578 for(i=0; i<8; i++) {
00579 idct4row(block + i*8);
00580 }
00581
00582
00583 for(i=0; i<4; i++){
00584 idctSparseColAdd(dest + i, line_size, block + i);
00585 }
00586 }
00587
00588 void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
00589 {
00590 int i;
00591
00592
00593 for(i=0; i<4; i++) {
00594 idct4row(block + i*8);
00595 }
00596
00597
00598 for(i=0; i<4; i++){
00599 idct4col_add(dest + i, line_size, block + i);
00600 }
00601 }