00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include <stdlib.h>
00029 #include <stdio.h>
00030 #include <string.h>
00031 #include <sys/time.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034
00035 #include "dsputil.h"
00036
00037 #include "simple_idct.h"
00038 #include "faandct.h"
00039
00040 #ifndef MAX
00041 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
00042 #endif
00043
00044 #undef printf
00045 #undef random
00046
/*
 * Plain-memcpy implementation of fast_memcpy().
 * Presumably provided so that project code referencing fast_memcpy() links
 * in this standalone test program -- TODO confirm against the build.
 *
 * a: destination, b: source, c: number of bytes to copy.
 * Returns the destination pointer, exactly like memcpy().
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
00048
00049
00050 extern void fdct(DCTELEM *block);
00051 extern void idct(DCTELEM *block);
00052 extern void ff_idct_xvid_mmx(DCTELEM *block);
00053 extern void ff_idct_xvid_mmx2(DCTELEM *block);
00054 extern void init_fdct();
00055
00056 extern void ff_mmx_idct(DCTELEM *data);
00057 extern void ff_mmxext_idct(DCTELEM *data);
00058
00059 extern void odivx_idct_c (short *block);
00060
00061
00062 extern void ff_bfin_idct (DCTELEM *block) ;
00063 extern void ff_bfin_fdct (DCTELEM *block) ;
00064
00065
00066 extern void fdct_altivec (DCTELEM *block);
00067
00068
00069
00070 struct algo {
00071 char *name;
00072 enum { FDCT, IDCT } is_idct;
00073 void (* func) (DCTELEM *block);
00074 void (* ref) (DCTELEM *block);
00075 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
00076 };
00077
00078 #ifndef FAAN_POSTSCALE
00079 #define FAAN_SCALE SCALE_PERM
00080 #else
00081 #define FAAN_SCALE NO_PERM
00082 #endif
00083
00084 #define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
00085
00086
00087 struct algo algos[] = {
00088 DCT_ERROR( "REF-DBL", 0, fdct, fdct, NO_PERM),
00089 DCT_ERROR("FAAN", 0, ff_faandct, fdct, FAAN_SCALE),
00090 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM),
00091 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM),
00092 DCT_ERROR("REF-DBL", 1, idct, idct, NO_PERM),
00093 DCT_ERROR("INT", 1, j_rev_dct, idct, MMX_PERM),
00094 DCT_ERROR("SIMPLE-C", 1, ff_simple_idct, idct, NO_PERM),
00095
00096 #ifdef HAVE_MMX
00097 DCT_ERROR("MMX", 0, ff_fdct_mmx, fdct, NO_PERM),
00098 #ifdef HAVE_MMX2
00099 DCT_ERROR("MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM),
00100 #endif
00101
00102 #ifdef CONFIG_GPL
00103 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM),
00104 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM),
00105 #endif
00106 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
00107 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM),
00108 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM),
00109 #endif
00110
00111 #ifdef HAVE_ALTIVEC
00112 DCT_ERROR("altivecfdct", 0, fdct_altivec, fdct, NO_PERM),
00113 #endif
00114
00115 #ifdef ARCH_BFIN
00116 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM),
00117 DCT_ERROR("BFINidct", 1, ff_bfin_idct, idct, NO_PERM),
00118 #endif
00119
00120 { 0 }
00121 };
00122
/* Right shift applied by dct_error() after multiplying by the per-entry
 * scale derived from aanscales[]. */
#define AANSCALE_BITS 12

/*
 * Per-coefficient scale table, stored row by row as a symmetric 8x8 matrix
 * whose first entry is 16384 (1 << 14).  dct_error() divides a fixed-point
 * constant by these entries to rescale the output of SCALE_PERM transforms.
 */
static const unsigned short aanscales[8 * 8] = {
    /* row 0 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 1 */ 22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    /* row 2 */ 21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    /* row 3 */ 19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    /* row 4 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 5 */ 12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    /* row 6 */  8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    /* row 7 */  4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
00135
00136 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
00137
/*
 * Current wall-clock time from gettimeofday(), expressed as a single
 * 64-bit count of microseconds.
 */
int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* Widen before multiplying so the seconds part cannot overflow. */
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
00144
/* Number of blocks pushed through each transform by the accuracy tests. */
#define NB_ITS 20000
/* Number of transform calls per batch of the speed tests; batches repeat
 * until at least one second has elapsed. */
#define NB_ITS_SPEED 50000

/* Input permutation for MMX_PERM transforms; computed by idct_mmx_init(). */
static short idct_mmx_perm[64];

/*
 * Input permutation for MMX_SIMPLE_PERM transforms: natural index i is
 * stored at position idct_simple_mmx_perm[i].  The table is only ever read,
 * so it is const like the other lookup table in this file.
 */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00160
00161 void idct_mmx_init(void)
00162 {
00163 int i;
00164
00165
00166 for (i = 0; i < 64; i++) {
00167 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00168
00169 }
00170 }
00171
00172 static DCTELEM block[64] __attribute__ ((aligned (8)));
00173 static DCTELEM block1[64] __attribute__ ((aligned (8)));
00174 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
00175
00176 void dct_error(const char *name, int is_idct,
00177 void (*fdct_func)(DCTELEM *block),
00178 void (*fdct_ref)(DCTELEM *block), int form, int test)
00179 {
00180 int it, i, scale;
00181 int err_inf, v;
00182 int64_t err2, ti, ti1, it1;
00183 int64_t sysErr[64], sysErrMax=0;
00184 int maxout=0;
00185 int blockSumErrMax=0, blockSumErr;
00186
00187 srandom(0);
00188
00189 err_inf = 0;
00190 err2 = 0;
00191 for(i=0; i<64; i++) sysErr[i]=0;
00192 for(it=0;it<NB_ITS;it++) {
00193 for(i=0;i<64;i++)
00194 block1[i] = 0;
00195 switch(test){
00196 case 0:
00197 for(i=0;i<64;i++)
00198 block1[i] = (random() % 512) -256;
00199 if (is_idct){
00200 fdct(block1);
00201
00202 for(i=0;i<64;i++)
00203 block1[i]>>=3;
00204 }
00205 break;
00206 case 1:{
00207 int num= (random()%10)+1;
00208 for(i=0;i<num;i++)
00209 block1[random()%64] = (random() % 512) -256;
00210 }break;
00211 case 2:
00212 block1[0]= (random()%4096)-2048;
00213 block1[63]= (block1[0]&1)^1;
00214 break;
00215 }
00216
00217 #if 0 // simulate mismatch control
00218 { int sum=0;
00219 for(i=0;i<64;i++)
00220 sum+=block1[i];
00221
00222 if((sum&1)==0) block1[63]^=1;
00223 }
00224 #endif
00225
00226 for(i=0; i<64; i++)
00227 block_org[i]= block1[i];
00228
00229 if (form == MMX_PERM) {
00230 for(i=0;i<64;i++)
00231 block[idct_mmx_perm[i]] = block1[i];
00232 } else if (form == MMX_SIMPLE_PERM) {
00233 for(i=0;i<64;i++)
00234 block[idct_simple_mmx_perm[i]] = block1[i];
00235
00236 } else {
00237 for(i=0; i<64; i++)
00238 block[i]= block1[i];
00239 }
00240 #if 0 // simulate mismatch control for tested IDCT but not the ref
00241 { int sum=0;
00242 for(i=0;i<64;i++)
00243 sum+=block[i];
00244
00245 if((sum&1)==0) block[63]^=1;
00246 }
00247 #endif
00248
00249 fdct_func(block);
00250 emms_c();
00251
00252 if (form == SCALE_PERM) {
00253 for(i=0; i<64; i++) {
00254 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
00255 block[i] = (block[i] * scale ) >> AANSCALE_BITS;
00256 }
00257 }
00258
00259 fdct_ref(block1);
00260
00261 blockSumErr=0;
00262 for(i=0;i<64;i++) {
00263 v = abs(block[i] - block1[i]);
00264 if (v > err_inf)
00265 err_inf = v;
00266 err2 += v * v;
00267 sysErr[i] += block[i] - block1[i];
00268 blockSumErr += v;
00269 if( abs(block[i])>maxout) maxout=abs(block[i]);
00270 }
00271 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
00272 #if 0 // print different matrix pairs
00273 if(blockSumErr){
00274 printf("\n");
00275 for(i=0; i<64; i++){
00276 if((i&7)==0) printf("\n");
00277 printf("%4d ", block_org[i]);
00278 }
00279 for(i=0; i<64; i++){
00280 if((i&7)==0) printf("\n");
00281 printf("%4d ", block[i] - block1[i]);
00282 }
00283 }
00284 #endif
00285 }
00286 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
00287
00288 #if 1 // dump systematic errors
00289 for(i=0; i<64; i++){
00290 if(i%8==0) printf("\n");
00291 printf("%5d ", (int)sysErr[i]);
00292 }
00293 printf("\n");
00294 #endif
00295
00296 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
00297 is_idct ? "IDCT" : "DCT",
00298 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
00299 #if 1 //Speed test
00300
00301 for(i=0;i<64;i++)
00302 block1[i] = 0;
00303 switch(test){
00304 case 0:
00305 for(i=0;i<64;i++)
00306 block1[i] = (random() % 512) -256;
00307 if (is_idct){
00308 fdct(block1);
00309
00310 for(i=0;i<64;i++)
00311 block1[i]>>=3;
00312 }
00313 break;
00314 case 1:{
00315 case 2:
00316 block1[0] = (random() % 512) -256;
00317 block1[1] = (random() % 512) -256;
00318 block1[2] = (random() % 512) -256;
00319 block1[3] = (random() % 512) -256;
00320 }break;
00321 }
00322
00323 if (form == MMX_PERM) {
00324 for(i=0;i<64;i++)
00325 block[idct_mmx_perm[i]] = block1[i];
00326 } else if(form == MMX_SIMPLE_PERM) {
00327 for(i=0;i<64;i++)
00328 block[idct_simple_mmx_perm[i]] = block1[i];
00329 } else {
00330 for(i=0; i<64; i++)
00331 block[i]= block1[i];
00332 }
00333
00334 ti = gettime();
00335 it1 = 0;
00336 do {
00337 for(it=0;it<NB_ITS_SPEED;it++) {
00338 for(i=0; i<64; i++)
00339 block[i]= block1[i];
00340
00341
00342 fdct_func(block);
00343 }
00344 it1 += NB_ITS_SPEED;
00345 ti1 = gettime() - ti;
00346 } while (ti1 < 1000000);
00347 emms_c();
00348
00349 printf("%s %s: %0.1f kdct/s\n",
00350 is_idct ? "IDCT" : "DCT",
00351 name, (double)it1 * 1000.0 / (double)ti1);
00352 #endif
00353 }
00354
/* 8x8 pixel outputs compared by idct248_error(): img_dest receives the
 * routine under test, img_dest1 the idct248_ref() result.  Both are 8-byte
 * aligned. */
static uint8_t img_dest [8 * 8] __attribute__((aligned(8)));
static uint8_t img_dest1[8 * 8] __attribute__((aligned(8)));
00357
00358 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
00359 {
00360 static int init;
00361 static double c8[8][8];
00362 static double c4[4][4];
00363 double block1[64], block2[64], block3[64];
00364 double s, sum, v;
00365 int i, j, k;
00366
00367 if (!init) {
00368 init = 1;
00369
00370 for(i=0;i<8;i++) {
00371 sum = 0;
00372 for(j=0;j<8;j++) {
00373 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
00374 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
00375 sum += c8[i][j] * c8[i][j];
00376 }
00377 }
00378
00379 for(i=0;i<4;i++) {
00380 sum = 0;
00381 for(j=0;j<4;j++) {
00382 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
00383 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
00384 sum += c4[i][j] * c4[i][j];
00385 }
00386 }
00387 }
00388
00389
00390 s = 0.5 * sqrt(2.0);
00391 for(i=0;i<4;i++) {
00392 for(j=0;j<8;j++) {
00393 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
00394 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
00395 }
00396 }
00397
00398
00399 for(i=0;i<8;i++) {
00400 for(j=0;j<8;j++) {
00401 sum = 0;
00402 for(k=0;k<8;k++)
00403 sum += c8[k][j] * block1[8*i+k];
00404 block2[8*i+j] = sum;
00405 }
00406 }
00407
00408
00409 for(i=0;i<8;i++) {
00410 for(j=0;j<4;j++) {
00411
00412 sum = 0;
00413 for(k=0;k<4;k++)
00414 sum += c4[k][j] * block2[8*(2*k)+i];
00415 block3[8*(2*j)+i] = sum;
00416
00417
00418 sum = 0;
00419 for(k=0;k<4;k++)
00420 sum += c4[k][j] * block2[8*(2*k+1)+i];
00421 block3[8*(2*j+1)+i] = sum;
00422 }
00423 }
00424
00425
00426 for(i=0;i<8;i++) {
00427 for(j=0;j<8;j++) {
00428 v = block3[8*i+j];
00429 if (v < 0)
00430 v = 0;
00431 else if (v > 255)
00432 v = 255;
00433 dest[i * linesize + j] = (int)rint(v);
00434 }
00435 }
00436 }
00437
00438 void idct248_error(const char *name,
00439 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
00440 {
00441 int it, i, it1, ti, ti1, err_max, v;
00442
00443 srandom(0);
00444
00445
00446
00447 err_max = 0;
00448 for(it=0;it<NB_ITS;it++) {
00449
00450
00451 for(i=0;i<64;i++)
00452 block1[i] = (random() % 256) - 128;
00453 block1[0] += 1024;
00454
00455 for(i=0; i<64; i++)
00456 block[i]= block1[i];
00457 idct248_ref(img_dest1, 8, block);
00458
00459 for(i=0; i<64; i++)
00460 block[i]= block1[i];
00461 idct248_put(img_dest, 8, block);
00462
00463 for(i=0;i<64;i++) {
00464 v = abs((int)img_dest[i] - (int)img_dest1[i]);
00465 if (v == 255)
00466 printf("%d %d\n", img_dest[i], img_dest1[i]);
00467 if (v > err_max)
00468 err_max = v;
00469 }
00470 #if 0
00471 printf("ref=\n");
00472 for(i=0;i<8;i++) {
00473 int j;
00474 for(j=0;j<8;j++) {
00475 printf(" %3d", img_dest1[i*8+j]);
00476 }
00477 printf("\n");
00478 }
00479
00480 printf("out=\n");
00481 for(i=0;i<8;i++) {
00482 int j;
00483 for(j=0;j<8;j++) {
00484 printf(" %3d", img_dest[i*8+j]);
00485 }
00486 printf("\n");
00487 }
00488 #endif
00489 }
00490 printf("%s %s: err_inf=%d\n",
00491 1 ? "IDCT248" : "DCT248",
00492 name, err_max);
00493
00494 ti = gettime();
00495 it1 = 0;
00496 do {
00497 for(it=0;it<NB_ITS_SPEED;it++) {
00498 for(i=0; i<64; i++)
00499 block[i]= block1[i];
00500
00501
00502 idct248_put(img_dest, 8, block);
00503 }
00504 it1 += NB_ITS_SPEED;
00505 ti1 = gettime() - ti;
00506 } while (ti1 < 1000000);
00507 emms_c();
00508
00509 printf("%s %s: %0.1f kdct/s\n",
00510 1 ? "IDCT248" : "DCT248",
00511 name, (double)it1 * 1000.0 / (double)ti1);
00512 }
00513
/* Print the command-line usage summary on standard output. */
void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n";

    printf("%s", usage);
}
00523
00524 int main(int argc, char **argv)
00525 {
00526 int test_idct = 0, test_248_dct = 0;
00527 int c,i;
00528 int test=1;
00529
00530 init_fdct();
00531 idct_mmx_init();
00532
00533 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
00534 for(i=0;i<MAX_NEG_CROP;i++) {
00535 cropTbl[i] = 0;
00536 cropTbl[i + MAX_NEG_CROP + 256] = 255;
00537 }
00538
00539 for(;;) {
00540 c = getopt(argc, argv, "ih4");
00541 if (c == -1)
00542 break;
00543 switch(c) {
00544 case 'i':
00545 test_idct = 1;
00546 break;
00547 case '4':
00548 test_248_dct = 1;
00549 break;
00550 default :
00551 case 'h':
00552 help();
00553 return 0;
00554 }
00555 }
00556
00557 if(optind <argc) test= atoi(argv[optind]);
00558
00559 printf("ffmpeg DCT/IDCT test\n");
00560
00561 if (test_248_dct) {
00562 idct248_error("SIMPLE-C", ff_simple_idct248_put);
00563 } else {
00564 for (i=0;algos[i].name;i++)
00565 if (algos[i].is_idct == test_idct) {
00566 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
00567 }
00568 }
00569 return 0;
00570 }