00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "avcodec.h"
00031 #include "dsputil.h"
00032 #include "mpegvideo.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "h263.h"
00036 #include "snow.h"
00037
00038
00039 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
00040
00041
00042 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
00043
00044
00045 void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
00046
/* Clipping table: indexed as ff_cropTbl[MAX_NEG_CROP + x]; used by the
 * *_pixels_clamped_c functions below to clamp DCT output to a pixel range.
 * Zero-initialized here — presumably filled by the DSP init code at
 * runtime; TODO confirm against dsputil_init. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Squared-value lookup, indexed from its midpoint (sq = ff_squareTbl + 256)
 * by the sse*_c / pix_norm1_c functions below. Zero here; filled at init
 * — confirm. */
uint32_t ff_squareTbl[512] = {0, };
00049
/* Zigzag scan order: entry n is the raster index (row*8 + col) of the n-th
 * coefficient visited when scanning an 8x8 block diagonally from the DC
 * coefficient to the highest frequency. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00060
00061
00062
/* Alternate "248" zigzag scan (scan position -> raster index); presumably
 * used for interlaced/field-coded 8x8 blocks — confirm against the codecs
 * that select this table. */
const uint8_t ff_zigzag248_direct[64] = {
    0,   8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00073
00074
/* NOTE(review): zero-initialized here; looks like a runtime-built inverse of
 * ff_zigzag_direct (scan position for each raster index), 8-byte aligned for
 * MMX code — confirm against the init code that fills it. */
DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, };
00076
/* Alternate horizontal scan order: entry n is the raster index of the n-th
 * coefficient; favors horizontally adjacent coefficients early in the scan. */
const uint8_t ff_alternate_horizontal_scan[64] = {
    0,   1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00087
/* Alternate vertical scan order: entry n is the raster index of the n-th
 * coefficient; favors vertically adjacent coefficients early in the scan. */
const uint8_t ff_alternate_vertical_scan[64] = {
    0,   8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00098
00099
/* ff_inverse[i] == ceil(2^32 / i) for i >= 2; entry 0 is unused (0) and
 * entry 1 is saturated to 2^32 - 1 since 2^32 does not fit in 32 bits.
 * Allows replacing an integer division by i with a multiply plus shift —
 * presumably via a FASTDIV-style macro; confirm at the call sites. */
const uint32_t ff_inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
00134
00135
/* Permutation of 0..63 — presumably the coefficient reordering required by
 * the MMX "simple" IDCT; confirm against the idct_permutation setup that
 * consumes this table. */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00146
/* Sum of all 256 pixel values in a 16x16 block.
 * pix: top-left of the block; line_size: stride in bytes between rows.
 * Returns the plain (unscaled) sum. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
00168
00169 static int pix_norm1_c(uint8_t * pix, int line_size)
00170 {
00171 int s, i, j;
00172 uint32_t *sq = ff_squareTbl + 256;
00173
00174 s = 0;
00175 for (i = 0; i < 16; i++) {
00176 for (j = 0; j < 16; j += 8) {
00177 #if 0
00178 s += sq[pix[0]];
00179 s += sq[pix[1]];
00180 s += sq[pix[2]];
00181 s += sq[pix[3]];
00182 s += sq[pix[4]];
00183 s += sq[pix[5]];
00184 s += sq[pix[6]];
00185 s += sq[pix[7]];
00186 #else
00187 #if LONG_MAX > 2147483647
00188 register uint64_t x=*(uint64_t*)pix;
00189 s += sq[x&0xff];
00190 s += sq[(x>>8)&0xff];
00191 s += sq[(x>>16)&0xff];
00192 s += sq[(x>>24)&0xff];
00193 s += sq[(x>>32)&0xff];
00194 s += sq[(x>>40)&0xff];
00195 s += sq[(x>>48)&0xff];
00196 s += sq[(x>>56)&0xff];
00197 #else
00198 register uint32_t x=*(uint32_t*)pix;
00199 s += sq[x&0xff];
00200 s += sq[(x>>8)&0xff];
00201 s += sq[(x>>16)&0xff];
00202 s += sq[(x>>24)&0xff];
00203 x=*(uint32_t*)(pix+4);
00204 s += sq[x&0xff];
00205 s += sq[(x>>8)&0xff];
00206 s += sq[(x>>16)&0xff];
00207 s += sq[(x>>24)&0xff];
00208 #endif
00209 #endif
00210 pix += 8;
00211 }
00212 pix += line_size - 16;
00213 }
00214 return s;
00215 }
00216
/* Byte-swap w 32-bit words from src into dst (dst[i] = bswap_32(src[i])).
 * The mapping is element-wise, so the hand-unrolled form this replaces
 * produced exactly the same stores. */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int idx;

    for (idx = 0; idx < w; idx++)
        dst[idx] = bswap_32(src[idx]);
}
00234
00235 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00236 {
00237 int s, i;
00238 uint32_t *sq = ff_squareTbl + 256;
00239
00240 s = 0;
00241 for (i = 0; i < h; i++) {
00242 s += sq[pix1[0] - pix2[0]];
00243 s += sq[pix1[1] - pix2[1]];
00244 s += sq[pix1[2] - pix2[2]];
00245 s += sq[pix1[3] - pix2[3]];
00246 pix1 += line_size;
00247 pix2 += line_size;
00248 }
00249 return s;
00250 }
00251
00252 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00253 {
00254 int s, i;
00255 uint32_t *sq = ff_squareTbl + 256;
00256
00257 s = 0;
00258 for (i = 0; i < h; i++) {
00259 s += sq[pix1[0] - pix2[0]];
00260 s += sq[pix1[1] - pix2[1]];
00261 s += sq[pix1[2] - pix2[2]];
00262 s += sq[pix1[3] - pix2[3]];
00263 s += sq[pix1[4] - pix2[4]];
00264 s += sq[pix1[5] - pix2[5]];
00265 s += sq[pix1[6] - pix2[6]];
00266 s += sq[pix1[7] - pix2[7]];
00267 pix1 += line_size;
00268 pix2 += line_size;
00269 }
00270 return s;
00271 }
00272
00273 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00274 {
00275 int s, i;
00276 uint32_t *sq = ff_squareTbl + 256;
00277
00278 s = 0;
00279 for (i = 0; i < h; i++) {
00280 s += sq[pix1[ 0] - pix2[ 0]];
00281 s += sq[pix1[ 1] - pix2[ 1]];
00282 s += sq[pix1[ 2] - pix2[ 2]];
00283 s += sq[pix1[ 3] - pix2[ 3]];
00284 s += sq[pix1[ 4] - pix2[ 4]];
00285 s += sq[pix1[ 5] - pix2[ 5]];
00286 s += sq[pix1[ 6] - pix2[ 6]];
00287 s += sq[pix1[ 7] - pix2[ 7]];
00288 s += sq[pix1[ 8] - pix2[ 8]];
00289 s += sq[pix1[ 9] - pix2[ 9]];
00290 s += sq[pix1[10] - pix2[10]];
00291 s += sq[pix1[11] - pix2[11]];
00292 s += sq[pix1[12] - pix2[12]];
00293 s += sq[pix1[13] - pix2[13]];
00294 s += sq[pix1[14] - pix2[14]];
00295 s += sq[pix1[15] - pix2[15]];
00296
00297 pix1 += line_size;
00298 pix2 += line_size;
00299 }
00300 return s;
00301 }
00302
00303
00304 #ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c
/* Wavelet-domain distortion metric: computes a weighted sum of absolute
 * DWT coefficients of the difference between two w x h blocks (w == h,
 * w is 8, 16 or 32). type selects the wavelet passed to ff_spatial_dwt
 * (type 0 is used by the w97_* wrappers below, type 1 by the w53_*
 * wrappers). Returns the weighted score scaled down by 2^9. */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    /* 8x8 blocks get a 3-level decomposition, larger blocks 4 levels. */
    const int dec_count= w==8 ? 3 : 4;
    int tmp[32*32];   /* difference buffer, fixed stride 32 */
    int level, ori;
    /* Per-subband weights, indexed [type][dec_count-3][level][orientation].
     * Rows with a leading 0 have no LL (orientation 0) subband at that
     * level; the first table of each pair has only 3 levels initialized
     * (the 4th row stays zero). */
    static const int scale[2][2][4][4]={
      {
        {
            /* dec_count == 3 (8x8 blocks) */
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            /* dec_count == 4 (16x16 / 32x32 blocks) */
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            /* dec_count == 3 (8x8 blocks) */
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            /* dec_count == 4 (16x16 / 32x32 blocks) */
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };

    /* Pixel differences, scaled by 16 for extra precision in the DWT. */
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    /* In-place spatial wavelet transform of the difference (stride 32). */
    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    assert(w==h);
    /* Walk every subband: only the coarsest level keeps its LL band
     * (ori 0); finer levels contribute HL/LH/HH (ori 1..3). */
    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);      /* subband side length */
            int sx= (ori&1) ? size : 0;          /* horizontal offset   */
            int stride= 32<<(dec_count-level);   /* row step within tmp */
            int sy= (ori&2) ? stride>>1 : 0;     /* vertical offset     */

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(v);
                }
            }
        }
    }
    assert(s>=0);
    return s>>9;  /* undo the weight/precision scaling */
}
00373
/* Wavelet distortion of an 8x8 block, DWT type 1 (the "w53" variant —
 * presumably the 5/3 wavelet; confirm against ff_spatial_dwt). */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 8, h, 1);
}
00377
/* Wavelet distortion of an 8x8 block, DWT type 0 (the "w97" variant —
 * presumably the 9/7 wavelet; confirm against ff_spatial_dwt). */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 8, h, 0);
}
00381
/* Wavelet distortion of a 16x16 block, DWT type 1 ("w53"). */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}
00385
/* Wavelet distortion of a 16x16 block, DWT type 0 ("w97"). */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}
00389
/* Wavelet distortion of a 32x32 block, DWT type 1 ("w53").
 * Non-static: exported for use outside this file. */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
}
00393
/* Wavelet distortion of a 32x32 block, DWT type 0 ("w97").
 * Non-static: exported for use outside this file. */
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
}
00397 #endif
00398
00399 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00400 {
00401 int i;
00402
00403
00404 for(i=0;i<8;i++) {
00405 block[0] = pixels[0];
00406 block[1] = pixels[1];
00407 block[2] = pixels[2];
00408 block[3] = pixels[3];
00409 block[4] = pixels[4];
00410 block[5] = pixels[5];
00411 block[6] = pixels[6];
00412 block[7] = pixels[7];
00413 pixels += line_size;
00414 block += 8;
00415 }
00416 }
00417
00418 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00419 const uint8_t *s2, int stride){
00420 int i;
00421
00422
00423 for(i=0;i<8;i++) {
00424 block[0] = s1[0] - s2[0];
00425 block[1] = s1[1] - s2[1];
00426 block[2] = s1[2] - s2[2];
00427 block[3] = s1[3] - s2[3];
00428 block[4] = s1[4] - s2[4];
00429 block[5] = s1[5] - s2[5];
00430 block[6] = s1[6] - s2[6];
00431 block[7] = s1[7] - s2[7];
00432 s1 += stride;
00433 s2 += stride;
00434 block += 8;
00435 }
00436 }
00437
00438
00439 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00440 int line_size)
00441 {
00442 int i;
00443 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00444
00445
00446 for(i=0;i<8;i++) {
00447 pixels[0] = cm[block[0]];
00448 pixels[1] = cm[block[1]];
00449 pixels[2] = cm[block[2]];
00450 pixels[3] = cm[block[3]];
00451 pixels[4] = cm[block[4]];
00452 pixels[5] = cm[block[5]];
00453 pixels[6] = cm[block[6]];
00454 pixels[7] = cm[block[7]];
00455
00456 pixels += line_size;
00457 block += 8;
00458 }
00459 }
00460
00461 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00462 int line_size)
00463 {
00464 int i;
00465 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00466
00467
00468 for(i=0;i<4;i++) {
00469 pixels[0] = cm[block[0]];
00470 pixels[1] = cm[block[1]];
00471 pixels[2] = cm[block[2]];
00472 pixels[3] = cm[block[3]];
00473
00474 pixels += line_size;
00475 block += 8;
00476 }
00477 }
00478
00479 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00480 int line_size)
00481 {
00482 int i;
00483 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00484
00485
00486 for(i=0;i<2;i++) {
00487 pixels[0] = cm[block[0]];
00488 pixels[1] = cm[block[1]];
00489
00490 pixels += line_size;
00491 block += 8;
00492 }
00493 }
00494
00495 static void put_signed_pixels_clamped_c(const DCTELEM *block,
00496 uint8_t *restrict pixels,
00497 int line_size)
00498 {
00499 int i, j;
00500
00501 for (i = 0; i < 8; i++) {
00502 for (j = 0; j < 8; j++) {
00503 if (*block < -128)
00504 *pixels = 0;
00505 else if (*block > 127)
00506 *pixels = 255;
00507 else
00508 *pixels = (uint8_t)(*block + 128);
00509 block++;
00510 pixels++;
00511 }
00512 pixels += (line_size - 8);
00513 }
00514 }
00515
00516 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00517 int line_size)
00518 {
00519 int i;
00520 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00521
00522
00523 for(i=0;i<8;i++) {
00524 pixels[0] = cm[pixels[0] + block[0]];
00525 pixels[1] = cm[pixels[1] + block[1]];
00526 pixels[2] = cm[pixels[2] + block[2]];
00527 pixels[3] = cm[pixels[3] + block[3]];
00528 pixels[4] = cm[pixels[4] + block[4]];
00529 pixels[5] = cm[pixels[5] + block[5]];
00530 pixels[6] = cm[pixels[6] + block[6]];
00531 pixels[7] = cm[pixels[7] + block[7]];
00532 pixels += line_size;
00533 block += 8;
00534 }
00535 }
00536
00537 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00538 int line_size)
00539 {
00540 int i;
00541 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00542
00543
00544 for(i=0;i<4;i++) {
00545 pixels[0] = cm[pixels[0] + block[0]];
00546 pixels[1] = cm[pixels[1] + block[1]];
00547 pixels[2] = cm[pixels[2] + block[2]];
00548 pixels[3] = cm[pixels[3] + block[3]];
00549 pixels += line_size;
00550 block += 8;
00551 }
00552 }
00553
00554 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00555 int line_size)
00556 {
00557 int i;
00558 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00559
00560
00561 for(i=0;i<2;i++) {
00562 pixels[0] = cm[pixels[0] + block[0]];
00563 pixels[1] = cm[pixels[1] + block[1]];
00564 pixels += line_size;
00565 block += 8;
00566 }
00567 }
00568
00569 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00570 {
00571 int i;
00572 for(i=0;i<8;i++) {
00573 pixels[0] += block[0];
00574 pixels[1] += block[1];
00575 pixels[2] += block[2];
00576 pixels[3] += block[3];
00577 pixels[4] += block[4];
00578 pixels[5] += block[5];
00579 pixels[6] += block[6];
00580 pixels[7] += block[7];
00581 pixels += line_size;
00582 block += 8;
00583 }
00584 }
00585
00586 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00587 {
00588 int i;
00589 for(i=0;i<4;i++) {
00590 pixels[0] += block[0];
00591 pixels[1] += block[1];
00592 pixels[2] += block[2];
00593 pixels[3] += block[3];
00594 pixels += line_size;
00595 block += 4;
00596 }
00597 }
00598
00599 static int sum_abs_dctelem_c(DCTELEM *block)
00600 {
00601 int sum=0, i;
00602 for(i=0; i<64; i++)
00603 sum+= FFABS(block[i]);
00604 return sum;
00605 }
00606
00607 #if 0
00608
00609 #define PIXOP2(OPNAME, OP) \
00610 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00611 {\
00612 int i;\
00613 for(i=0; i<h; i++){\
00614 OP(*((uint64_t*)block), AV_RN64(pixels));\
00615 pixels+=line_size;\
00616 block +=line_size;\
00617 }\
00618 }\
00619 \
00620 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00621 {\
00622 int i;\
00623 for(i=0; i<h; i++){\
00624 const uint64_t a= AV_RN64(pixels );\
00625 const uint64_t b= AV_RN64(pixels+1);\
00626 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00627 pixels+=line_size;\
00628 block +=line_size;\
00629 }\
00630 }\
00631 \
00632 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00633 {\
00634 int i;\
00635 for(i=0; i<h; i++){\
00636 const uint64_t a= AV_RN64(pixels );\
00637 const uint64_t b= AV_RN64(pixels+1);\
00638 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00639 pixels+=line_size;\
00640 block +=line_size;\
00641 }\
00642 }\
00643 \
00644 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00645 {\
00646 int i;\
00647 for(i=0; i<h; i++){\
00648 const uint64_t a= AV_RN64(pixels );\
00649 const uint64_t b= AV_RN64(pixels+line_size);\
00650 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00651 pixels+=line_size;\
00652 block +=line_size;\
00653 }\
00654 }\
00655 \
00656 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00657 {\
00658 int i;\
00659 for(i=0; i<h; i++){\
00660 const uint64_t a= AV_RN64(pixels );\
00661 const uint64_t b= AV_RN64(pixels+line_size);\
00662 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00663 pixels+=line_size;\
00664 block +=line_size;\
00665 }\
00666 }\
00667 \
00668 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00669 {\
00670 int i;\
00671 const uint64_t a= AV_RN64(pixels );\
00672 const uint64_t b= AV_RN64(pixels+1);\
00673 uint64_t l0= (a&0x0303030303030303ULL)\
00674 + (b&0x0303030303030303ULL)\
00675 + 0x0202020202020202ULL;\
00676 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00677 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00678 uint64_t l1,h1;\
00679 \
00680 pixels+=line_size;\
00681 for(i=0; i<h; i+=2){\
00682 uint64_t a= AV_RN64(pixels );\
00683 uint64_t b= AV_RN64(pixels+1);\
00684 l1= (a&0x0303030303030303ULL)\
00685 + (b&0x0303030303030303ULL);\
00686 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00687 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00688 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00689 pixels+=line_size;\
00690 block +=line_size;\
00691 a= AV_RN64(pixels );\
00692 b= AV_RN64(pixels+1);\
00693 l0= (a&0x0303030303030303ULL)\
00694 + (b&0x0303030303030303ULL)\
00695 + 0x0202020202020202ULL;\
00696 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00697 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00698 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00699 pixels+=line_size;\
00700 block +=line_size;\
00701 }\
00702 }\
00703 \
00704 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00705 {\
00706 int i;\
00707 const uint64_t a= AV_RN64(pixels );\
00708 const uint64_t b= AV_RN64(pixels+1);\
00709 uint64_t l0= (a&0x0303030303030303ULL)\
00710 + (b&0x0303030303030303ULL)\
00711 + 0x0101010101010101ULL;\
00712 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00713 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00714 uint64_t l1,h1;\
00715 \
00716 pixels+=line_size;\
00717 for(i=0; i<h; i+=2){\
00718 uint64_t a= AV_RN64(pixels );\
00719 uint64_t b= AV_RN64(pixels+1);\
00720 l1= (a&0x0303030303030303ULL)\
00721 + (b&0x0303030303030303ULL);\
00722 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00723 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00724 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00725 pixels+=line_size;\
00726 block +=line_size;\
00727 a= AV_RN64(pixels );\
00728 b= AV_RN64(pixels+1);\
00729 l0= (a&0x0303030303030303ULL)\
00730 + (b&0x0303030303030303ULL)\
00731 + 0x0101010101010101ULL;\
00732 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00733 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00734 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00735 pixels+=line_size;\
00736 block +=line_size;\
00737 }\
00738 }\
00739 \
00740 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
00741 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
00742 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
00743 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
00744 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
00745 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
00746 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
00747
00748 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
00749 #else // 64 bit variant
00750
00751 #define PIXOP2(OPNAME, OP) \
00752 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00753 int i;\
00754 for(i=0; i<h; i++){\
00755 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
00756 pixels+=line_size;\
00757 block +=line_size;\
00758 }\
00759 }\
00760 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00761 int i;\
00762 for(i=0; i<h; i++){\
00763 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00764 pixels+=line_size;\
00765 block +=line_size;\
00766 }\
00767 }\
00768 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00769 int i;\
00770 for(i=0; i<h; i++){\
00771 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00772 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
00773 pixels+=line_size;\
00774 block +=line_size;\
00775 }\
00776 }\
00777 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00778 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
00779 }\
00780 \
00781 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00782 int src_stride1, int src_stride2, int h){\
00783 int i;\
00784 for(i=0; i<h; i++){\
00785 uint32_t a,b;\
00786 a= AV_RN32(&src1[i*src_stride1 ]);\
00787 b= AV_RN32(&src2[i*src_stride2 ]);\
00788 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
00789 a= AV_RN32(&src1[i*src_stride1+4]);\
00790 b= AV_RN32(&src2[i*src_stride2+4]);\
00791 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
00792 }\
00793 }\
00794 \
00795 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00796 int src_stride1, int src_stride2, int h){\
00797 int i;\
00798 for(i=0; i<h; i++){\
00799 uint32_t a,b;\
00800 a= AV_RN32(&src1[i*src_stride1 ]);\
00801 b= AV_RN32(&src2[i*src_stride2 ]);\
00802 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00803 a= AV_RN32(&src1[i*src_stride1+4]);\
00804 b= AV_RN32(&src2[i*src_stride2+4]);\
00805 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
00806 }\
00807 }\
00808 \
00809 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00810 int src_stride1, int src_stride2, int h){\
00811 int i;\
00812 for(i=0; i<h; i++){\
00813 uint32_t a,b;\
00814 a= AV_RN32(&src1[i*src_stride1 ]);\
00815 b= AV_RN32(&src2[i*src_stride2 ]);\
00816 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00817 }\
00818 }\
00819 \
00820 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00821 int src_stride1, int src_stride2, int h){\
00822 int i;\
00823 for(i=0; i<h; i++){\
00824 uint32_t a,b;\
00825 a= AV_RN16(&src1[i*src_stride1 ]);\
00826 b= AV_RN16(&src2[i*src_stride2 ]);\
00827 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00828 }\
00829 }\
00830 \
00831 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00832 int src_stride1, int src_stride2, int h){\
00833 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00834 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00835 }\
00836 \
00837 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00838 int src_stride1, int src_stride2, int h){\
00839 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00840 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00841 }\
00842 \
00843 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00844 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00845 }\
00846 \
00847 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00848 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00849 }\
00850 \
00851 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00852 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00853 }\
00854 \
00855 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00856 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00857 }\
00858 \
00859 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00860 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00861 int i;\
00862 for(i=0; i<h; i++){\
00863 uint32_t a, b, c, d, l0, l1, h0, h1;\
00864 a= AV_RN32(&src1[i*src_stride1]);\
00865 b= AV_RN32(&src2[i*src_stride2]);\
00866 c= AV_RN32(&src3[i*src_stride3]);\
00867 d= AV_RN32(&src4[i*src_stride4]);\
00868 l0= (a&0x03030303UL)\
00869 + (b&0x03030303UL)\
00870 + 0x02020202UL;\
00871 h0= ((a&0xFCFCFCFCUL)>>2)\
00872 + ((b&0xFCFCFCFCUL)>>2);\
00873 l1= (c&0x03030303UL)\
00874 + (d&0x03030303UL);\
00875 h1= ((c&0xFCFCFCFCUL)>>2)\
00876 + ((d&0xFCFCFCFCUL)>>2);\
00877 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00878 a= AV_RN32(&src1[i*src_stride1+4]);\
00879 b= AV_RN32(&src2[i*src_stride2+4]);\
00880 c= AV_RN32(&src3[i*src_stride3+4]);\
00881 d= AV_RN32(&src4[i*src_stride4+4]);\
00882 l0= (a&0x03030303UL)\
00883 + (b&0x03030303UL)\
00884 + 0x02020202UL;\
00885 h0= ((a&0xFCFCFCFCUL)>>2)\
00886 + ((b&0xFCFCFCFCUL)>>2);\
00887 l1= (c&0x03030303UL)\
00888 + (d&0x03030303UL);\
00889 h1= ((c&0xFCFCFCFCUL)>>2)\
00890 + ((d&0xFCFCFCFCUL)>>2);\
00891 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00892 }\
00893 }\
00894 \
00895 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00896 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00897 }\
00898 \
00899 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00900 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00901 }\
00902 \
00903 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00904 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00905 }\
00906 \
00907 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00908 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00909 }\
00910 \
00911 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00912 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00913 int i;\
00914 for(i=0; i<h; i++){\
00915 uint32_t a, b, c, d, l0, l1, h0, h1;\
00916 a= AV_RN32(&src1[i*src_stride1]);\
00917 b= AV_RN32(&src2[i*src_stride2]);\
00918 c= AV_RN32(&src3[i*src_stride3]);\
00919 d= AV_RN32(&src4[i*src_stride4]);\
00920 l0= (a&0x03030303UL)\
00921 + (b&0x03030303UL)\
00922 + 0x01010101UL;\
00923 h0= ((a&0xFCFCFCFCUL)>>2)\
00924 + ((b&0xFCFCFCFCUL)>>2);\
00925 l1= (c&0x03030303UL)\
00926 + (d&0x03030303UL);\
00927 h1= ((c&0xFCFCFCFCUL)>>2)\
00928 + ((d&0xFCFCFCFCUL)>>2);\
00929 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00930 a= AV_RN32(&src1[i*src_stride1+4]);\
00931 b= AV_RN32(&src2[i*src_stride2+4]);\
00932 c= AV_RN32(&src3[i*src_stride3+4]);\
00933 d= AV_RN32(&src4[i*src_stride4+4]);\
00934 l0= (a&0x03030303UL)\
00935 + (b&0x03030303UL)\
00936 + 0x01010101UL;\
00937 h0= ((a&0xFCFCFCFCUL)>>2)\
00938 + ((b&0xFCFCFCFCUL)>>2);\
00939 l1= (c&0x03030303UL)\
00940 + (d&0x03030303UL);\
00941 h1= ((c&0xFCFCFCFCUL)>>2)\
00942 + ((d&0xFCFCFCFCUL)>>2);\
00943 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00944 }\
00945 }\
00946 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00947 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00948 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00949 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00950 }\
00951 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00952 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00953 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00954 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00955 }\
00956 \
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* 2-pixel-wide half-pel (x+1/2, y+1/2) interpolation: each output byte */\
    /* is (cur + right + below + below_right + 2) >> 2.  a0/b0 and a1/b1    */\
    /* hold the horizontal pair sums of two consecutive source rows; each    */\
    /* row sum is reused for two outputs.  The +2 rounding bias is folded    */\
    /* into the even rows' sums only, so every output gets it exactly once.  */\
    /* NOTE(review): results are stored with plain assignment instead of     */\
    /* OP(), so non-put OPNAME variants behave like put here.                */\
    int i, a0, b0, a1, b1;\
    a0= pixels[0];\
    b0= pixels[1] + 2;\
    a0 += b0;\
    b0 += pixels[2];\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){ /* two output rows per iteration */\
        a1= pixels[0];\
        b1= pixels[1];\
        a1 += b1;\
        b1 += pixels[2];\
\
        block[0]= (a1+a0)>>2;\
        block[1]= (b1+b0)>>2;\
\
        pixels+=line_size;\
        block +=line_size;\
\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        block[0]= (a1+a0)>>2;\
        block[1]= (b1+b0)>>2;\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
00989 \
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* 4-pixel-wide half-pel (x+1/2, y+1/2) interpolation, SIMD-in-a-       */\
    /* register on one 32-bit word.  Per byte:                               */\
    /*   out = (cur + right + below + below_right + 2) >> 2                  */\
    /* l* accumulate the low 2 bits of each horizontal pair sum (plus the    */\
    /* 0x02 per-byte rounding bias, added once per output); h* hold the      */\
    /* high 6 bits pre-shifted.  Splitting low/high bits this way prevents   */\
    /* carries from crossing byte lanes.                                     */\
    int i;\
    const uint32_t a= AV_RN32(pixels  );\
    const uint32_t b= AV_RN32(pixels+1);\
    uint32_t l0= (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
    uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
               + ((b&0xFCFCFCFCUL)>>2);\
    uint32_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){ /* two output rows per iteration, reusing row sums */\
        uint32_t a= AV_RN32(pixels  );\
        uint32_t b= AV_RN32(pixels+1);\
        l1= (a&0x03030303UL)\
          + (b&0x03030303UL);\
        h1= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN32(pixels  );\
        b= AV_RN32(pixels+1);\
        l0= (a&0x03030303UL)\
          + (b&0x03030303UL)\
          + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
01025 \
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* 8-pixel-wide half-pel (x+1/2, y+1/2) interpolation: the 4-wide       */\
    /* SIMD-in-a-register scheme applied to two 4-byte columns (outer j).    */\
    /* Per byte: out = (cur + right + below + below_right + 2) >> 2.         */\
    /* l* carry the low 2 bits of the pair sums plus the 0x02 bias; h* the   */\
    /* pre-shifted high bits — keeps carries inside their byte lanes.        */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){ /* two output rows per iteration, reusing row sums */\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1); /* rewind to top, step 4 bytes right */\
        block +=4-line_size*h;\
    }\
}\
01066 \
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* No-rounding variant of the 8-wide xy2 interpolation: identical to     */\
    /* _pixels8_xy2_c except the per-byte bias is 0x01 instead of 0x02, so   */\
    /* ties in (sum of 4 + bias) >> 2 round down rather than to nearest.     */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0= (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){ /* two output rows per iteration, reusing row sums */\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1= (a&0x03030303UL)\
              + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0= (a&0x03030303UL)\
              + (b&0x03030303UL)\
              + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1); /* rewind to top, step 4 bytes right */\
        block +=4-line_size*h;\
    }\
}\
01107 \
/* Derive the 16-wide variants from two 8-wide calls (offset 8 apart).       */\
/* NOTE(review): no_rnd_pixels16_c maps to the *rounded* pixels8_c —         */\
/* presumably intentional since a straight copy involves no rounding;        */\
/* confirm against the pixels8_c definition above.                           */\
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
01116
/* Word-wise averaging store used by the avg_* variants. */
#define op_avg(a, b) a = rnd_avg32(a, b)
#endif /* closes an #if above this chunk — TODO confirm its condition */
/* Plain store used by the put_* variants. */
#define op_put(a, b) a = b

/* Instantiate the whole pixel-op family for both store policies. */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put

/* Scalar rounded averages of 2 and 4 values. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
01128
/* Uniform-stride adapter for the 3-stride two-source 16-wide average. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
01132
/* Uniform-stride adapter for the 3-stride two-source 8-wide average. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
01136
/*
 * 8-wide bilinear motion compensation with 1/16-pel offsets (x16, y16 in
 * 0..15, presumably — TODO confirm caller range).  The four corner weights
 * A..D sum to 256, so each output is (weighted sum + rounder) >> 8.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (A * src[col]
                      + B * src[col + 1]
                      + C * src[stride + col]
                      + D * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
01159
/*
 * Affine global motion compensation for one 8-pixel-wide block of h rows.
 * Source coordinates are tracked in 16.16 fixed point: each pixel advances
 * (vx,vy) by (dxx,dyx), each row advances the start (ox,oy) by (dxy,dyy).
 * `shift` gives the number of sub-pel bits; s = 1<<shift, and fractions are
 * the low `shift` bits of the 16-bit-truncated coordinate.  Samples outside
 * the [0,width)x[0,height) source are clamped to the nearest edge pixel,
 * dropping the interpolation along the clamped axis.  r is the rounding
 * constant applied before the final >>(shift*2).
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* After the decrements, width/height are the last valid column/row
       index; the strict (unsigned) < tests below therefore guarantee that
       index+1 (the bilinear right/bottom neighbour) is still in range. */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1); /* fraction taken before the sub-pel shift */
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2-D bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp row, horizontal-only filter */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp column, vertical-only filter */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* outside on both axes: nearest corner sample */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
01217
/* Full-pel copy: dispatch to the fixed-width copy routine for this width.
   Widths other than 2/4/8/16 are silently ignored. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}
01226
/* Third-pel interpolation, horizontal offset 1/3 (tpel = thirdpel,
 * presumably SVQ3 — TODO confirm): out ~ (2*cur + right)/3, computed as
 * (683*(2a+b+1))>>11 with 683 ~ 2048/3. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (2 * s[col] + s[col + 1] + 1)) >> 11;
    }
}
01237
/* Third-pel interpolation, horizontal offset 2/3:
 * out ~ (cur + 2*right)/3 via (683*(a+2b+1))>>11, 683 ~ 2048/3. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (s[col] + 2 * s[col + 1] + 1)) >> 11;
    }
}
01248
/* Third-pel interpolation, vertical offset 1/3:
 * out ~ (2*cur + below)/3 via (683*(2a+b+1))>>11, 683 ~ 2048/3. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (2 * s[col] + s[col + stride] + 1)) >> 11;
    }
}
01259
/* Third-pel interpolation at (1/3, 1/3): 2-D weights 4,3,3,2 over the
 * 2x2 neighbourhood, normalized by (2731*(sum+6))>>15, 2731 ~ 32768/12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (4 * s[col] + 3 * s[col + 1]
                            + 3 * s[col + stride] + 2 * s[col + stride + 1] + 6)) >> 15;
    }
}
01270
/* Third-pel interpolation at (1/3, 2/3): 2-D weights 3,2,4,3 over the
 * 2x2 neighbourhood, normalized by (2731*(sum+6))>>15, 2731 ~ 32768/12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (3 * s[col] + 2 * s[col + 1]
                            + 4 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15;
    }
}
01281
/* Third-pel interpolation, vertical offset 2/3:
 * out ~ (cur + 2*below)/3 via (683*(a+2b+1))>>11, 683 ~ 2048/3. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (s[col] + 2 * s[col + stride] + 1)) >> 11;
    }
}
01292
/* Third-pel interpolation at (2/3, 1/3): 2-D weights 3,4,2,3 over the
 * 2x2 neighbourhood, normalized by (2731*(sum+6))>>15, 2731 ~ 32768/12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (3 * s[col] + 4 * s[col + 1]
                            + 2 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15;
    }
}
01303
/* Third-pel interpolation at (2/3, 2/3): 2-D weights 2,3,3,4 over the
 * 2x2 neighbourhood, normalized by (2731*(sum+6))>>15, 2731 ~ 32768/12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (2 * s[col] + 3 * s[col + 1]
                            + 3 * s[col + stride] + 4 * s[col + stride + 1] + 6)) >> 15;
    }
}
01314
/* Full-pel averaging: dispatch to the fixed-width averaging routine for
   this width.  Widths other than 2/4/8/16 are silently ignored. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}
01323
/* Averaging version of mc10: interpolate ~ (2*cur + right)/3 as in the put
 * variant, then rounded-average with the existing dst contents. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((683 * (2 * s[col] + s[col + 1] + 1)) >> 11) + 1) >> 1;
    }
}
01334
/* Averaging version of mc20: interpolate ~ (cur + 2*right)/3, then
 * rounded-average with the existing dst contents. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((683 * (s[col] + 2 * s[col + 1] + 1)) >> 11) + 1) >> 1;
    }
}
01345
/* Averaging version of mc01: interpolate ~ (2*cur + below)/3, then
 * rounded-average with the existing dst contents. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((683 * (2 * s[col] + s[col + stride] + 1)) >> 11) + 1) >> 1;
    }
}
01356
/* Averaging version of mc11: 2-D weights 4,3,3,2 (normalized by
 * (2731*(sum+6))>>15), then rounded-average with existing dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((2731 * (4 * s[col] + 3 * s[col + 1]
                                       + 3 * s[col + stride] + 2 * s[col + stride + 1] + 6)) >> 15) + 1) >> 1;
    }
}
01367
/* Averaging version of mc12: 2-D weights 3,2,4,3 (normalized by
 * (2731*(sum+6))>>15), then rounded-average with existing dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((2731 * (3 * s[col] + 2 * s[col + 1]
                                       + 4 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15) + 1) >> 1;
    }
}
01378
/* Averaging version of mc02: interpolate ~ (cur + 2*below)/3, then
 * rounded-average with the existing dst contents. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((683 * (s[col] + 2 * s[col + stride] + 1)) >> 11) + 1) >> 1;
    }
}
01389
/* Averaging version of mc21: 2-D weights 3,4,2,3 (normalized by
 * (2731*(sum+6))>>15), then rounded-average with existing dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((2731 * (3 * s[col] + 4 * s[col + 1]
                                       + 2 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15) + 1) >> 1;
    }
}
01400
/* Averaging version of mc22: 2-D weights 2,3,3,4 (normalized by
 * (2731*(sum+6))>>15), then rounded-average with existing dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((2731 * (2 * s[col] + 3 * s[col + 1]
                                       + 3 * s[col + stride] + 4 * s[col + stride + 1] + 6)) >> 15) + 1) >> 1;
    }
}
/* Dead code, disabled with #if 0: width-specialised tpel wrappers.
   NOTE(review): the bodies use invalid call syntax ("void f(...);" as a
   statement) — harmless while disabled, but must be fixed before enabling. */
#if 0
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
01432
/* H.264-style chroma MC with 1/8-pel bilinear weights.  The four corner
 * weights A..D sum to 64; OP supplies the final rounding/shift and store
 * policy (see op_put/op_avg below).  When x or y is 0 (D==0) the filter
 * degenerates to a 1-D two-tap filter along the remaining axis. */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C; /* D==0: collapse to 1-D filter with taps A and E */\
        const int step= C ? stride : 1; /* vertical if y!=0, else horizontal */\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C; /* D==0: collapse to 1-D filter with taps A and E */\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C; /* D==0: collapse to 1-D filter with taps A and E */\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
01533
/* Weighted sums carry a factor of 64 (weights A..D sum to 64): op_put
   rounds with +32 and shifts back; op_avg additionally takes the rounded
   average with the existing dst byte. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
01541
/*
 * 8-wide chroma MC with 1/8-pel bilinear weights (A..D sum to 64) and the
 * "no rounding" bias: +32-4 = +28 before >>6 instead of the usual +32,
 * biasing ties downward.
 */
static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){
    const int A = (8 - x) * (8 - y);
    const int B = (    x) * (8 - y);
    const int C = (8 - x) * (    y);
    const int D = (    x) * (    y);
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (A * src[col]
                      + B * src[col + 1]
                      + C * src[stride + col]
                      + D * src[stride + col + 1]
                      + 32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
01565
01566 #define QPEL_MC(r, OPNAME, RND, OP) \
01567 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01568 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01569 int i;\
01570 for(i=0; i<h; i++)\
01571 {\
01572 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
01573 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
01574 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
01575 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
01576 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
01577 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
01578 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
01579 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
01580 dst+=dstStride;\
01581 src+=srcStride;\
01582 }\
01583 }\
01584 \
01585 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01586 const int w=8;\
01587 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01588 int i;\
01589 for(i=0; i<w; i++)\
01590 {\
01591 const int src0= src[0*srcStride];\
01592 const int src1= src[1*srcStride];\
01593 const int src2= src[2*srcStride];\
01594 const int src3= src[3*srcStride];\
01595 const int src4= src[4*srcStride];\
01596 const int src5= src[5*srcStride];\
01597 const int src6= src[6*srcStride];\
01598 const int src7= src[7*srcStride];\
01599 const int src8= src[8*srcStride];\
01600 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
01601 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
01602 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
01603 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
01604 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
01605 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
01606 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
01607 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
01608 dst++;\
01609 src++;\
01610 }\
01611 }\
01612 \
01613 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01614 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01615 int i;\
01616 \
01617 for(i=0; i<h; i++)\
01618 {\
01619 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
01620 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
01621 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
01622 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
01623 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
01624 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
01625 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
01626 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
01627 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
01628 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
01629 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
01630 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
01631 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
01632 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
01633 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
01634 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
01635 dst+=dstStride;\
01636 src+=srcStride;\
01637 }\
01638 }\
01639 \
01640 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01641 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01642 int i;\
01643 const int w=16;\
01644 for(i=0; i<w; i++)\
01645 {\
01646 const int src0= src[0*srcStride];\
01647 const int src1= src[1*srcStride];\
01648 const int src2= src[2*srcStride];\
01649 const int src3= src[3*srcStride];\
01650 const int src4= src[4*srcStride];\
01651 const int src5= src[5*srcStride];\
01652 const int src6= src[6*srcStride];\
01653 const int src7= src[7*srcStride];\
01654 const int src8= src[8*srcStride];\
01655 const int src9= src[9*srcStride];\
01656 const int src10= src[10*srcStride];\
01657 const int src11= src[11*srcStride];\
01658 const int src12= src[12*srcStride];\
01659 const int src13= src[13*srcStride];\
01660 const int src14= src[14*srcStride];\
01661 const int src15= src[15*srcStride];\
01662 const int src16= src[16*srcStride];\
01663 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
01664 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
01665 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
01666 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
01667 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
01668 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
01669 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
01670 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
01671 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
01672 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
01673 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
01674 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
01675 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
01676 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
01677 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
01678 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
01679 dst++;\
01680 src++;\
01681 }\
01682 }\
01683 \
01684 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
01685 OPNAME ## pixels8_c(dst, src, stride, 8);\
01686 }\
01687 \
01688 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01689 uint8_t half[64];\
01690 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01691 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
01692 }\
01693 \
01694 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01695 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
01696 }\
01697 \
01698 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01699 uint8_t half[64];\
01700 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01701 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
01702 }\
01703 \
01704 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01705 uint8_t full[16*9];\
01706 uint8_t half[64];\
01707 copy_block9(full, src, 16, stride, 9);\
01708 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01709 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
01710 }\
01711 \
01712 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01713 uint8_t full[16*9];\
01714 copy_block9(full, src, 16, stride, 9);\
01715 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
01716 }\
01717 \
01718 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01719 uint8_t full[16*9];\
01720 uint8_t half[64];\
01721 copy_block9(full, src, 16, stride, 9);\
01722 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01723 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
01724 }\
01725 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01726 uint8_t full[16*9];\
01727 uint8_t halfH[72];\
01728 uint8_t halfV[64];\
01729 uint8_t halfHV[64];\
01730 copy_block9(full, src, 16, stride, 9);\
01731 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01732 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01733 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01734 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01735 }\
01736 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01737 uint8_t full[16*9];\
01738 uint8_t halfH[72];\
01739 uint8_t halfHV[64];\
01740 copy_block9(full, src, 16, stride, 9);\
01741 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01742 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01743 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01744 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01745 }\
01746 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01747 uint8_t full[16*9];\
01748 uint8_t halfH[72];\
01749 uint8_t halfV[64];\
01750 uint8_t halfHV[64];\
01751 copy_block9(full, src, 16, stride, 9);\
01752 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01753 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01754 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01755 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01756 }\
01757 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01758 uint8_t full[16*9];\
01759 uint8_t halfH[72];\
01760 uint8_t halfHV[64];\
01761 copy_block9(full, src, 16, stride, 9);\
01762 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01763 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01764 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01765 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01766 }\
01767 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01768 uint8_t full[16*9];\
01769 uint8_t halfH[72];\
01770 uint8_t halfV[64];\
01771 uint8_t halfHV[64];\
01772 copy_block9(full, src, 16, stride, 9);\
01773 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01774 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01775 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01776 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01777 }\
01778 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01779 uint8_t full[16*9];\
01780 uint8_t halfH[72];\
01781 uint8_t halfHV[64];\
01782 copy_block9(full, src, 16, stride, 9);\
01783 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01784 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01785 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01786 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01787 }\
01788 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01789 uint8_t full[16*9];\
01790 uint8_t halfH[72];\
01791 uint8_t halfV[64];\
01792 uint8_t halfHV[64];\
01793 copy_block9(full, src, 16, stride, 9);\
01794 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01795 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01796 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01797 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01798 }\
01799 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01800 uint8_t full[16*9];\
01801 uint8_t halfH[72];\
01802 uint8_t halfHV[64];\
01803 copy_block9(full, src, 16, stride, 9);\
01804 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01805 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01806 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01807 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01808 }\
01809 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01810 uint8_t halfH[72];\
01811 uint8_t halfHV[64];\
01812 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01813 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01814 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01815 }\
01816 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01817 uint8_t halfH[72];\
01818 uint8_t halfHV[64];\
01819 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01820 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01821 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01822 }\
01823 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01824 uint8_t full[16*9];\
01825 uint8_t halfH[72];\
01826 uint8_t halfV[64];\
01827 uint8_t halfHV[64];\
01828 copy_block9(full, src, 16, stride, 9);\
01829 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01830 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01831 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01832 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01833 }\
01834 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01835 uint8_t full[16*9];\
01836 uint8_t halfH[72];\
01837 copy_block9(full, src, 16, stride, 9);\
01838 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01839 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01840 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01841 }\
01842 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01843 uint8_t full[16*9];\
01844 uint8_t halfH[72];\
01845 uint8_t halfV[64];\
01846 uint8_t halfHV[64];\
01847 copy_block9(full, src, 16, stride, 9);\
01848 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01849 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01850 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01851 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01852 }\
01853 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01854 uint8_t full[16*9];\
01855 uint8_t halfH[72];\
01856 copy_block9(full, src, 16, stride, 9);\
01857 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01858 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01859 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01860 }\
01861 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01862 uint8_t halfH[72];\
01863 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01864 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01865 }\
01866 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
01867 OPNAME ## pixels16_c(dst, src, stride, 16);\
01868 }\
01869 \
01870 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01871 uint8_t half[256];\
01872 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01873 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
01874 }\
01875 \
01876 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01877 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01878 }\
01879 \
01880 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01881 uint8_t half[256];\
01882 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01883 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
01884 }\
01885 \
01886 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01887 uint8_t full[24*17];\
01888 uint8_t half[256];\
01889 copy_block17(full, src, 24, stride, 17);\
01890 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01891 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
01892 }\
01893 \
01894 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01895 uint8_t full[24*17];\
01896 copy_block17(full, src, 24, stride, 17);\
01897 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01898 }\
01899 \
01900 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01901 uint8_t full[24*17];\
01902 uint8_t half[256];\
01903 copy_block17(full, src, 24, stride, 17);\
01904 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01905 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
01906 }\
01907 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01908 uint8_t full[24*17];\
01909 uint8_t halfH[272];\
01910 uint8_t halfV[256];\
01911 uint8_t halfHV[256];\
01912 copy_block17(full, src, 24, stride, 17);\
01913 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01914 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01915 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01916 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01917 }\
01918 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01919 uint8_t full[24*17];\
01920 uint8_t halfH[272];\
01921 uint8_t halfHV[256];\
01922 copy_block17(full, src, 24, stride, 17);\
01923 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01924 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
01925 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01926 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01927 }\
01928 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01929 uint8_t full[24*17];\
01930 uint8_t halfH[272];\
01931 uint8_t halfV[256];\
01932 uint8_t halfHV[256];\
01933 copy_block17(full, src, 24, stride, 17);\
01934 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01935 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01936 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01937 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01938 }\
01939 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01940 uint8_t full[24*17];\
01941 uint8_t halfH[272];\
01942 uint8_t halfHV[256];\
01943 copy_block17(full, src, 24, stride, 17);\
01944 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01945 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
01946 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01947 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01948 }\
01949 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01950 uint8_t full[24*17];\
01951 uint8_t halfH[272];\
01952 uint8_t halfV[256];\
01953 uint8_t halfHV[256];\
01954 copy_block17(full, src, 24, stride, 17);\
01955 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01956 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01957 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01958 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01959 }\
01960 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01961 uint8_t full[24*17];\
01962 uint8_t halfH[272];\
01963 uint8_t halfHV[256];\
01964 copy_block17(full, src, 24, stride, 17);\
01965 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01966 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
01967 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01968 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01969 }\
01970 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01971 uint8_t full[24*17];\
01972 uint8_t halfH[272];\
01973 uint8_t halfV[256];\
01974 uint8_t halfHV[256];\
01975 copy_block17(full, src, 24, stride, 17);\
01976 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01977 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01978 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01979 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01980 }\
01981 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01982 uint8_t full[24*17];\
01983 uint8_t halfH[272];\
01984 uint8_t halfHV[256];\
01985 copy_block17(full, src, 24, stride, 17);\
01986 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01987 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
01988 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01989 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01990 }\
01991 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01992 uint8_t halfH[272];\
01993 uint8_t halfHV[256];\
01994 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01995 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01996 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01997 }\
01998 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01999 uint8_t halfH[272];\
02000 uint8_t halfHV[256];\
02001 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02002 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02003 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02004 }\
02005 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
02006 uint8_t full[24*17];\
02007 uint8_t halfH[272];\
02008 uint8_t halfV[256];\
02009 uint8_t halfHV[256];\
02010 copy_block17(full, src, 24, stride, 17);\
02011 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02012 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02013 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02014 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02015 }\
02016 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
02017 uint8_t full[24*17];\
02018 uint8_t halfH[272];\
02019 copy_block17(full, src, 24, stride, 17);\
02020 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02021 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02022 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02023 }\
02024 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
02025 uint8_t full[24*17];\
02026 uint8_t halfH[272];\
02027 uint8_t halfV[256];\
02028 uint8_t halfHV[256];\
02029 copy_block17(full, src, 24, stride, 17);\
02030 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02031 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02032 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02033 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02034 }\
02035 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
02036 uint8_t full[24*17];\
02037 uint8_t halfH[272];\
02038 copy_block17(full, src, 24, stride, 17);\
02039 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02040 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02041 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02042 }\
02043 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02044 uint8_t halfH[272];\
02045 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02046 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02047 }
02048
/* Pixel-store primitives used by the QPEL_MC macro instantiations below.
 * 'b' is a 6-tap filter sum scaled by 32; cm[] (clip table) clamps the
 * rounded value to 0..255.
 *   op_put*      : store the filtered value
 *   op_avg*      : average the filtered value with the existing dst pixel
 *   *_no_rnd     : truncating variants (+15 instead of +16 before >>5),
 *                  used for the MPEG-4 "no rounding" mode */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the MPEG-4 quarter-pel MC function sets (QPEL_MC is defined
 * earlier in this file): rounded put, non-rounded put, and rounded avg. */
QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
02062
#if 1
/* H264_LOWPASS(OPNAME, OP, OP2)
 *
 * Generates the half-pel lowpass interpolation primitives used by H.264
 * quarter-pel motion compensation, for block widths 2, 4, 8 and 16:
 *   OPNAME##h264_qpelN_h_lowpass  - horizontal 6-tap (1,-5,20,20,-5,1) filter
 *   OPNAME##h264_qpelN_v_lowpass  - vertical 6-tap filter
 *   OPNAME##h264_qpelN_hv_lowpass - horizontal filter into the int16_t tmp
 *                                   buffer, then vertical filter out of it
 *
 * OP(dst, v) writes a singly filtered value (v is the tap sum scaled by 32),
 * OP2(dst, v) writes a doubly filtered value (scaled by 1024); see the
 * op_put/op_avg/op2_put/op2_avg definitions that follow this macro.
 * The hv variants compute h+5 filtered rows so the vertical pass has the
 * 2 rows above and 3 rows below the block that its taps need.
 * The 16-wide/16-tall variants are built from four 8x8 calls. */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride; /* start 2 rows above the block for the vertical taps */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2); /* rewind so tmp[0] is the block's first row */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride; /* start 2 rows above the block for the vertical taps */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2); /* rewind so tmp[0] is the block's first row */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride; /* start 2 rows above the block for the vertical taps */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2); /* rewind so tmp[0] is the block's first row */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\

/* H264_MC(OPNAME, SIZE)
 *
 * Generates the 15 quarter-pel motion-compensation entry points
 * OPNAME##h264_qpelSIZE_mcXY_c for one block size, where X is the
 * horizontal and Y the vertical quarter-pel phase (0..3, 2 = half-pel).
 * Half-pel planes come from the H264_LOWPASS primitives; quarter-pel
 * positions are formed by averaging two planes with pixels##SIZE##_l2
 * (e.g. mc10 averages src with the horizontal half-pel plane, mc30
 * averages src+1 with it). For vertical filtering, copy_block##SIZE
 * first gathers SIZE+5 source rows starting 2 rows above the block into
 * 'full'; full_mid points at the block's own first row inside it.
 * The hv variants route the intermediate horizontal filter output
 * through the int16_t 'tmp' buffer. */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\

/* Rounding/store ops plugged into the H264_LOWPASS kernels:
 *  op_put/op_avg   — single 6-tap stage: (b + 16) >> 5, clamped via the crop table cm
 *  op2_put/op2_avg — combined 2-D stage: (b + 512) >> 10
 * The avg variants additionally average with the existing destination pixel. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)

#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

/* Instantiate the C lowpass kernels and the 16 quarter-pel MC functions per
 * block size (note: only put_ is generated for the 2-wide case). */
H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
02485 #endif
02486
/* H.264 weighted prediction.
 * op_scale1: unidirectional — block[x] = clip((block[x]*weight + offset) >> log2_denom)
 * op_scale2: bidirectional  — dst[x] = clip((src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) */
#define op_scale1(x) block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x) dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
/* H264_WEIGHT(W,H) emits the weighted-prediction pair for one WxH block:
 *   weight_h264_pixelsWxH_c   — in-place unidirectional weighting of `block`
 *   biweight_h264_pixelsWxH_c — bidirectional weighting of `src` into `dst`
 * Each row is unrolled to the maximum width of 16; the `if(W==n) continue;`
 * tests are compile-time constants, so the compiler drops the unused tail for
 * narrower blocks.  Rounding: the unidirectional form adds 2^(log2_denom-1)
 * after pre-scaling offset; the bidirectional form forces the offset term odd
 * before shifting, as the code below shows. */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int y; \
    offset = ((offset + 1) | 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}

/* All partition sizes used by H.264 weighted prediction. */
H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
02556
02557 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
02558 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02559 int i;
02560
02561 for(i=0; i<h; i++){
02562 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
02563 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
02564 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
02565 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
02566 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
02567 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
02568 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
02569 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
02570 dst+=dstStride;
02571 src+=srcStride;
02572 }
02573 }
02574
#ifdef CONFIG_CAVS_DECODER
/* CAVS-specific DSP initialization lives in cavsdsp.c. */
void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);

/* Full-pel (mc00) CAVS qpel cases are plain pixel copies/averages; exported
 * here so cavsdsp.c can reuse the static put/avg helpers of this file. */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
#endif

#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
/* VC-1/WMV3 DSP initialization lives in vc1dsp.c. */
void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);

/* Full-pel VC-1 mspel case is a plain 8x8 copy (rnd is unused here). */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
#endif

void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);


void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
02606
02607 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
02608 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02609 int i;
02610
02611 for(i=0; i<w; i++){
02612 const int src_1= src[ -srcStride];
02613 const int src0 = src[0 ];
02614 const int src1 = src[ srcStride];
02615 const int src2 = src[2*srcStride];
02616 const int src3 = src[3*srcStride];
02617 const int src4 = src[4*srcStride];
02618 const int src5 = src[5*srcStride];
02619 const int src6 = src[6*srcStride];
02620 const int src7 = src[7*srcStride];
02621 const int src8 = src[8*srcStride];
02622 const int src9 = src[9*srcStride];
02623 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
02624 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
02625 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
02626 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
02627 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
02628 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
02629 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
02630 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
02631 src++;
02632 dst++;
02633 }
02634 }
02635
/* WMV2 8x8 half-pel motion compensation.  put_mspel8_mcXY_c produces the
 * block at horizontal phase X and vertical phase Y, where 2 = half-pel
 * (lowpass filtered) and 1/3 = average of integer-pel and half-pel signals
 * — NOTE(review): naming follows the qpel mcXY convention; confirm against
 * the wmv2 decoder's motion code. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8); /* avg of src and h-filtered */
}

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8); /* avg of right neighbour and h-filtered */
}

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];   /* 8x11 h-filtered plane: 11 rows feed the vertical 4-tap */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); /* start one row above */
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);         /* v-filter the h-filtered plane */
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);       /* vertical filter of the right column */
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
02683
/* H.263 deblocking across a horizontal block edge: filters the two rows above
 * and the two rows below `src` for 8 columns.  Compiled to a no-op unless an
 * H.263-family codec is enabled. */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(ENABLE_ANY_H263) {
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale]; /* qscale -> filter strength */

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;  /* gradient estimate at the edge */

        /* piecewise-linear ramp: full correction for small |d|, tapering to
         * zero beyond 2*strength so genuine image edges are preserved */
        if     (d<-2*strength) d1= 0;
        else if(d<- strength)  d1=-2*strength - d;
        else if(d<  strength)  d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        /* branchless clamp to 0..255: when the value leaves 8-bit range,
         * ~(p>>31) is 0 for negative p and -1 (stored as 255) for p > 255 */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        ad1= FFABS(d1)>>1;

        /* weaker correction of the outer pixels, bounded by half of |d1| */
        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+  stride] = p3 + d2;
    }
    }
}
02720
/* H.263 deblocking across a vertical block edge: filters the two columns left
 * and right of `src` for 8 rows.  Same math as h263_v_loop_filter_c with the
 * axes swapped. */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(ENABLE_ANY_H263) {
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale]; /* qscale -> filter strength */

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;  /* gradient estimate at the edge */

        /* piecewise-linear ramp, see h263_v_loop_filter_c */
        if     (d<-2*strength) d1= 0;
        else if(d<- strength)  d1=-2*strength - d;
        else if(d<  strength)  d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        /* branchless clamp to 0..255 (0 if negative, 255 if > 255) */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        ad1= FFABS(d1)>>1;

        /* weaker correction of the outer pixels, bounded by half of |d1| */
        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
    }
}
02757
02758 static void h261_loop_filter_c(uint8_t *src, int stride){
02759 int x,y,xy,yz;
02760 int temp[64];
02761
02762 for(x=0; x<8; x++){
02763 temp[x ] = 4*src[x ];
02764 temp[x + 7*8] = 4*src[x + 7*stride];
02765 }
02766 for(y=1; y<7; y++){
02767 for(x=0; x<8; x++){
02768 xy = y * stride + x;
02769 yz = y * 8 + x;
02770 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
02771 }
02772 }
02773
02774 for(y=0; y<8; y++){
02775 src[ y*stride] = (temp[ y*8] + 2)>>2;
02776 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
02777 for(x=1; x<7; x++){
02778 xy = y * stride + x;
02779 yz = y * 8 + x;
02780 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
02781 }
02782 }
02783 }
02784
/* H.264 in-loop deblocking of one luma edge (16 samples, handled as 4 groups
 * of 4).  xstride steps across the edge and ystride along it, so the same
 * routine serves both edge orientations.  alpha/beta are the activity
 * thresholds; tc0[i] is the base clipping value for group i, with a negative
 * value meaning "do not filter this group". */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            /* filter only where the step looks like blocking, not a real edge */
            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int i_delta;

                /* flat p side: also adjust p1 and widen the clipping range */
                if( FFABS( p2 - p0 ) < beta ) {
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                /* same on the q side */
                if( FFABS( q2 - q0 ) < beta ) {
                    pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                /* core p0/q0 correction, clipped to +-tc */
                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_uint8( p0 + i_delta );
                pix[0]        = av_clip_uint8( q0 - i_delta );
            }
            pix += ystride;
        }
    }
}
/* Vertical filter: the edge is horizontal, so samples across it are one line
 * apart (xstride = stride, ystride = 1). */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
/* Horizontal filter: the edge is vertical, so samples across it are adjacent
 * bytes (xstride = 1, ystride = stride). */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}
02833
/* H.264 in-loop deblocking of one chroma edge (8 samples as 4 groups of 2).
 * Unlike the luma filter, only p0/q0 are modified and the clipping range tc
 * is never extended; tc <= 0 skips the group entirely. */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];

            /* filter only blocking artifacts, not genuine edges */
            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-xstride] = av_clip_uint8( p0 + delta );
                pix[0]        = av_clip_uint8( q0 - delta );
            }
            pix += ystride;
        }
    }
}
/* Vertical chroma filter: horizontal edge, xstride = stride. */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
/* Horizontal chroma filter: vertical edge, xstride = 1. */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}
02870
02871 static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
02872 {
02873 int d;
02874 for( d = 0; d < 8; d++ ) {
02875 const int p0 = pix[-1*xstride];
02876 const int p1 = pix[-2*xstride];
02877 const int q0 = pix[0];
02878 const int q1 = pix[1*xstride];
02879
02880 if( FFABS( p0 - q0 ) < alpha &&
02881 FFABS( p1 - p0 ) < beta &&
02882 FFABS( q1 - q0 ) < beta ) {
02883
02884 pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
02885 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
02886 }
02887 pix += ystride;
02888 }
02889 }
/* Vertical intra chroma filter: horizontal edge, xstride = stride. */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
/* Horizontal intra chroma filter: vertical edge, xstride = 1. */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}
02898
02899 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
02900 {
02901 int s, i;
02902
02903 s = 0;
02904 for(i=0;i<h;i++) {
02905 s += abs(pix1[0] - pix2[0]);
02906 s += abs(pix1[1] - pix2[1]);
02907 s += abs(pix1[2] - pix2[2]);
02908 s += abs(pix1[3] - pix2[3]);
02909 s += abs(pix1[4] - pix2[4]);
02910 s += abs(pix1[5] - pix2[5]);
02911 s += abs(pix1[6] - pix2[6]);
02912 s += abs(pix1[7] - pix2[7]);
02913 s += abs(pix1[8] - pix2[8]);
02914 s += abs(pix1[9] - pix2[9]);
02915 s += abs(pix1[10] - pix2[10]);
02916 s += abs(pix1[11] - pix2[11]);
02917 s += abs(pix1[12] - pix2[12]);
02918 s += abs(pix1[13] - pix2[13]);
02919 s += abs(pix1[14] - pix2[14]);
02920 s += abs(pix1[15] - pix2[15]);
02921 pix1 += line_size;
02922 pix2 += line_size;
02923 }
02924 return s;
02925 }
02926
02927 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
02928 {
02929 int s, i;
02930
02931 s = 0;
02932 for(i=0;i<h;i++) {
02933 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
02934 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
02935 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
02936 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
02937 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
02938 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
02939 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
02940 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
02941 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
02942 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
02943 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
02944 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
02945 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
02946 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
02947 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
02948 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
02949 pix1 += line_size;
02950 pix2 += line_size;
02951 }
02952 return s;
02953 }
02954
02955 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
02956 {
02957 int s, i;
02958 uint8_t *pix3 = pix2 + line_size;
02959
02960 s = 0;
02961 for(i=0;i<h;i++) {
02962 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
02963 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
02964 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
02965 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
02966 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
02967 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
02968 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
02969 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
02970 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
02971 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
02972 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
02973 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
02974 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
02975 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
02976 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
02977 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
02978 pix1 += line_size;
02979 pix2 += line_size;
02980 pix3 += line_size;
02981 }
02982 return s;
02983 }
02984
02985 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
02986 {
02987 int s, i;
02988 uint8_t *pix3 = pix2 + line_size;
02989
02990 s = 0;
02991 for(i=0;i<h;i++) {
02992 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
02993 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
02994 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
02995 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
02996 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
02997 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
02998 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
02999 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
03000 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
03001 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
03002 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
03003 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
03004 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
03005 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
03006 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
03007 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
03008 pix1 += line_size;
03009 pix2 += line_size;
03010 pix3 += line_size;
03011 }
03012 return s;
03013 }
03014
03015 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
03016 {
03017 int s, i;
03018
03019 s = 0;
03020 for(i=0;i<h;i++) {
03021 s += abs(pix1[0] - pix2[0]);
03022 s += abs(pix1[1] - pix2[1]);
03023 s += abs(pix1[2] - pix2[2]);
03024 s += abs(pix1[3] - pix2[3]);
03025 s += abs(pix1[4] - pix2[4]);
03026 s += abs(pix1[5] - pix2[5]);
03027 s += abs(pix1[6] - pix2[6]);
03028 s += abs(pix1[7] - pix2[7]);
03029 pix1 += line_size;
03030 pix2 += line_size;
03031 }
03032 return s;
03033 }
03034
03035 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
03036 {
03037 int s, i;
03038
03039 s = 0;
03040 for(i=0;i<h;i++) {
03041 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
03042 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
03043 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
03044 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
03045 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
03046 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
03047 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
03048 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
03049 pix1 += line_size;
03050 pix2 += line_size;
03051 }
03052 return s;
03053 }
03054
03055 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
03056 {
03057 int s, i;
03058 uint8_t *pix3 = pix2 + line_size;
03059
03060 s = 0;
03061 for(i=0;i<h;i++) {
03062 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
03063 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
03064 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
03065 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
03066 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
03067 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
03068 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
03069 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
03070 pix1 += line_size;
03071 pix2 += line_size;
03072 pix3 += line_size;
03073 }
03074 return s;
03075 }
03076
03077 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
03078 {
03079 int s, i;
03080 uint8_t *pix3 = pix2 + line_size;
03081
03082 s = 0;
03083 for(i=0;i<h;i++) {
03084 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
03085 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
03086 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
03087 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
03088 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
03089 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
03090 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
03091 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
03092 pix1 += line_size;
03093 pix2 += line_size;
03094 pix3 += line_size;
03095 }
03096 return s;
03097 }
03098
03099 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
03100 MpegEncContext *c = v;
03101 int score1=0;
03102 int score2=0;
03103 int x,y;
03104
03105 for(y=0; y<h; y++){
03106 for(x=0; x<16; x++){
03107 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
03108 }
03109 if(y+1<h){
03110 for(x=0; x<15; x++){
03111 score2+= FFABS( s1[x ] - s1[x +stride]
03112 - s1[x+1] + s1[x+1+stride])
03113 -FFABS( s2[x ] - s2[x +stride]
03114 - s2[x+1] + s2[x+1+stride]);
03115 }
03116 }
03117 s1+= stride;
03118 s2+= stride;
03119 }
03120
03121 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
03122 else return score1 + FFABS(score2)*8;
03123 }
03124
03125 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
03126 MpegEncContext *c = v;
03127 int score1=0;
03128 int score2=0;
03129 int x,y;
03130
03131 for(y=0; y<h; y++){
03132 for(x=0; x<8; x++){
03133 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
03134 }
03135 if(y+1<h){
03136 for(x=0; x<7; x++){
03137 score2+= FFABS( s1[x ] - s1[x +stride]
03138 - s1[x+1] + s1[x+1+stride])
03139 -FFABS( s2[x ] - s2[x +stride]
03140 - s2[x+1] + s2[x+1+stride]);
03141 }
03142 }
03143 s1+= stride;
03144 s2+= stride;
03145 }
03146
03147 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
03148 else return score1 + FFABS(score2)*8;
03149 }
03150
/* Return a weighted squared-error metric for adding `scale` times `basis`
 * to the residual `rem`: each coefficient is rescaled from BASIS_SHIFT to
 * RECON_SHIFT precision, multiplied by its weight, squared and accumulated
 * (with a combined >>6 de-scaling: >>4 per term, >>2 on the sum).
 * NOTE(review): presumably used by the encoder's coefficient/basis search —
 * confirm against callers. */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);  /* w*b must stay inside int range before squaring */

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
03165
/* Accumulate `scale` times `basis` into the residual `rem`, rescaling each
 * term from BASIS_SHIFT to RECON_SHIFT precision with rounding. */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
03173
/* Permute the coefficients of `block` that are reachable through
 * scantable[0..last] according to `permutation` (e.g. to match the chosen
 * IDCT's coefficient order).  Coefficients outside the scanned prefix are
 * untouched.  last <= 0 is a no-op: the DC coefficient needs no permutation. */
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
{
    int i;
    DCTELEM temp[64];

    if(last<=0) return;

    /* lift the scanned coefficients out, clearing their old positions */
    for(i=0; i<=last; i++){
        const int j= scantable[i];
        temp[j]= block[j];
        block[j]=0;
    }

    /* drop them back in at their permuted positions */
    for(i=0; i<=last; i++){
        const int j= scantable[i];
        const int perm_j= permutation[j];
        block[perm_j]= temp[j];
    }
}
03202
/* Trivial comparator for FF_CMP_ZERO: every candidate costs nothing. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
03206
/* Fill the 5-entry comparison-function array `cmp` with the DSPContext
 * implementations selected by the FF_CMP_* id in the low byte of `type`.
 * On an unknown id an error is logged and the entries stay NULL from the
 * initial memset. */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*5);

    for(i=0; i<5; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#ifdef CONFIG_SNOW_ENCODER
        /* wavelet comparators are only available with the snow encoder */
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
03266
/* Zero 6 consecutive blocks of 64 DCTELEMs — presumably one full macroblock
 * (4 luma + 2 chroma blocks); verify against callers. */
static void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
}
03274
03275 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
03276 int i;
03277 for(i=0; i+7<w; i+=8){
03278 dst[i+0] += src[i+0];
03279 dst[i+1] += src[i+1];
03280 dst[i+2] += src[i+2];
03281 dst[i+3] += src[i+3];
03282 dst[i+4] += src[i+4];
03283 dst[i+5] += src[i+5];
03284 dst[i+6] += src[i+6];
03285 dst[i+7] += src[i+7];
03286 }
03287 for(; i<w; i++)
03288 dst[i+0] += src[i+0];
03289 }
03290
03291 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
03292 int i;
03293 for(i=0; i+7<w; i+=8){
03294 dst[i+0] = src1[i+0]-src2[i+0];
03295 dst[i+1] = src1[i+1]-src2[i+1];
03296 dst[i+2] = src1[i+2]-src2[i+2];
03297 dst[i+3] = src1[i+3]-src2[i+3];
03298 dst[i+4] = src1[i+4]-src2[i+4];
03299 dst[i+5] = src1[i+5]-src2[i+5];
03300 dst[i+6] = src1[i+6]-src2[i+6];
03301 dst[i+7] = src1[i+7]-src2[i+7];
03302 }
03303 for(; i<w; i++)
03304 dst[i+0] = src1[i+0]-src2[i+0];
03305 }
03306
/* HuffYUV median prediction residual: for each position, predict the current
 * pixel (src2) from left (l), top (src1[i]) and topleft (lt) via
 * mid_pred(l, top, l + top - lt), and store current - prediction in dst.
 * `left`/`left_top` carry the running state across slices and are updated
 * on return. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];    /* topleft for the next column */
        l= src2[i];     /* left for the next column */
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}
03324
/* 2-point butterfly: o1 = sum, o2 = difference of the inputs. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place butterfly on the lvalues x, y. */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* |x+y| + |x-y|: final butterfly stage folded directly into the SATD sum. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
03339
03340 static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
03341 int i;
03342 int temp[64];
03343 int sum=0;
03344
03345 assert(h==8);
03346
03347 for(i=0; i<8; i++){
03348
03349 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
03350 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
03351 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
03352 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
03353
03354 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
03355 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
03356 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
03357 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
03358
03359 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
03360 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
03361 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
03362 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
03363 }
03364
03365 for(i=0; i<8; i++){
03366 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
03367 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
03368 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
03369 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
03370
03371 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
03372 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
03373 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
03374 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
03375
03376 sum +=
03377 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
03378 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
03379 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
03380 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
03381 }
03382 #if 0
03383 static int maxi=0;
03384 if(sum>maxi){
03385 maxi=sum;
03386 printf("MAX:%d\n", maxi);
03387 }
03388 #endif
03389 return sum;
03390 }
03391
/* SATD of the source block itself (no reference): 8x8 Hadamard transform of
 * the raw pixels, sum of absolute coefficients, with the DC term subtracted
 * at the end so intra cost is comparable with the difference-based SATD.
 * h must be 8; `dummy` is unused. */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* horizontal 8-point Hadamard of source row i */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical pass; last stage folded into BUTTERFLYA */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* remove the DC contribution — NOTE(review): temp[0]+temp[32] is the DC
     * coefficient at this point of the transform; confirm */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
03439
/* DCT-domain SAD: forward-DCT the 8x8 pixel difference of src1/src2 and sum
 * the absolute transform coefficients.  h must be 8. */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); /* 16-byte aligned scratch for the fdct */
    DCTELEM * const temp= (DCTELEM*)aligned_temp;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
03451
03452 #ifdef CONFIG_GPL
/* One 8-point pass of the H.264 8x8 integer transform (butterfly form with
 * the >>1 / >>2 scaled odd part).  The caller defines SRC(x) and DST(x,v)
 * to select row vs column access and what to do with the outputs. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}
03479
03480 static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03481 MpegEncContext * const s= (MpegEncContext *)c;
03482 DCTELEM dct[8][8];
03483 int i;
03484 int sum=0;
03485
03486 s->dsp.diff_pixels(dct[0], src1, src2, stride);
03487
03488 #define SRC(x) dct[i][x]
03489 #define DST(x,v) dct[i][x]= v
03490 for( i = 0; i < 8; i++ )
03491 DCT8_1D
03492 #undef SRC
03493 #undef DST
03494
03495 #define SRC(x) dct[x][i]
03496 #define DST(x,v) sum += FFABS(v)
03497 for( i = 0; i < 8; i++ )
03498 DCT8_1D
03499 #undef SRC
03500 #undef DST
03501 return sum;
03502 }
03503 #endif
03504
03505 static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03506 MpegEncContext * const s= (MpegEncContext *)c;
03507 DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
03508 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03509 int sum=0, i;
03510
03511 assert(h==8);
03512
03513 s->dsp.diff_pixels(temp, src1, src2, stride);
03514 s->dsp.fdct(temp);
03515
03516 for(i=0; i<64; i++)
03517 sum= FFMAX(sum, FFABS(temp[i]));
03518
03519 return sum;
03520 }
03521
03522 static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03523 MpegEncContext * const s= (MpegEncContext *)c;
03524 DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
03525 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03526 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
03527 int sum=0, i;
03528
03529 assert(h==8);
03530 s->mb_intra=0;
03531
03532 s->dsp.diff_pixels(temp, src1, src2, stride);
03533
03534 memcpy(bak, temp, 64*sizeof(DCTELEM));
03535
03536 s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03537 s->dct_unquantize_inter(s, temp, 0, s->qscale);
03538 ff_simple_idct(temp);
03539
03540 for(i=0; i<64; i++)
03541 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
03542
03543 return sum;
03544 }
03545
/* Rate-distortion score of an 8x8 block: quantizes the residual, counts the
 * VLC bits it would cost to code, reconstructs, and returns
 * distortion + approx(lambda * bits) where lambda is folded into the
 * qscale^2 * 109/128 term. */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]); /* 8 rows of 8 bytes at 'stride' spacing */
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    uint8_t * const bak= (uint8_t*)aligned_bak;
    int i, last, run, bits, level, distoration, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* save the prediction (src2) so idct_add below reconstructs into a copy */
    for(i=0; i<8; i++){
        ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
        ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
    }

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1; /* DC is coded separately below */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; /* DC coefficient cost (+256 bias into table) */
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits for all AC coefficients before the last one */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64; /* bias level into [0,127] index range of the length table */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length; /* |level| > 63 needs an escape code */
                run=0;
            }else
                run++;
        }
        /* the last coded coefficient uses the "last" VLC table */
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64); /* last coefficient must be non-zero by definition */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* reconstruct: dequantize and add the inverse transform onto the saved prediction */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(bak, stride, temp);

    distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);

    return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
03624
/* Bit cost of coding an 8x8 block: quantizes the residual and counts the
 * VLC bits needed, without computing any distortion (rate-only metric;
 * same counting logic as rd8x8_c). */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1; /* DC is coded separately below */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; /* DC coefficient cost */
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* run/level VLC bits for all AC coefficients before the last one */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64; /* bias level into the [0,127] table index range */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length; /* |level| > 63 needs an escape code */
                run=0;
            }else
                run++;
        }
        /* the last coded coefficient uses the "last" VLC table */
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64); /* last coefficient must be non-zero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
03684
/**
 * Vertical SAD of a 16-wide block against itself: sum of
 * |s[x,y] - s[x,y-1]| over rows 1..h-1. Measures vertical activity;
 * the second source argument is unused.
 */
static int vsad_intra16_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s[col] - s[col + stride];
            total += d < 0 ? -d : d;
        }
        s += stride;
    }

    return total;
}
03699
/**
 * Vertical SAD of the difference signal: sum of
 * |(s1-s2)[x,y] - (s1-s2)[x,y-1]| over a 16-wide block, rows 1..h-1.
 */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            total += d < 0 ? -d : d;
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
03714
/* square helper shared by the vertical-SSE metrics below */
#define SQ(a) ((a)*(a))

/**
 * Vertical SSE of a 16-wide block against itself: sum of
 * (s[x,y] - s[x,y-1])^2 over rows 1..h-1. The second source argument
 * is unused.
 */
static int vsse_intra16_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s[col] - s[col + stride];
            total += d * d;
        }
        s += stride;
    }

    return total;
}

/**
 * Vertical SSE of the difference signal: sum of
 * ((s1-s2)[x,y] - (s1-s2)[x,y-1])^2 over a 16-wide block, rows 1..h-1.
 */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            total += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
03745
/**
 * Sum of squared differences between an int8 array and an int16 array
 * of the same length.
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int acc = 0;
    int idx;

    for (idx = 0; idx < size; idx++) {
        const int d = pix1[idx] - pix2[idx];
        acc += d * d;
    }
    return acc;
}
03754
/* Generate 16x16 comparison functions from the 8x8 ones: WRAPPER8_16_SQ
 * defines the second name as the sum of the first (8x8 metric) over the
 * four 8x8 quadrants of a 16x16 block. */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#ifdef CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
03765
/** Element-wise in-place multiply: dst[i] *= src[i] for i in [0,len). */
static void vector_fmul_c(float *dst, const float *src, int len){
    int k;
    for (k = 0; k < len; k++)
        dst[k] = dst[k] * src[k];
}
03771
/**
 * Multiply src0 by src1 read backwards:
 * dst[i] = src0[i] * src1[len-1-i] for i in [0,len).
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int k;
    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[len - 1 - k];
}
03778
/**
 * Fused multiply-add with scalar bias into a strided destination:
 * dst[i*step] = src0[i]*src1[i] + src2[i] + src3 for i in [0,len).
 * Note src3 is an int scalar added to every element.
 */
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
    int k;
    for (k = 0; k < len; k++)
        dst[k*step] = src0[k] * src1[k] + src2[k] + src3;
}
03784
/* Convert floats to signed 16-bit integers using an IEEE-754 bit trick.
 * NOTE(review): the input is reinterpreted as raw int32 bits, so this
 * assumes the floats have been pre-biased by the caller so that the low
 * 16 bits of the bit pattern equal (sample + 0x8000); 0x43c00000 is the
 * bit pattern of 384.0f, so inputs presumably lie near [383.0, 385.0) --
 * confirm against the callers that feed this. Out-of-window values
 * (any of bits 16..19 set) are clamped to full-scale via the sign of
 * (0x43c0ffff - tmp). */
void ff_float_to_int16_c(int16_t *dst, const float *src, int len){
    int i;
    for(i=0; i<len; i++) {
        /* reinterpret the float's bits as an integer (type punning) */
        int_fast32_t tmp = ((const int32_t*)src)[i];
        if(tmp & 0xf0000){
            /* out of range: becomes 0 (then -0x8000, min) for underflow,
               -1 (then 0x7fff after int16 wrap, max) for overflow */
            tmp = (0x43c0ffff - tmp)>>31;


        }
        dst[i] = tmp - 0x8000; /* remove the bias, keep the low 16 bits */
    }
}
03798
/* fixed-point IDCT coefficients, scaled by 2048 (11 fractional bits) */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/** 8-point inverse DCT pass over one row of 8 contiguous shorts,
 *  rounding the result back with (+128)>>8. */
static void wmv2_idct_row(short * b)
{
    int m1, m2;
    int t0, t1, t2, t3, t4, t5, t6, t7;

    /* odd part */
    t1 = W1*b[1] + W7*b[7];
    t7 = W7*b[1] - W1*b[7];
    t5 = W5*b[5] + W3*b[3];
    t3 = W3*b[5] - W5*b[3];
    /* even part */
    t2 = W2*b[2] + W6*b[6];
    t6 = W6*b[2] - W2*b[6];
    t0 = W0*b[0] + W0*b[4];
    t4 = W0*b[0] - W0*b[4];

    /* sqrt(2)/2 rotation of the middle odd terms (181/256 ~= 0.7071) */
    m1 = (181*(t1 - t5 + t7 - t3) + 128) >> 8;
    m2 = (181*(t1 - t5 - t7 + t3) + 128) >> 8;

    b[0] = (t0 + t2 + t1 + t5 + (1<<7)) >> 8;
    b[1] = (t4 + t6 + m1      + (1<<7)) >> 8;
    b[2] = (t4 - t6 + m2      + (1<<7)) >> 8;
    b[3] = (t0 - t2 + t7 + t3 + (1<<7)) >> 8;
    b[4] = (t0 - t2 - t7 - t3 + (1<<7)) >> 8;
    b[5] = (t4 - t6 - m2      + (1<<7)) >> 8;
    b[6] = (t4 + t6 - m1      + (1<<7)) >> 8;
    b[7] = (t0 + t2 - t1 - t5 + (1<<7)) >> 8;
}

/** 8-point inverse DCT pass over one column (stride 8) of an 8x8 block;
 *  products are pre-rounded with (+4)>>3 and the final values with
 *  (+(1<<13))>>14 to undo the row pass scaling. */
static void wmv2_idct_col(short * b)
{
    int m1, m2;
    int t0, t1, t2, t3, t4, t5, t6, t7;

    /* odd part */
    t1 = (W1*b[8*1] + W7*b[8*7] + 4) >> 3;
    t7 = (W7*b[8*1] - W1*b[8*7] + 4) >> 3;
    t5 = (W5*b[8*5] + W3*b[8*3] + 4) >> 3;
    t3 = (W3*b[8*5] - W5*b[8*3] + 4) >> 3;
    /* even part */
    t2 = (W2*b[8*2] + W6*b[8*6] + 4) >> 3;
    t6 = (W6*b[8*2] - W2*b[8*6] + 4) >> 3;
    t0 = (W0*b[8*0] + W0*b[8*4]    ) >> 3;
    t4 = (W0*b[8*0] - W0*b[8*4]    ) >> 3;

    m1 = (181*(t1 - t5 + t7 - t3) + 128) >> 8;
    m2 = (181*(t1 - t5 - t7 + t3) + 128) >> 8;

    b[8*0] = (t0 + t2 + t1 + t5 + (1<<13)) >> 14;
    b[8*1] = (t4 + t6 + m1      + (1<<13)) >> 14;
    b[8*2] = (t4 - t6 + m2      + (1<<13)) >> 14;
    b[8*3] = (t0 - t2 + t7 + t3 + (1<<13)) >> 14;
    b[8*4] = (t0 - t2 - t7 - t3 + (1<<13)) >> 14;
    b[8*5] = (t4 - t6 - m2      + (1<<13)) >> 14;
    b[8*6] = (t4 + t6 - m1      + (1<<13)) >> 14;
    b[8*7] = (t0 + t2 - t1 - t5 + (1<<13)) >> 14;
}
/** Full 8x8 WMV2 inverse DCT: row pass over each of the 8 rows,
 *  then column pass over each of the 8 columns, in place. */
void ff_wmv2_idct_c(short * block){
    int row, col;

    for (row = 0; row < 64; row += 8)
        wmv2_idct_row(block + row);
    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);
}
03871
03872
/* WMV2 IDCT, then store the clamped result (overwrites destination). */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* WMV2 IDCT, then add the clamped residual onto the destination. */
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    add_pixels_clamped_c(block, dest, line_size);
}
/* JPEG-reference (libjpeg-style) IDCT, then store the clamped result. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* JPEG-reference IDCT, then add the clamped residual onto the destination. */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
03893
/* 4x4 reduced-resolution IDCT + store; used for lowres==1 decoding. */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
/* 4x4 reduced-resolution IDCT + add; used for lowres==1 decoding. */
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
03904
/* 2x2 reduced-resolution IDCT + store; used for lowres==2 decoding. */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
/* 2x2 reduced-resolution IDCT + add; used for lowres==2 decoding. */
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
03915
/* 1x1 "IDCT" for lowres==3: only the DC coefficient survives,
 * scaled with (DC+4)>>3 and clamped to [0,255] via the crop table. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* clamp-to-[0,255] lookup */

    dest[0] = cm[(block[0] + 4)>>3];
}
/* 1x1 "IDCT" for lowres==3, adding the DC term onto the destination pixel. */
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
03928
/* No-op stub installed as the default DSPContext.prefetch when no
 * architecture-specific implementation overrides it. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
03930
03931
03932 void dsputil_static_init(void)
03933 {
03934 int i;
03935
03936 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
03937 for(i=0;i<MAX_NEG_CROP;i++) {
03938 ff_cropTbl[i] = 0;
03939 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
03940 }
03941
03942 for(i=0;i<512;i++) {
03943 ff_squareTbl[i] = (i - 256) * (i - 256);
03944 }
03945
03946 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
03947 }
03948
/* Runtime sanity check that the compiler honors 16-byte stack alignment
 * for DECLARE_ALIGNED_16 variables. Logs a warning once (on MMX/AltiVec
 * builds) and returns -1 on miscompilation, 0 otherwise. */
int ff_check_alignment(void){
    static int did_fail=0;
    DECLARE_ALIGNED_16(int, aligned);

    /* low 4 bits of the address survive the cast to long, so the
       truncation on LLP64 platforms does not affect this test */
    if((long)&aligned & 15){
        if(!did_fail){
#if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
03968
/* Fill a DSPContext with the C reference implementations for the codec
 * configuration in avctx, hand the context to the platform-specific
 * initializers so they can override entries with optimized versions,
 * and finally build the IDCT coefficient permutation table. */
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i;

    ff_check_alignment();

    /* forward DCT selection (encoder builds only) */
#ifdef CONFIG_ENCODERS
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct = fdct_ifast;
        c->fdct248 = fdct_ifast248;
    }
    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct = ff_faandct;
        c->fdct248 = ff_faandct248;
    }
    else {
        c->fdct = ff_jpeg_fdct_islow; /* slow/accurate default */
        c->fdct248 = ff_fdct248_islow;
    }
#endif //CONFIG_ENCODERS

    /* inverse DCT selection: reduced-size IDCTs for lowres decoding,
       otherwise pick by the requested idct_algo */
    if(avctx->lowres==1){
        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !ENABLE_H264_DECODER){
            c->idct_put= ff_jref_idct4_put;
            c->idct_add= ff_jref_idct4_add;
        }else{
            c->idct_put= ff_h264_lowres_idct_put_c;
            c->idct_add= ff_h264_lowres_idct_add_c;
        }
        c->idct = j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct = j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct = j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= ff_jref_idct_put;
            c->idct_add= ff_jref_idct_add;
            c->idct = j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER ) &&
                avctx->idct_algo==FF_IDCT_VP3){
            c->idct_put= ff_vp3_idct_put_c;
            c->idct_add= ff_vp3_idct_add_c;
            c->idct = ff_vp3_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_WMV2){
            c->idct_put= ff_wmv2_idct_put_c;
            c->idct_add= ff_wmv2_idct_add_c;
            c->idct = ff_wmv2_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else{ /* accurate default */
            c->idct_put= ff_simple_idct_put;
            c->idct_add= ff_simple_idct_add;
            c->idct = ff_simple_idct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
    }

    if (ENABLE_H264_DECODER) {
        c->h264_idct_add= ff_h264_idct_add_c;
        c->h264_idct8_add= ff_h264_idct8_add_c;
        c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
        c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
    }

    /* basic pixel-block operations */
    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->add_pixels8 = add_pixels8_c;
    c->add_pixels4 = add_pixels4_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    /* motion-estimation SAD functions: [size][halfpel interpolation] */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* halfpel put/avg tables, indexed [block size][x/y halfpel flags] */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);
    dspfunc(put, 1, 8);
    dspfunc(put_no_rnd, 1, 8);
    dspfunc(put, 2, 4);
    dspfunc(put, 3, 2);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);
    dspfunc(avg, 1, 8);
    dspfunc(avg_no_rnd, 1, 8);
    dspfunc(avg, 2, 4);
    dspfunc(avg, 3, 2);
#undef dspfunc

    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;

    /* thirdpel interpolation (SVQ3), indexed by y*4+x subpel position */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

    /* quarterpel tables: 16 entries, indexed y*4+x subpel position */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);


    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);


    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(put_h264_qpel, 3, 2);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
    c->put_no_rnd_h264_chroma_pixels_tab[0]= put_no_rnd_h264_chroma_mc8_c;

    /* H.264 weighted prediction, indexed by block size */
    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;

    /* codec-specific DSP sub-initializers */
#ifdef CONFIG_CAVS_DECODER
    ff_cavsdsp_init(c,avctx);
#endif
#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
    ff_vc1dsp_init(c,avctx);
#endif
#if defined(CONFIG_WMV2_DECODER) || defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
    ff_intrax8dsp_init(c,avctx);
#endif
#if defined(CONFIG_H264_ENCODER)
    ff_h264dspenc_init(c,avctx);
#endif

    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    /* comparison functions: [0] = 16x16, [1] = 8x8 variant */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#ifdef CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#ifdef CONFIG_SNOW_ENCODER
    c->w53[0]= w53_16_c;
    c->w53[1]= w53_8_c;
    c->w97[0]= w97_16_c;
    c->w97[1]= w97_8_c;
#endif

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    c->add_bytes= add_bytes_c;
    c->diff_bytes= diff_bytes_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->bswap_buf= bswap_buf;

    /* H.264 in-loop deblocking filters */
    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
    c->h264_loop_filter_strength= NULL;

    if (ENABLE_ANY_H263) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    c->h261_loop_filter= h261_loop_filter_c;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

#ifdef CONFIG_SNOW_DECODER
    c->vertical_compose97i = ff_snow_vertical_compose97i;
    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
    c->inner_add_yblock = ff_snow_inner_add_yblock;
#endif

#ifdef CONFIG_VORBIS_DECODER
    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif
#ifdef CONFIG_FLAC_ENCODER
    c->flac_compute_autocorr = ff_flac_compute_autocorr;
#endif
    c->vector_fmul = vector_fmul_c;
    c->vector_fmul_reverse = vector_fmul_reverse_c;
    c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
    c->float_to_int16 = ff_float_to_int16_c;

    c->shrink[0]= ff_img_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->prefetch= just_return;

    /* 2tap qpel tables start empty; arch code may fill some entries */
    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));

    /* let platform-specific code override the C implementations */
    if (ENABLE_MMX)      dsputil_init_mmx   (c, avctx);
    if (ENABLE_ARMV4L)   dsputil_init_armv4l(c, avctx);
    if (ENABLE_MLIB)     dsputil_init_mlib  (c, avctx);
    if (ENABLE_VIS)      dsputil_init_vis   (c, avctx);
    if (ENABLE_ALPHA)    dsputil_init_alpha (c, avctx);
    if (ENABLE_POWERPC)  dsputil_init_ppc   (c, avctx);
    if (ENABLE_MMI)      dsputil_init_mmi   (c, avctx);
    if (ENABLE_SH4)      dsputil_init_sh4   (c, avctx);
    if (ENABLE_BFIN)     dsputil_init_bfin  (c, avctx);

    /* fall back to the h264 qpel functions for 2tap entries the
       platform code did not provide */
    for(i=0; i<64; i++){
        if(!c->put_2tap_qpel_pixels_tab[0][i])
            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
        if(!c->avg_2tap_qpel_pixels_tab[0][i])
            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
    }

    /* build the coefficient permutation table matching the chosen IDCT */
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
04326