00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #define PIXOP2(OPNAME, OP) \
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081 \
00082 \
/* PIXOP2 body (continued): "_l2" helpers average two 8-bit predictions and */ \
/* "_l4" helpers average four, processing 32 bits (4 pixels) per operation. */ \
/* LP() is a 32-bit load through an aligned pointer and AV_RN32() an        */ \
/* unaligned 32-bit load (both defined outside this chunk); OP is the       */ \
/* writeback operator (op_put/op_avg).  h is the row count, assumed >= 1    */ \
/* (do/while loops).  rnd_avg32 rounds halves up, no_rnd_avg32 truncates.   */ \
static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
} while(--h); \
}\
\
/* 4-wide rounding average; src1 may be unaligned. */ \
static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
} while(--h); \
}\
\
/* 16-wide truncating average; src1 may be unaligned. */ \
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
} while(--h); \
}\
\
/* 16-wide rounding average; src1 may be unaligned. */ \
static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
} while(--h); \
}\
\
/* 8-wide truncating average; src1 may be unaligned. */ \
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do { \
OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
} while(--h); \
}\
\
/* 8-wide rounding average; src1 may be unaligned. */ \
static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
} while(--h); \
}\
\
/* 8-wide truncating average; both sources aligned. */ \
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \
OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
} while(--h); \
}\
\
/* 8-wide rounding average; both sources aligned. */ \
static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
} while(--h); \
}\
\
/* 16-wide truncating average; both sources aligned. */ \
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \
OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \
OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
} while(--h); \
}\
\
/* 16-wide rounding average; both sources aligned. */ \
static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \
OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
} while(--h); \
}\
\
/* "_aligned1" = src2 may be unaligned.  The averages are symmetric in their */ \
/* operands, so these simply swap the sources (and strides) and reuse the    */ \
/* "_aligned2" (src1-unaligned) implementations.                             */ \
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
/* 8-wide average of four predictions per row, using the UNPACK/rnd_PACK    */ \
/* halving trick (macros defined elsewhere in this file); all sources       */ \
/* aligned.                                                                 */ \
static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
do { \
uint32_t a0,a1,a2,a3; \
UNPACK(a0,a1,LP(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
src1+=src_stride1;\
src2+=src_stride2;\
src3+=src_stride3;\
src4+=src_stride4;\
dst+=dst_stride;\
} while(--h); \
} \
\
/* 8-wide four-way average, truncating variant; all sources aligned. */ \
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
do { \
uint32_t a0,a1,a2,a3; \
UNPACK(a0,a1,LP(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
src1+=src_stride1;\
src2+=src_stride2;\
src3+=src_stride3;\
src4+=src_stride4;\
dst+=dst_stride;\
} while(--h); \
} \
\
/* "_aligned0" = src1 may be unaligned (read with AV_RN32). */ \
static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
do { \
uint32_t a0,a1,a2,a3; \
UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
src1+=src_stride1;\
src2+=src_stride2;\
src3+=src_stride3;\
src4+=src_stride4;\
dst+=dst_stride;\
} while(--h); \
} \
\
/* 8-wide four-way average, truncating; src1 may be unaligned. */ \
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
do { \
uint32_t a0,a1,a2,a3; \
UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
src1+=src_stride1;\
src2+=src_stride2;\
src3+=src_stride3;\
src4+=src_stride4;\
dst+=dst_stride;\
} while(--h); \
} \
00278 \
00279 static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00280 do { \
00281 uint32_t a0,a1,a2,a3; \
00282 UNPACK(a0,a1,LP(src1),LP(src2)); \
00283 UNPACK(a2,a3,LP(src3),LP(src4)); \
00284 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
00285 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
00286 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
00287 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
00288 UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
00289 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
00290 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
00291 UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
00292 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
00293 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
00294 src1+=src_stride1;\
00295 src2+=src_stride2;\
00296 src3+=src_stride3;\
00297 src4+=src_stride4;\
00298 dst+=dst_stride;\
00299 } while(--h); \
00300 } \
00301 \
/* 16-wide four-way average, truncating variant; all sources aligned.       */ \
/* Writes the four words to dst, dst+4, dst+8, dst+12 in order.             */ \
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
do { \
uint32_t a0,a1,a2,a3; \
UNPACK(a0,a1,LP(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
src1+=src_stride1;\
src2+=src_stride2;\
src3+=src_stride3;\
src4+=src_stride4;\
dst+=dst_stride;\
} while(--h); \
} \
00324 \
00325 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00326 do { \
00327 uint32_t a0,a1,a2,a3; \
00328 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
00329 UNPACK(a2,a3,LP(src3),LP(src4)); \
00330 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
00331 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
00332 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
00333 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
00334 UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
00335 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
00336 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
00337 UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
00338 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
00339 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
00340 src1+=src_stride1;\
00341 src2+=src_stride2;\
00342 src3+=src_stride3;\
00343 src4+=src_stride4;\
00344 dst+=dst_stride;\
00345 } while(--h); \
00346 } \
00347 \
/* 16-wide four-way average, truncating; src1 may be unaligned (AV_RN32).   */ \
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
do { \
uint32_t a0,a1,a2,a3; \
UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
src1+=src_stride1;\
src2+=src_stride2;\
src3+=src_stride3;\
src4+=src_stride4;\
dst+=dst_stride;\
} while(--h); \
} \
00370 \
00371
/* Writeback operators for PIXOP2: op_avg averages the result with the
 * pixels already in dst, op_put overwrites them.  Instantiate both
 * flavours (avg_* and put_* function families), then drop the helpers. */
#define op_avg(a, b) a = rnd_avg32(a,b)
#define op_put(a, b) a = b

PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
00379
/* Rounding averages of two and four values.  Arguments are fully
 * parenthesized so that expression arguments (e.g. avg2(a, x << 1))
 * parse correctly regardless of operator precedence. */
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
00382
00383
/* One-warp-point global motion compensation: bilinear interpolation of an
 * 8-pixel-wide block at 1/16-pel fractional position (x16, y16).  The four
 * weights sum to 256, so the >>8 normalizes; `rounder` supplies the
 * rounding bias.  h rows are processed (h >= 1). */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int j;

    do {
        const uint8_t *top = src;          /* current row          */
        const uint8_t *bot = src + stride; /* row below            */
        for (j = 0; j < 8; j++)
            dst[j] = (A * top[j]     + B * top[j + 1] +
                      C * bot[j]     + D * bot[j + 1] + rounder) >> 8;
        dst += stride;
        src += stride;
    } while (--h);
}
00416
/* General (affine) global motion compensation: for each output pixel the
 * source position is driven by the 16.16 fixed-point accumulators
 * (vx, vy), advanced by (dxx, dyx) per column and (dxy, dyy) per row.
 * Pixels inside the picture are bilinearly interpolated with `shift`-bit
 * fractional precision and rounding bias r; positions outside are clamped
 * to the nearest edge (av_clip, defined elsewhere) with the interpolation
 * degenerating to 1-D or nearest-sample as appropriate.  Block width is
 * fixed at 8; h rows are produced. */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* pre-decrement so the in-bounds tests below can also guarantee that
       the +1 / +stride neighbours used by the bilinear taps exist */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* NOTE(review): fractions are extracted from the value after the
               >>16 but before the >>shift, i.e. the low `shift` bits of the
               16.16 integer part — matches the generic FFmpeg gmc_c layout */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* unsigned compare folds the x<0 and x>width checks into one */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    index= src_x + src_y*stride;
                    /* fully inside: 2-D bilinear interpolation */
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                        + src[index +1]* frac_x )*(s-frac_y)
                                      + ( src[index+stride ]*(s-frac_x)
                                        + src[index+stride+1]* frac_x )* frac_y
                                        + r)>>(shift*2);
                }else{
                    /* clamped vertically: interpolate in x only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                          + src[index +1]* frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* clamped horizontally: interpolate in y only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
                                          + src[index+stride ]* frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* clamped in both directions: nearest edge sample */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
/* H.264 chroma motion compensation (bilinear).  x,y are the eighth-pel
 * fractional offsets (0..7); the four weights A..D sum to 64 and OP
 * (op_put/op_avg, supplied at instantiation) performs the +32 >>6
 * normalization and writeback.  Widths 2, 4 and 8; h rows (h >= 1). */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
const int A=(8-x)*(8-y);\
const int B=( x)*(8-y);\
const int C=(8-x)*( y);\
const int D=( x)*( y);\
\
assert(x<8 && y<8 && x>=0 && y>=0);\
\
do {\
/* t0..t3 are cycled so each source pixel is loaded only once per row */ \
int t0,t1,t2,t3; \
uint8_t *s0 = src; \
uint8_t *s1 = src+stride; \
t0 = *s0++; t2 = *s1++; \
t1 = *s0++; t3 = *s1++; \
OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
dst+= stride;\
src+= stride;\
}while(--h);\
}\
\
/* 4-wide bilinear chroma MC; same register-cycling scheme. */ \
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
const int A=(8-x)*(8-y);\
const int B=( x)*(8-y);\
const int C=(8-x)*( y);\
const int D=( x)*( y);\
\
assert(x<8 && y<8 && x>=0 && y>=0);\
\
do {\
int t0,t1,t2,t3; \
uint8_t *s0 = src; \
uint8_t *s1 = src+stride; \
t0 = *s0++; t2 = *s1++; \
t1 = *s0++; t3 = *s1++; \
OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
t1 = *s0++; t3 = *s1++; \
OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
dst+= stride;\
src+= stride;\
}while(--h);\
}\
\
/* 8-wide bilinear chroma MC; same register-cycling scheme. */ \
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
const int A=(8-x)*(8-y);\
const int B=( x)*(8-y);\
const int C=(8-x)*( y);\
const int D=( x)*( y);\
\
assert(x<8 && y<8 && x>=0 && y>=0);\
\
do {\
int t0,t1,t2,t3; \
uint8_t *s0 = src; \
uint8_t *s1 = src+stride; \
t0 = *s0++; t2 = *s1++; \
t1 = *s0++; t3 = *s1++; \
OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
t1 = *s0++; t3 = *s1++; \
OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
t1 = *s0++; t3 = *s1++; \
OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\
t1 = *s0++; t3 = *s1++; \
OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\
dst+= stride;\
src+= stride;\
}while(--h);\
}
00556
/* Chroma writeback ops: b carries a 6-bit-scaled weighted sum; op_put
 * rounds (+32) and shifts it down, op_avg additionally averages with the
 * pixel already in dst (rounding up). */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
00564
/* MPEG-4 quarter-pel half-sample filters.  Each output is the 8-tap sum
 * (p0+p1)*20 - (pm1+p2)*6 + (pm2+p3)*3 - (pm3+p4) with taps mirrored at
 * the block borders; normalization/clamping is done by the OP() writeback
 * supplied at instantiation (cm points into ff_cropTbl and is presumably
 * referenced by OP — defined outside this chunk, confirm there). */
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
do {\
uint8_t *s = src; \
int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
src0= *s++;\
src1= *s++;\
src2= *s++;\
src3= *s++;\
src4= *s++;\
OP(dst[0], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
src5= *s++;\
OP(dst[1], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
src6= *s++;\
OP(dst[2], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
src7= *s++;\
OP(dst[3], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
src8= *s++;\
OP(dst[4], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
/* right edge: taps past src8 are mirrored back into the block */ \
OP(dst[5], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
OP(dst[6], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
OP(dst[7], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
dst+=dstStride;\
src+=srcStride;\
}while(--h);\
}\
\
/* Same filter applied down each of the 8 columns (fixed 8 rows out). */ \
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
int w=8;\
do{\
uint8_t *s = src, *d=dst;\
int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
src0 = *s; s+=srcStride; \
src1 = *s; s+=srcStride; \
src2 = *s; s+=srcStride; \
src3 = *s; s+=srcStride; \
src4 = *s; s+=srcStride; \
OP(*d, (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));d+=dstStride;\
src5 = *s; s+=srcStride; \
OP(*d, (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));d+=dstStride;\
src6 = *s; s+=srcStride; \
OP(*d, (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));d+=dstStride;\
src7 = *s; s+=srcStride; \
OP(*d, (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));d+=dstStride;\
src8 = *s; \
OP(*d, (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));d+=dstStride;\
OP(*d, (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));d+=dstStride;\
OP(*d, (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));d+=dstStride;\
OP(*d, (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
dst++;\
src++;\
}while(--w);\
}\
\
/* 16-wide horizontal lowpass; interior taps are exact, edges mirrored. */ \
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
do {\
uint8_t *s = src;\
int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
int src9,src10,src11,src12,src13,src14,src15,src16;\
src0= *s++;\
src1= *s++;\
src2= *s++;\
src3= *s++;\
src4= *s++;\
OP(dst[ 0], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
src5= *s++;\
OP(dst[ 1], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
src6= *s++;\
OP(dst[ 2], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
src7= *s++;\
OP(dst[ 3], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
src8= *s++;\
OP(dst[ 4], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
src9= *s++;\
OP(dst[ 5], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
src10= *s++;\
OP(dst[ 6], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
src11= *s++;\
OP(dst[ 7], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
src12= *s++;\
OP(dst[ 8], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
src13= *s++;\
OP(dst[ 9], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
src14= *s++;\
OP(dst[10], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
src15= *s++;\
OP(dst[11], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
src16= *s++;\
OP(dst[12], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
OP(dst[13], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
OP(dst[14], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
OP(dst[15], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
dst+=dstStride;\
src+=srcStride;\
}while(--h);\
}\
\
/* 16-wide vertical lowpass (fixed 16 rows out), column by column. */ \
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
int w=16;\
do {\
uint8_t *s = src, *d=dst;\
int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
int src9,src10,src11,src12,src13,src14,src15,src16;\
src0 = *s; s+=srcStride; \
src1 = *s; s+=srcStride; \
src2 = *s; s+=srcStride; \
src3 = *s; s+=srcStride; \
src4 = *s; s+=srcStride; \
OP(*d, (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));d+=dstStride;\
src5 = *s; s+=srcStride; \
OP(*d, (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));d+=dstStride;\
src6 = *s; s+=srcStride; \
OP(*d, (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));d+=dstStride;\
src7 = *s; s+=srcStride; \
OP(*d, (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));d+=dstStride;\
src8 = *s; s+=srcStride; \
OP(*d, (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));d+=dstStride;\
src9 = *s; s+=srcStride; \
OP(*d, (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));d+=dstStride;\
src10 = *s; s+=srcStride; \
OP(*d, (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));d+=dstStride;\
src11 = *s; s+=srcStride; \
OP(*d, (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));d+=dstStride;\
src12 = *s; s+=srcStride; \
OP(*d, (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));d+=dstStride;\
src13 = *s; s+=srcStride; \
OP(*d, (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));d+=dstStride;\
src14 = *s; s+=srcStride; \
OP(*d, (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));d+=dstStride;\
src15 = *s; s+=srcStride; \
OP(*d, (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));d+=dstStride;\
src16 = *s; \
OP(*d, (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));d+=dstStride;\
OP(*d, (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));d+=dstStride;\
OP(*d, (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));d+=dstStride;\
OP(*d, (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
dst++;\
src++;\
}while(--w);\
}\
00709 \
00710 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
00711 OPNAME ## pixels8_c(dst, src, stride, 8);\
00712 }\
00713 \
00714 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00715 uint8_t half[64];\
00716 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00717 OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\
00718 }\
00719 \
00720 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00721 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00722 }\
00723 \
00724 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00725 uint8_t half[64];\
00726 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00727 OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\
00728 }\
00729 \
00730 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00731 uint8_t full[16*9];\
00732 uint8_t half[64];\
00733 copy_block9(full, src, 16, stride, 9);\
00734 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00735 OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\
00736 }\
00737 \
00738 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00739 uint8_t full[16*9];\
00740 copy_block9(full, src, 16, stride, 9);\
00741 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00742 }\
00743 \
00744 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00745 uint8_t full[16*9];\
00746 uint8_t half[64];\
00747 copy_block9(full, src, 16, stride, 9);\
00748 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00749 OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\
00750 }\
00751 static void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00752 uint8_t full[16*9];\
00753 uint8_t halfH[72];\
00754 uint8_t halfV[64];\
00755 uint8_t halfHV[64];\
00756 copy_block9(full, src, 16, stride, 9);\
00757 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00758 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00759 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00760 OPNAME ## pixels8_l4_aligned(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00761 }\
00762 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00763 uint8_t full[16*9];\
00764 uint8_t halfH[72];\
00765 uint8_t halfHV[64];\
00766 copy_block9(full, src, 16, stride, 9);\
00767 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00768 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
00769 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00770 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
00771 }\
00772 static void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00773 uint8_t full[16*9];\
00774 uint8_t halfH[72];\
00775 uint8_t halfV[64];\
00776 uint8_t halfHV[64];\
00777 copy_block9(full, src, 16, stride, 9);\
00778 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00779 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00780 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00781 OPNAME ## pixels8_l4_aligned0(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00782 }\
00783 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00784 uint8_t full[16*9];\
00785 uint8_t halfH[72];\
00786 uint8_t halfHV[64];\
00787 copy_block9(full, src, 16, stride, 9);\
00788 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00789 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
00790 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00791 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
00792 }\
/* 8x8 quarter-pel motion compensation, position (x=1/4, y=3/4).
   The *_old_c reference averages four planes (integer, half-H, half-V, half-HV);
   the current version averages the H-filtered plane with the HV-filtered plane.
   halfH is 8x9 (one extra row) so the vertical pass has its needed context.
   NOTE(review): the _aligned/_aligned0/_aligned1 suffixes appear to encode source
   alignment assumptions of the PIXOP2-generated helpers -- confirm there. */\
static void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
uint8_t halfV[64];\
uint8_t halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4_aligned(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
uint8_t halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
/* merge the integer-pel column into halfH in place before the vertical pass */\
put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
/* halfH+8 skips the extra context row: y=3/4 uses the lower neighbour */\
OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
/* position (x=3/4, y=3/4): as above but shifted one pixel right (full+1/full+17) */\
static void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
uint8_t halfV[64];\
uint8_t halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4_aligned0(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
uint8_t halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
/* 8x8 quarter-pel MC, x=2/4 (center) column and y=2/4 row positions.
   mc21/mc23: average half-H plane with half-HV plane (no integer plane needed).
   mc12/mc32: fold the shifted integer column into halfH, then run the vertical
   filter straight into dst. mc22 is the pure half-H + half-V case. */\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[72];\
uint8_t halfHV[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[72];\
uint8_t halfHV[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
/* y=3/4: start one row lower in the H-filtered plane */\
OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
uint8_t halfV[64];\
uint8_t halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
/* vertical pass writes the final OPNAME result directly */\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
uint8_t halfV[64];\
uint8_t halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[72];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
/* 16x16 quarter-pel MC, on-axis positions.
   mc00 = integer copy; mc10/mc30 = quarter-pel horizontal (average of source and
   half-H plane, with src+1 for x=3/4); mc20 = pure half-H filter into dst.
   mc01/mc03/mc02 are the vertical equivalents, working on a 24x17 padded copy.
   The _l2_aligned2 variant tolerates an unaligned first source (AV_RN32 load). */\
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
OPNAME ## pixels16_l2_aligned2(dst, src, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
OPNAME ## pixels16_l2_aligned2(dst, src+1, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t half[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
OPNAME ## pixels16_l2_aligned(dst, full, half, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
copy_block17(full, src, 24, stride, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t half[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
/* full+24 = one row down for y=3/4 */\
OPNAME ## pixels16_l2_aligned(dst, full+24, half, stride, 24, 16, 16);\
}\
/* 16x16 quarter-pel MC, corner positions (odd x and y quarters).
   Structure mirrors the qpel8 versions: the *_old_c references average four
   planes; the current versions fold the integer column into the 16x17 halfH
   buffer, run the vertical filter, and average H with HV. full is 24x17
   (17 rows of context for the 17-tap-high working set). */\
static void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfV[256];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4_aligned(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfV[256];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4_aligned0(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
/* x=3/4: integer column taken one pixel right (full+1, unaligned by 1) */\
put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfV[256];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4_aligned(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
/* y=3/4: halfH+16 starts one row lower */\
OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfV[256];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4_aligned0(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
/* 16x16 quarter-pel MC, center column/row positions; see the qpel8 versions
   above for the per-position strategy. mc22 closes the QPEL_MC macro. */\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[272];\
uint8_t halfHV[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[272];\
uint8_t halfHV[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfV[256];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
/* vertical pass writes the final OPNAME result directly into dst */\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfV[256];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[272];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}
01074
/* Store macros plugged into QPEL_MC via the OP parameter. b is a raw 6-tap
   filter sum; +16>>5 rounds (divide by 32), +15>>5 truncates ("no_rnd").
   cm is the clipping table in scope inside the generated functions. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Generate put_, put_no_rnd_ and avg_ families of the qpel8/qpel16 functions. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

/* The op_* macros are local to the instantiations above. */
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
01088
01089 #if 1
/* H.264 half-pel interpolation: 6-tap (1,-5,20,20,-5,1) FIR filter, fully
   unrolled per output pixel. OPNAME prefixes the generated function names,
   OP stores a single-pass result (sum scaled by 32), OP2 stores a two-pass
   result (sum scaled by 1024). Each pixel needs 2 samples of context on
   either side, hence the srcB/srcA lead-in reads. w/h select 4, 8 or 16 wide
   variants via the nested if (w>4)/if (w>8) blocks. */
#define H264_LOWPASS(OPNAME, OP, OP2) \
/* horizontal pass: one row per loop iteration, pixels unrolled left to right */\
static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
do {\
int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
uint8_t *s = src-2;\
srcB = *s++;\
srcA = *s++;\
src0 = *s++;\
src1 = *s++;\
src2 = *s++;\
src3 = *s++;\
OP(dst[0], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
src4 = *s++;\
OP(dst[1], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
src5 = *s++;\
OP(dst[2], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
src6 = *s++;\
OP(dst[3], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
if (w>4) { \
int src7,src8,src9,src10; \
src7 = *s++;\
OP(dst[4], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
src8 = *s++;\
OP(dst[5], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
src9 = *s++;\
OP(dst[6], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
src10 = *s++;\
OP(dst[7], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
if (w>8) { \
int src11,src12,src13,src14,src15,src16,src17,src18; \
src11 = *s++;\
OP(dst[8] , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
src12 = *s++;\
OP(dst[9] , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
src13 = *s++;\
OP(dst[10], (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
src14 = *s++;\
OP(dst[11], (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
src15 = *s++;\
OP(dst[12], (src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
src16 = *s++;\
OP(dst[13], (src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
src17 = *s++;\
OP(dst[14], (src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
src18 = *s++;\
OP(dst[15], (src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
} \
} \
dst+=dstStride;\
src+=srcStride;\
}while(--h);\
}\
\
/* vertical pass: one column per loop iteration, pixels unrolled top to bottom */\
static inline void OPNAME ## h264_qpel_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
do{\
int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
uint8_t *s = src-2*srcStride,*d=dst;\
srcB = *s; s+=srcStride;\
srcA = *s; s+=srcStride;\
src0 = *s; s+=srcStride;\
src1 = *s; s+=srcStride;\
src2 = *s; s+=srcStride;\
src3 = *s; s+=srcStride;\
OP(*d, (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));d+=dstStride;\
src4 = *s; s+=srcStride;\
OP(*d, (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));d+=dstStride;\
src5 = *s; s+=srcStride;\
OP(*d, (src2+src3)*20 - (src1+src4)*5 + (src0+src5));d+=dstStride;\
src6 = *s; s+=srcStride;\
OP(*d, (src3+src4)*20 - (src2+src5)*5 + (src1+src6));d+=dstStride;\
if (h>4) { \
int src7,src8,src9,src10; \
src7 = *s; s+=srcStride;\
OP(*d, (src4+src5)*20 - (src3+src6)*5 + (src2+src7));d+=dstStride;\
src8 = *s; s+=srcStride;\
OP(*d, (src5+src6)*20 - (src4+src7)*5 + (src3+src8));d+=dstStride;\
src9 = *s; s+=srcStride;\
OP(*d, (src6+src7)*20 - (src5+src8)*5 + (src4+src9));d+=dstStride;\
src10 = *s; s+=srcStride;\
OP(*d, (src7+src8)*20 - (src6+src9)*5 + (src5+src10));d+=dstStride;\
if (h>8) { \
int src11,src12,src13,src14,src15,src16,src17,src18; \
src11 = *s; s+=srcStride;\
OP(*d , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));d+=dstStride;\
src12 = *s; s+=srcStride;\
OP(*d , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));d+=dstStride;\
src13 = *s; s+=srcStride;\
OP(*d, (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));d+=dstStride;\
src14 = *s; s+=srcStride;\
OP(*d, (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));d+=dstStride;\
src15 = *s; s+=srcStride;\
OP(*d, (src12+src13)*20 - (src11+src14)*5 + (src10+src15));d+=dstStride;\
src16 = *s; s+=srcStride;\
OP(*d, (src13+src14)*20 - (src12+src15)*5 + (src11+src16));d+=dstStride;\
src17 = *s; s+=srcStride;\
OP(*d, (src14+src15)*20 - (src13+src16)*5 + (src12+src17));d+=dstStride;\
src18 = *s; s+=srcStride;\
OP(*d, (src15+src16)*20 - (src14+src17)*5 + (src13+src18));d+=dstStride;\
} \
} \
dst++;\
src++;\
}while(--w);\
}\
\
/* combined pass: horizontal filter into the int16_t tmp plane (h+5 rows of\
   context, no clipping), then vertical filter of tmp into dst via OP2 */\
static inline void OPNAME ## h264_qpel_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride,int w,int h){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
int i;\
src -= 2*srcStride;\
i= h+5; \
do {\
int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
uint8_t *s = src-2;\
srcB = *s++;\
srcA = *s++;\
src0 = *s++;\
src1 = *s++;\
src2 = *s++;\
src3 = *s++;\
tmp[0] = ((src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
src4 = *s++;\
tmp[1] = ((src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
src5 = *s++;\
tmp[2] = ((src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
src6 = *s++;\
tmp[3] = ((src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
if (w>4) { \
int src7,src8,src9,src10; \
src7 = *s++;\
tmp[4] = ((src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
src8 = *s++;\
tmp[5] = ((src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
src9 = *s++;\
tmp[6] = ((src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
src10 = *s++;\
tmp[7] = ((src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
if (w>8) { \
int src11,src12,src13,src14,src15,src16,src17,src18; \
src11 = *s++;\
tmp[8] = ((src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
src12 = *s++;\
tmp[9] = ((src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
src13 = *s++;\
tmp[10] = ((src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
src14 = *s++;\
tmp[11] = ((src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
src15 = *s++;\
tmp[12] = ((src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
src16 = *s++;\
tmp[13] = ((src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
src17 = *s++;\
tmp[14] = ((src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
src18 = *s++;\
tmp[15] = ((src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
} \
} \
tmp+=tmpStride;\
src+=srcStride;\
}while(--i);\
/* rewind tmp to row 2 so s = tmp-2*tmpStride points at the first context row */\
tmp -= tmpStride*(h+5-2);\
i = w; \
do {\
int tmpB,tmpA,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6;\
int16_t *s = tmp-2*tmpStride; \
uint8_t *d=dst;\
tmpB = *s; s+=tmpStride;\
tmpA = *s; s+=tmpStride;\
tmp0 = *s; s+=tmpStride;\
tmp1 = *s; s+=tmpStride;\
tmp2 = *s; s+=tmpStride;\
tmp3 = *s; s+=tmpStride;\
OP2(*d, (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));d+=dstStride;\
tmp4 = *s; s+=tmpStride;\
OP2(*d, (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));d+=dstStride;\
tmp5 = *s; s+=tmpStride;\
OP2(*d, (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));d+=dstStride;\
tmp6 = *s; s+=tmpStride;\
OP2(*d, (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));d+=dstStride;\
if (h>4) { \
int tmp7,tmp8,tmp9,tmp10; \
tmp7 = *s; s+=tmpStride;\
OP2(*d, (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));d+=dstStride;\
tmp8 = *s; s+=tmpStride;\
OP2(*d, (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));d+=dstStride;\
tmp9 = *s; s+=tmpStride;\
OP2(*d, (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));d+=dstStride;\
tmp10 = *s; s+=tmpStride;\
OP2(*d, (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));d+=dstStride;\
if (h>8) { \
int tmp11,tmp12,tmp13,tmp14,tmp15,tmp16,tmp17,tmp18; \
tmp11 = *s; s+=tmpStride;\
OP2(*d , (tmp8 +tmp9 )*20 - (tmp7 +tmp10)*5 + (tmp6 +tmp11));d+=dstStride;\
tmp12 = *s; s+=tmpStride;\
OP2(*d , (tmp9 +tmp10)*20 - (tmp8 +tmp11)*5 + (tmp7 +tmp12));d+=dstStride;\
tmp13 = *s; s+=tmpStride;\
OP2(*d, (tmp10+tmp11)*20 - (tmp9 +tmp12)*5 + (tmp8 +tmp13));d+=dstStride;\
tmp14 = *s; s+=tmpStride;\
OP2(*d, (tmp11+tmp12)*20 - (tmp10+tmp13)*5 + (tmp9 +tmp14));d+=dstStride;\
tmp15 = *s; s+=tmpStride;\
OP2(*d, (tmp12+tmp13)*20 - (tmp11+tmp14)*5 + (tmp10+tmp15));d+=dstStride;\
tmp16 = *s; s+=tmpStride;\
OP2(*d, (tmp13+tmp14)*20 - (tmp12+tmp15)*5 + (tmp11+tmp16));d+=dstStride;\
tmp17 = *s; s+=tmpStride;\
OP2(*d, (tmp14+tmp15)*20 - (tmp13+tmp16)*5 + (tmp12+tmp17));d+=dstStride;\
tmp18 = *s; s+=tmpStride;\
OP2(*d, (tmp15+tmp16)*20 - (tmp14+tmp17)*5 + (tmp13+tmp18));d+=dstStride;\
} \
} \
dst++;\
tmp++;\
}while(--i);\
}\
\
/* fixed-size convenience wrappers over the generic w/h implementations */\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,4,4); \
}\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,8,8); \
}\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,16,16); \
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,4,4); \
}\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,8,8); \
}\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,16,16); \
}\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,4,4); \
}\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,8,8); \
}\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,16,16); \
}\
01333
/* H.264 quarter-pel motion compensation for one block SIZE (4, 8 or 16).
   Generates the 16 _mcXY_c functions (X,Y = quarter-pel offsets 0..3) by
   combining the H264_LOWPASS half-pel planes; quarter positions are the
   average of two neighbouring half/integer planes. full is the source copied
   with 2 rows of context above and 3 below for the vertical 6-tap filter. */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t half[SIZE*SIZE];\
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t half[SIZE*SIZE];\
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
/* corner positions: average of the half-H and half-V planes; the H plane is\
   shifted one row down for Y=3, the V plane one column right for X=3 */\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t halfH[SIZE*SIZE];\
uint8_t halfV[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t halfH[SIZE*SIZE];\
uint8_t halfV[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t halfH[SIZE*SIZE];\
uint8_t halfV[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t halfH[SIZE*SIZE];\
uint8_t halfV[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
/* center and mid-edge positions use the two-pass hv filter (int16_t tmp plane) */\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
int16_t tmp[SIZE*(SIZE+5)];\
OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
int16_t tmp[SIZE*(SIZE+5)];\
uint8_t halfH[SIZE*SIZE];\
uint8_t halfHV[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
int16_t tmp[SIZE*(SIZE+5)];\
uint8_t halfH[SIZE*SIZE];\
uint8_t halfHV[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
int16_t tmp[SIZE*(SIZE+5)];\
uint8_t halfV[SIZE*SIZE];\
uint8_t halfHV[SIZE*SIZE];\
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
int16_t tmp[SIZE*(SIZE+5)];\
uint8_t halfV[SIZE*SIZE];\
uint8_t halfHV[SIZE*SIZE];\
copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
01470
/* Store macros for the H.264 filters: op_* scale single-pass sums by 32
   (+16 rounds), op2_* scale two-pass sums by 1024 (+512 rounds); cm clips
   to 0..255 inside the generated functions. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)

#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

/* Generate put_ and avg_ half-pel filters, then the full quarter-pel MC set
   for the three H.264 block widths. */
H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif
01491
01492 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01493 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01494
01495 do{
01496 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
01497 uint8_t *s = src;
01498 src_1 = s[-1];
01499 src0 = *s++;
01500 src1 = *s++;
01501 src2 = *s++;
01502 dst[0]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01503 src3 = *s++;
01504 dst[1]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01505 src4 = *s++;
01506 dst[2]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01507 src5 = *s++;
01508 dst[3]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01509 src6 = *s++;
01510 dst[4]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01511 src7 = *s++;
01512 dst[5]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01513 src8 = *s++;
01514 dst[6]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01515 src9 = *s++;
01516 dst[7]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01517 dst+=dstStride;
01518 src+=srcStride;
01519 }while(--h);
01520 }
01521
01522 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01523 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01524
01525 do{
01526 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
01527 uint8_t *s = src,*d = dst;
01528 src_1 = *(s-srcStride);
01529 src0 = *s; s+=srcStride;
01530 src1 = *s; s+=srcStride;
01531 src2 = *s; s+=srcStride;
01532 *d= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; d+=dstStride;
01533 src3 = *s; s+=srcStride;
01534 *d= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; d+=dstStride;
01535 src4 = *s; s+=srcStride;
01536 *d= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; d+=dstStride;
01537 src5 = *s; s+=srcStride;
01538 *d= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; d+=dstStride;
01539 src6 = *s; s+=srcStride;
01540 *d= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; d+=dstStride;
01541 src7 = *s; s+=srcStride;
01542 *d= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; d+=dstStride;
01543 src8 = *s; s+=srcStride;
01544 *d= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; d+=dstStride;
01545 src9 = *s;
01546 *d= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; d+=dstStride;
01547 src++;
01548 dst++;
01549 }while(--w);
01550 }
01551
/* mspel MC, full-pel position (0,0): plain 8x8 copy, no filtering. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}
01555
/* mspel MC, quarter-pel (1,0): average the source with the horizontal
 * half-pel interpolation (left-biased quarter-pel approximation). */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];   /* 8x8 horizontally filtered block */

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2_aligned2(dst, src, hbuf, stride, stride, 8, 8);
}
01561
/* mspel MC, half-pel (2,0): horizontal half-pel filter straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01565
/* mspel MC, quarter-pel (3,0): average src+1 with the horizontal half-pel
 * interpolation (right-biased quarter-pel approximation). */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];   /* 8x8 horizontally filtered block */

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2_aligned2(dst, src + 1, hbuf, stride, stride, 8, 8);
}
01571
/* mspel MC, half-pel (0,2): vertical half-pel filter straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01575
/* mspel MC, (1,2): average the vertical half-pel interpolation with the
 * 2-D (h-then-v) half-pel interpolation. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[88];   /* 8x11 horizontally filtered block, rows -1..9 */
    uint8_t vbuf[64];   /* 8x8 vertically filtered block */
    uint8_t hvbuf[64];  /* 8x8 h-then-v filtered block */

    wmv2_mspel8_v_lowpass(vbuf, src, 8, stride, 8);
    wmv2_mspel8_h_lowpass(hbuf, src - stride, 8, stride, 11);
    /* hbuf+8 skips the row at -1; the vertical pass re-reads it via src[-stride] */
    wmv2_mspel8_v_lowpass(hvbuf, hbuf + 8, 8, 8, 8);
    put_pixels8_l2_aligned(dst, vbuf, hvbuf, stride, 8, 8, 8);
}
/* mspel MC, (3,2): like mc12 but the vertical-only interpolation is taken
 * one pixel to the right (src+1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[88];   /* 8x11 horizontally filtered block, rows -1..9 */
    uint8_t vbuf[64];   /* 8x8 vertically filtered block (from src+1) */
    uint8_t hvbuf[64];  /* 8x8 h-then-v filtered block */

    wmv2_mspel8_v_lowpass(vbuf, src + 1, 8, stride, 8);
    wmv2_mspel8_h_lowpass(hbuf, src - stride, 8, stride, 11);
    /* hbuf+8 skips the row at -1; the vertical pass re-reads it via src[-stride] */
    wmv2_mspel8_v_lowpass(hvbuf, hbuf + 8, 8, 8, 8);
    put_pixels8_l2_aligned(dst, vbuf, hvbuf, stride, 8, 8, 8);
}
/* mspel MC, centre position (2,2): separable 2-D half-pel interpolation,
 * horizontal pass into a temp buffer, vertical pass straight to dst. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[88];   /* 8x11 horizontally filtered block, rows -1..9 */

    wmv2_mspel8_h_lowpass(hbuf, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, hbuf + 8, stride, 8, 8);
}