Libav/FFmpeg source listing: libavcodec/sh4/qpel.c
00001 /* 00002 * This is optimized for sh, which have post increment addressing (*p++). 00003 * Some CPU may be index (p[n]) faster than post increment (*p++). 00004 * 00005 * copyright (c) 2001-2003 BERO <bero@geocities.co.jp> 00006 * 00007 * This file is part of FFmpeg. 00008 * 00009 * FFmpeg is free software; you can redistribute it and/or 00010 * modify it under the terms of the GNU Lesser General Public 00011 * License as published by the Free Software Foundation; either 00012 * version 2.1 of the License, or (at your option) any later version. 00013 * 00014 * FFmpeg is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 * Lesser General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU Lesser General Public 00020 * License along with FFmpeg; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00022 */ 00023 00024 #define PIXOP2(OPNAME, OP) \ 00025 \ 00026 static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00027 {\ 00028 do {\ 00029 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ 00030 src1+=src_stride1; \ 00031 src2+=src_stride2; \ 00032 dst+=dst_stride; \ 00033 } while(--h); \ 00034 }\ 00035 \ 00036 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00037 {\ 00038 do {\ 00039 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ 00040 src1+=src_stride1; \ 00041 src2+=src_stride2; \ 00042 dst+=dst_stride; \ 00043 } while(--h); \ 00044 }\ 00045 \ 00046 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int 
src_stride1, int src_stride2, int h) \ 00047 {\ 00048 do {\ 00049 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ 00050 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ 00051 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \ 00052 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \ 00053 src1+=src_stride1; \ 00054 src2+=src_stride2; \ 00055 dst+=dst_stride; \ 00056 } while(--h); \ 00057 }\ 00058 \ 00059 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00060 {\ 00061 do {\ 00062 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ 00063 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ 00064 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \ 00065 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \ 00066 src1+=src_stride1; \ 00067 src2+=src_stride2; \ 00068 dst+=dst_stride; \ 00069 } while(--h); \ 00070 }\ 00071 \ 00072 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00073 {\ 00074 do { /* onlye src2 aligned */\ 00075 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ 00076 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ 00077 src1+=src_stride1; \ 00078 src2+=src_stride2; \ 00079 dst+=dst_stride; \ 00080 } while(--h); \ 00081 }\ 00082 \ 00083 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00084 {\ 00085 do {\ 00086 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \ 00087 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \ 00088 src1+=src_stride1; \ 00089 src2+=src_stride2; \ 00090 dst+=dst_stride; \ 00091 } while(--h); \ 00092 }\ 00093 \ 00094 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t 
*src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00095 {\ 00096 do {\ 00097 OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ 00098 OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ 00099 src1+=src_stride1; \ 00100 src2+=src_stride2; \ 00101 dst+=dst_stride; \ 00102 } while(--h); \ 00103 }\ 00104 \ 00105 static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00106 {\ 00107 do {\ 00108 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ 00109 OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ 00110 src1+=src_stride1; \ 00111 src2+=src_stride2; \ 00112 dst+=dst_stride; \ 00113 } while(--h); \ 00114 }\ 00115 \ 00116 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00117 {\ 00118 do {\ 00119 OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ 00120 OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ 00121 OP(LP(dst+8),no_rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \ 00122 OP(LP(dst+12),no_rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \ 00123 src1+=src_stride1; \ 00124 src2+=src_stride2; \ 00125 dst+=dst_stride; \ 00126 } while(--h); \ 00127 }\ 00128 \ 00129 static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00130 {\ 00131 do {\ 00132 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \ 00133 OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \ 00134 OP(LP(dst+8),rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \ 00135 OP(LP(dst+12),rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \ 00136 src1+=src_stride1; \ 00137 src2+=src_stride2; \ 00138 dst+=dst_stride; \ 00139 } while(--h); \ 00140 }\ 00141 \ 00142 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t 
*src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00143 { OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \ 00144 \ 00145 static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00146 { OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \ 00147 \ 00148 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00149 { OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \ 00150 \ 00151 static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ 00152 { OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \ 00153 \ 00154 static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 00155 do { \ 00156 uint32_t a0,a1,a2,a3; \ 00157 UNPACK(a0,a1,LPC(src1),LPC(src2)); \ 00158 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ 00159 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ 00160 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ 00161 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ 00162 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ 00163 src1+=src_stride1;\ 00164 src2+=src_stride2;\ 00165 src3+=src_stride3;\ 00166 src4+=src_stride4;\ 00167 dst+=dst_stride;\ 00168 } while(--h); \ 00169 } \ 00170 \ 00171 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 00172 do { \ 00173 uint32_t a0,a1,a2,a3; \ 
00174 UNPACK(a0,a1,LPC(src1),LPC(src2)); \ 00175 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ 00176 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ 00177 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ 00178 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ 00179 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ 00180 src1+=src_stride1;\ 00181 src2+=src_stride2;\ 00182 src3+=src_stride3;\ 00183 src4+=src_stride4;\ 00184 dst+=dst_stride;\ 00185 } while(--h); \ 00186 } \ 00187 \ 00188 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 00189 do { \ 00190 uint32_t a0,a1,a2,a3; /* src1 only not aligned */\ 00191 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ 00192 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ 00193 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ 00194 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ 00195 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ 00196 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \ 00197 src1+=src_stride1;\ 00198 src2+=src_stride2;\ 00199 src3+=src_stride3;\ 00200 src4+=src_stride4;\ 00201 dst+=dst_stride;\ 00202 } while(--h); \ 00203 } \ 00204 \ 00205 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 00206 do { \ 00207 uint32_t a0,a1,a2,a3; \ 00208 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ 00209 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ 00210 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ 00211 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ 00212 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ 00213 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ 00214 src1+=src_stride1;\ 00215 src2+=src_stride2;\ 00216 src3+=src_stride3;\ 00217 src4+=src_stride4;\ 00218 dst+=dst_stride;\ 00219 } while(--h); \ 00220 } \ 00221 \ 00222 static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, 
uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 00223 do { \ 00224 uint32_t a0,a1,a2,a3; \ 00225 UNPACK(a0,a1,LPC(src1),LPC(src2)); \ 00226 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ 00227 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ 00228 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ 00229 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ 00230 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ 00231 UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \ 00232 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ 00233 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ 00234 UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \ 00235 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ 00236 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ 00237 src1+=src_stride1;\ 00238 src2+=src_stride2;\ 00239 src3+=src_stride3;\ 00240 src4+=src_stride4;\ 00241 dst+=dst_stride;\ 00242 } while(--h); \ 00243 } \ 00244 \ 00245 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 00246 do { \ 00247 uint32_t a0,a1,a2,a3; \ 00248 UNPACK(a0,a1,LPC(src1),LPC(src2)); \ 00249 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ 00250 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ 00251 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \ 00252 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ 00253 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ 00254 UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \ 00255 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ 00256 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ 00257 UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \ 00258 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ 00259 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ 00260 src1+=src_stride1;\ 00261 src2+=src_stride2;\ 00262 src3+=src_stride3;\ 00263 src4+=src_stride4;\ 00264 dst+=dst_stride;\ 00265 } while(--h); \ 00266 } \ 00267 \ 00268 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, 
uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 00269 do { /* src1 is unaligned */\ 00270 uint32_t a0,a1,a2,a3; \ 00271 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ 00272 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ 00273 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \ 00274 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ 00275 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ 00276 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ 00277 UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \ 00278 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ 00279 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \ 00280 UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \ 00281 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ 00282 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \ 00283 src1+=src_stride1;\ 00284 src2+=src_stride2;\ 00285 src3+=src_stride3;\ 00286 src4+=src_stride4;\ 00287 dst+=dst_stride;\ 00288 } while(--h); \ 00289 } \ 00290 \ 00291 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 00292 do { \ 00293 uint32_t a0,a1,a2,a3; \ 00294 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \ 00295 UNPACK(a2,a3,LPC(src3),LPC(src4)); \ 00296 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \ 00297 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \ 00298 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \ 00299 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \ 00300 UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \ 00301 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \ 00302 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \ 00303 UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \ 00304 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \ 00305 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \ 00306 src1+=src_stride1;\ 00307 src2+=src_stride2;\ 00308 src3+=src_stride3;\ 00309 src4+=src_stride4;\ 00310 dst+=dst_stride;\ 00311 } while(--h); \ 00312 } \ 00313 \ 00314 00315 #define op_avg(a, b) a = 
rnd_avg32(a,b) 00316 #define op_put(a, b) a = b 00317 00318 PIXOP2(avg, op_avg) 00319 PIXOP2(put, op_put) 00320 #undef op_avg 00321 #undef op_put 00322 00323 #define avg2(a,b) ((a+b+1)>>1) 00324 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) 00325 00326 00327 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder) 00328 { 00329 const int A=(16-x16)*(16-y16); 00330 const int B=( x16)*(16-y16); 00331 const int C=(16-x16)*( y16); 00332 const int D=( x16)*( y16); 00333 00334 do { 00335 int t0,t1,t2,t3; 00336 uint8_t *s0 = src; 00337 uint8_t *s1 = src+stride; 00338 t0 = *s0++; t2 = *s1++; 00339 t1 = *s0++; t3 = *s1++; 00340 dst[0]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; 00341 t0 = *s0++; t2 = *s1++; 00342 dst[1]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; 00343 t1 = *s0++; t3 = *s1++; 00344 dst[2]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; 00345 t0 = *s0++; t2 = *s1++; 00346 dst[3]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; 00347 t1 = *s0++; t3 = *s1++; 00348 dst[4]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; 00349 t0 = *s0++; t2 = *s1++; 00350 dst[5]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; 00351 t1 = *s0++; t3 = *s1++; 00352 dst[6]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8; 00353 t0 = *s0++; t2 = *s1++; 00354 dst[7]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8; 00355 dst+= stride; 00356 src+= stride; 00357 }while(--h); 00358 } 00359 00360 static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, 00361 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height) 00362 { 00363 int y, vx, vy; 00364 const int s= 1<<shift; 00365 00366 width--; 00367 height--; 00368 00369 for(y=0; y<h; y++){ 00370 int x; 00371 00372 vx= ox; 00373 vy= oy; 00374 for(x=0; x<8; x++){ //XXX FIXME optimize 00375 int src_x, src_y, frac_x, frac_y, index; 00376 00377 src_x= vx>>16; 00378 src_y= vy>>16; 00379 frac_x= src_x&(s-1); 00380 frac_y= src_y&(s-1); 00381 src_x>>=shift; 00382 src_y>>=shift; 00383 00384 if((unsigned)src_x < 
width){ 00385 if((unsigned)src_y < height){ 00386 index= src_x + src_y*stride; 00387 dst[y*stride + x]= ( ( src[index ]*(s-frac_x) 00388 + src[index +1]* frac_x )*(s-frac_y) 00389 + ( src[index+stride ]*(s-frac_x) 00390 + src[index+stride+1]* frac_x )* frac_y 00391 + r)>>(shift*2); 00392 }else{ 00393 index= src_x + av_clip(src_y, 0, height)*stride; 00394 dst[y*stride + x]= ( ( src[index ]*(s-frac_x) 00395 + src[index +1]* frac_x )*s 00396 + r)>>(shift*2); 00397 } 00398 }else{ 00399 if((unsigned)src_y < height){ 00400 index= av_clip(src_x, 0, width) + src_y*stride; 00401 dst[y*stride + x]= ( ( src[index ]*(s-frac_y) 00402 + src[index+stride ]* frac_y )*s 00403 + r)>>(shift*2); 00404 }else{ 00405 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride; 00406 dst[y*stride + x]= src[index ]; 00407 } 00408 } 00409 00410 vx+= dxx; 00411 vy+= dyx; 00412 } 00413 ox += dxy; 00414 oy += dyy; 00415 } 00416 } 00417 #define H264_CHROMA_MC(OPNAME, OP)\ 00418 static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ 00419 const int A=(8-x)*(8-y);\ 00420 const int B=( x)*(8-y);\ 00421 const int C=(8-x)*( y);\ 00422 const int D=( x)*( y);\ 00423 \ 00424 assert(x<8 && y<8 && x>=0 && y>=0);\ 00425 \ 00426 do {\ 00427 int t0,t1,t2,t3; \ 00428 uint8_t *s0 = src; \ 00429 uint8_t *s1 = src+stride; \ 00430 t0 = *s0++; t2 = *s1++; \ 00431 t1 = *s0++; t3 = *s1++; \ 00432 OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ 00433 t0 = *s0++; t2 = *s1++; \ 00434 OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ 00435 dst+= stride;\ 00436 src+= stride;\ 00437 }while(--h);\ 00438 }\ 00439 \ 00440 static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ 00441 const int A=(8-x)*(8-y);\ 00442 const int B=( x)*(8-y);\ 00443 const int C=(8-x)*( y);\ 00444 const int D=( x)*( y);\ 00445 \ 00446 assert(x<8 && y<8 && x>=0 && y>=0);\ 00447 \ 00448 do {\ 00449 int t0,t1,t2,t3; 
\ 00450 uint8_t *s0 = src; \ 00451 uint8_t *s1 = src+stride; \ 00452 t0 = *s0++; t2 = *s1++; \ 00453 t1 = *s0++; t3 = *s1++; \ 00454 OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ 00455 t0 = *s0++; t2 = *s1++; \ 00456 OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ 00457 t1 = *s0++; t3 = *s1++; \ 00458 OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\ 00459 t0 = *s0++; t2 = *s1++; \ 00460 OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\ 00461 dst+= stride;\ 00462 src+= stride;\ 00463 }while(--h);\ 00464 }\ 00465 \ 00466 static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ 00467 const int A=(8-x)*(8-y);\ 00468 const int B=( x)*(8-y);\ 00469 const int C=(8-x)*( y);\ 00470 const int D=( x)*( y);\ 00471 \ 00472 assert(x<8 && y<8 && x>=0 && y>=0);\ 00473 \ 00474 do {\ 00475 int t0,t1,t2,t3; \ 00476 uint8_t *s0 = src; \ 00477 uint8_t *s1 = src+stride; \ 00478 t0 = *s0++; t2 = *s1++; \ 00479 t1 = *s0++; t3 = *s1++; \ 00480 OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\ 00481 t0 = *s0++; t2 = *s1++; \ 00482 OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\ 00483 t1 = *s0++; t3 = *s1++; \ 00484 OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\ 00485 t0 = *s0++; t2 = *s1++; \ 00486 OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\ 00487 t1 = *s0++; t3 = *s1++; \ 00488 OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\ 00489 t0 = *s0++; t2 = *s1++; \ 00490 OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\ 00491 t1 = *s0++; t3 = *s1++; \ 00492 OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\ 00493 t0 = *s0++; t2 = *s1++; \ 00494 OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\ 00495 dst+= stride;\ 00496 src+= stride;\ 00497 }while(--h);\ 00498 } 00499 00500 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) 00501 #define op_put(a, b) a = (((b) + 32)>>6) 00502 00503 H264_CHROMA_MC(put_ , op_put) 00504 H264_CHROMA_MC(avg_ , op_avg) 00505 #undef op_avg 00506 #undef op_put 00507 00508 #define QPEL_MC(r, OPNAME, RND, OP) \ 00509 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, 
uint8_t *src, int dstStride, int srcStride, int h){\ 00510 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 00511 do {\ 00512 uint8_t *s = src; \ 00513 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ 00514 src0= *s++;\ 00515 src1= *s++;\ 00516 src2= *s++;\ 00517 src3= *s++;\ 00518 src4= *s++;\ 00519 OP(dst[0], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ 00520 src5= *s++;\ 00521 OP(dst[1], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ 00522 src6= *s++;\ 00523 OP(dst[2], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ 00524 src7= *s++;\ 00525 OP(dst[3], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ 00526 src8= *s++;\ 00527 OP(dst[4], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ 00528 OP(dst[5], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ 00529 OP(dst[6], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ 00530 OP(dst[7], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ 00531 dst+=dstStride;\ 00532 src+=srcStride;\ 00533 }while(--h);\ 00534 }\ 00535 \ 00536 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 00537 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 00538 int w=8;\ 00539 do{\ 00540 uint8_t *s = src, *d=dst;\ 00541 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ 00542 src0 = *s; s+=srcStride; \ 00543 src1 = *s; s+=srcStride; \ 00544 src2 = *s; s+=srcStride; \ 00545 src3 = *s; s+=srcStride; \ 00546 src4 = *s; s+=srcStride; \ 00547 OP(*d, (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));d+=dstStride;\ 00548 src5 = *s; s+=srcStride; \ 00549 OP(*d, (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));d+=dstStride;\ 00550 src6 = *s; s+=srcStride; \ 00551 OP(*d, (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));d+=dstStride;\ 00552 src7 = *s; s+=srcStride; \ 00553 OP(*d, (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));d+=dstStride;\ 
00554 src8 = *s; \ 00555 OP(*d, (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));d+=dstStride;\ 00556 OP(*d, (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));d+=dstStride;\ 00557 OP(*d, (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));d+=dstStride;\ 00558 OP(*d, (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ 00559 dst++;\ 00560 src++;\ 00561 }while(--w);\ 00562 }\ 00563 \ 00564 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ 00565 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 00566 do {\ 00567 uint8_t *s = src;\ 00568 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ 00569 int src9,src10,src11,src12,src13,src14,src15,src16;\ 00570 src0= *s++;\ 00571 src1= *s++;\ 00572 src2= *s++;\ 00573 src3= *s++;\ 00574 src4= *s++;\ 00575 OP(dst[ 0], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ 00576 src5= *s++;\ 00577 OP(dst[ 1], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ 00578 src6= *s++;\ 00579 OP(dst[ 2], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ 00580 src7= *s++;\ 00581 OP(dst[ 3], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ 00582 src8= *s++;\ 00583 OP(dst[ 4], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ 00584 src9= *s++;\ 00585 OP(dst[ 5], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ 00586 src10= *s++;\ 00587 OP(dst[ 6], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ 00588 src11= *s++;\ 00589 OP(dst[ 7], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ 00590 src12= *s++;\ 00591 OP(dst[ 8], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ 00592 src13= *s++;\ 00593 OP(dst[ 9], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ 00594 src14= *s++;\ 00595 OP(dst[10], (src10+src11)*20 - (src9 +src12)*6 + 
(src8 +src13)*3 - (src7 +src14));\ 00596 src15= *s++;\ 00597 OP(dst[11], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ 00598 src16= *s++;\ 00599 OP(dst[12], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ 00600 OP(dst[13], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ 00601 OP(dst[14], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ 00602 OP(dst[15], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ 00603 dst+=dstStride;\ 00604 src+=srcStride;\ 00605 }while(--h);\ 00606 }\ 00607 \ 00608 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 00609 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 00610 int w=16;\ 00611 do {\ 00612 uint8_t *s = src, *d=dst;\ 00613 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\ 00614 int src9,src10,src11,src12,src13,src14,src15,src16;\ 00615 src0 = *s; s+=srcStride; \ 00616 src1 = *s; s+=srcStride; \ 00617 src2 = *s; s+=srcStride; \ 00618 src3 = *s; s+=srcStride; \ 00619 src4 = *s; s+=srcStride; \ 00620 OP(*d, (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));d+=dstStride;\ 00621 src5 = *s; s+=srcStride; \ 00622 OP(*d, (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));d+=dstStride;\ 00623 src6 = *s; s+=srcStride; \ 00624 OP(*d, (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));d+=dstStride;\ 00625 src7 = *s; s+=srcStride; \ 00626 OP(*d, (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));d+=dstStride;\ 00627 src8 = *s; s+=srcStride; \ 00628 OP(*d, (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));d+=dstStride;\ 00629 src9 = *s; s+=srcStride; \ 00630 OP(*d, (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));d+=dstStride;\ 00631 src10 = *s; s+=srcStride; \ 00632 OP(*d, (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));d+=dstStride;\ 
00633 src11 = *s; s+=srcStride; \ 00634 OP(*d, (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));d+=dstStride;\ 00635 src12 = *s; s+=srcStride; \ 00636 OP(*d, (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));d+=dstStride;\ 00637 src13 = *s; s+=srcStride; \ 00638 OP(*d, (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));d+=dstStride;\ 00639 src14 = *s; s+=srcStride; \ 00640 OP(*d, (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));d+=dstStride;\ 00641 src15 = *s; s+=srcStride; \ 00642 OP(*d, (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));d+=dstStride;\ 00643 src16 = *s; \ 00644 OP(*d, (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));d+=dstStride;\ 00645 OP(*d, (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));d+=dstStride;\ 00646 OP(*d, (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));d+=dstStride;\ 00647 OP(*d, (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ 00648 dst++;\ 00649 src++;\ 00650 }while(--w);\ 00651 }\ 00652 \ 00653 static void OPNAME ## qpel8_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\ 00654 OPNAME ## pixels8_c(dst, src, stride, 8);\ 00655 }\ 00656 \ 00657 static void OPNAME ## qpel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00658 uint8_t half[64];\ 00659 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ 00660 OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\ 00661 }\ 00662 \ 00663 static void OPNAME ## qpel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00664 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ 00665 }\ 00666 \ 00667 static void OPNAME ## qpel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00668 uint8_t half[64];\ 00669 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ 00670 OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\ 00671 }\ 00672 \ 00673 
static void OPNAME ## qpel8_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00674 uint8_t full[16*9];\ 00675 uint8_t half[64];\ 00676 copy_block9(full, src, 16, stride, 9);\ 00677 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ 00678 OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\ 00679 }\ 00680 \ 00681 static void OPNAME ## qpel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00682 uint8_t full[16*9];\ 00683 copy_block9(full, src, 16, stride, 9);\ 00684 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ 00685 }\ 00686 \ 00687 static void OPNAME ## qpel8_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00688 uint8_t full[16*9];\ 00689 uint8_t half[64];\ 00690 copy_block9(full, src, 16, stride, 9);\ 00691 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ 00692 OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\ 00693 }\ 00694 static void OPNAME ## qpel8_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00695 uint8_t full[16*9];\ 00696 uint8_t halfH[72];\ 00697 uint8_t halfHV[64];\ 00698 copy_block9(full, src, 16, stride, 9);\ 00699 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 00700 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\ 00701 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 00702 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\ 00703 }\ 00704 static void OPNAME ## qpel8_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00705 uint8_t full[16*9];\ 00706 uint8_t halfH[72];\ 00707 uint8_t halfHV[64];\ 00708 copy_block9(full, src, 16, stride, 9);\ 00709 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 00710 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\ 00711 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 00712 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\ 00713 }\ 00714 static void OPNAME ## qpel8_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00715 uint8_t 
full[16*9];\ 00716 uint8_t halfH[72];\ 00717 uint8_t halfHV[64];\ 00718 copy_block9(full, src, 16, stride, 9);\ 00719 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 00720 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\ 00721 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 00722 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 00723 }\ 00724 static void OPNAME ## qpel8_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00725 uint8_t full[16*9];\ 00726 uint8_t halfH[72];\ 00727 uint8_t halfHV[64];\ 00728 copy_block9(full, src, 16, stride, 9);\ 00729 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 00730 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\ 00731 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 00732 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 00733 }\ 00734 static void OPNAME ## qpel8_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00735 uint8_t halfH[72];\ 00736 uint8_t halfHV[64];\ 00737 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 00738 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 00739 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\ 00740 }\ 00741 static void OPNAME ## qpel8_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00742 uint8_t halfH[72];\ 00743 uint8_t halfHV[64];\ 00744 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 00745 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 00746 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 00747 }\ 00748 static void OPNAME ## qpel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00749 uint8_t full[16*9];\ 00750 uint8_t halfH[72];\ 00751 copy_block9(full, src, 16, stride, 9);\ 00752 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 00753 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\ 00754 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, 
stride, 8);\ 00755 }\ 00756 static void OPNAME ## qpel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00757 uint8_t full[16*9];\ 00758 uint8_t halfH[72];\ 00759 copy_block9(full, src, 16, stride, 9);\ 00760 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 00761 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\ 00762 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ 00763 }\ 00764 static void OPNAME ## qpel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00765 uint8_t halfH[72];\ 00766 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 00767 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ 00768 }\ 00769 static void OPNAME ## qpel16_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\ 00770 OPNAME ## pixels16_c(dst, src, stride, 16);\ 00771 }\ 00772 \ 00773 static void OPNAME ## qpel16_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00774 uint8_t half[256];\ 00775 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ 00776 OPNAME ## pixels16_l2_aligned2(dst, src, half, stride, stride, 16, 16);\ 00777 }\ 00778 \ 00779 static void OPNAME ## qpel16_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00780 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ 00781 }\ 00782 \ 00783 static void OPNAME ## qpel16_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00784 uint8_t half[256];\ 00785 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ 00786 OPNAME ## pixels16_l2_aligned2(dst, src+1, half, stride, stride, 16, 16);\ 00787 }\ 00788 \ 00789 static void OPNAME ## qpel16_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00790 uint8_t full[24*17];\ 00791 uint8_t half[256];\ 00792 copy_block17(full, src, 24, stride, 17);\ 00793 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ 00794 OPNAME ## pixels16_l2_aligned(dst, full, half, stride, 24, 16, 16);\ 00795 }\ 00796 \ 00797 static void OPNAME ## qpel16_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\ 
00798 uint8_t full[24*17];\ 00799 copy_block17(full, src, 24, stride, 17);\ 00800 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ 00801 }\ 00802 \ 00803 static void OPNAME ## qpel16_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00804 uint8_t full[24*17];\ 00805 uint8_t half[256];\ 00806 copy_block17(full, src, 24, stride, 17);\ 00807 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ 00808 OPNAME ## pixels16_l2_aligned(dst, full+24, half, stride, 24, 16, 16);\ 00809 }\ 00810 static void OPNAME ## qpel16_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00811 uint8_t full[24*17];\ 00812 uint8_t halfH[272];\ 00813 uint8_t halfHV[256];\ 00814 copy_block17(full, src, 24, stride, 17);\ 00815 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 00816 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\ 00817 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 00818 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\ 00819 }\ 00820 static void OPNAME ## qpel16_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00821 uint8_t full[24*17];\ 00822 uint8_t halfH[272];\ 00823 uint8_t halfHV[256];\ 00824 copy_block17(full, src, 24, stride, 17);\ 00825 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 00826 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\ 00827 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 00828 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\ 00829 }\ 00830 static void OPNAME ## qpel16_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00831 uint8_t full[24*17];\ 00832 uint8_t halfH[272];\ 00833 uint8_t halfHV[256];\ 00834 copy_block17(full, src, 24, stride, 17);\ 00835 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 00836 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\ 00837 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 00838 OPNAME ## 
pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\ 00839 }\ 00840 static void OPNAME ## qpel16_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00841 uint8_t full[24*17];\ 00842 uint8_t halfH[272];\ 00843 uint8_t halfHV[256];\ 00844 copy_block17(full, src, 24, stride, 17);\ 00845 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 00846 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\ 00847 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 00848 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\ 00849 }\ 00850 static void OPNAME ## qpel16_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00851 uint8_t halfH[272];\ 00852 uint8_t halfHV[256];\ 00853 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ 00854 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 00855 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\ 00856 }\ 00857 static void OPNAME ## qpel16_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00858 uint8_t halfH[272];\ 00859 uint8_t halfHV[256];\ 00860 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ 00861 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 00862 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\ 00863 }\ 00864 static void OPNAME ## qpel16_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00865 uint8_t full[24*17];\ 00866 uint8_t halfH[272];\ 00867 copy_block17(full, src, 24, stride, 17);\ 00868 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 00869 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\ 00870 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ 00871 }\ 00872 static void OPNAME ## qpel16_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\ 00873 uint8_t full[24*17];\ 00874 uint8_t halfH[272];\ 00875 copy_block17(full, src, 24, stride, 17);\ 00876 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 
16, 24, 17);\
    put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
/* Horizontal then vertical lowpass through a temp buffer (no pixel average). */\
static void OPNAME ## qpel16_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}

/*
 * Store/average operators used to instantiate QPEL_MC above.
 * cm[] is the clip table (ff_cropTbl offset by MAX_NEG_CROP, presumably
 * clamping to 0..255 -- standard FFmpeg crop table).  (b + 16) >> 5
 * rescales the filter sum with round-to-nearest; (b + 15) >> 5 is the
 * "no rounding" variant.
 */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd

#if 1
/*
 * H.264 quarter-pel 6-tap lowpass filters, coefficients (1,-5,20,20,-5,1).
 * All three passes are manually unrolled: the first 4 taps are always
 * emitted, then "if (w>4)" / "if (h>8)" extend the unrolled chain to 8
 * and 16 samples.  OP stores/averages a clipped 8-bit result; OP2 is the
 * variant used after the two-dimensional (hv) pass, where the intermediate
 * values carry an extra filter gain.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
/* Horizontal pass: filters h rows of w pixels; reads src[-2..w+2] per row. */\
static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    do {\
        int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
        uint8_t *s = src-2;\
        srcB = *s++;\
        srcA = *s++;\
        src0 = *s++;\
        src1 = *s++;\
        src2 = *s++;\
        src3 = *s++;\
        OP(dst[0], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        src4 = *s++;\
        OP(dst[1], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        src5 = *s++;\
        OP(dst[2], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        src6 = *s++;\
        OP(dst[3], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        if (w>4) { /* unrolled continuation for the 8-wide case */ \
            int src7,src8,src9,src10; \
            src7 = *s++;\
            OP(dst[4], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
            src8 = *s++;\
            OP(dst[5], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
            src9 = *s++;\
            OP(dst[6], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
            src10 = *s++;\
            OP(dst[7], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
            if (w>8) { /* unrolled continuation for the 16-wide case */ \
                int src11,src12,src13,src14,src15,src16,src17,src18; \
                src11 = *s++;\
                OP(dst[8] , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
                src12 = *s++;\
                OP(dst[9] , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
                src13 = *s++;\
                OP(dst[10], (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
                src14 = *s++;\
                OP(dst[11], (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
                src15 = *s++;\
                OP(dst[12], (src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
                src16 = *s++;\
                OP(dst[13], (src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
                src17 = *s++;\
                OP(dst[14], (src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
                src18 = *s++;\
                OP(dst[15], (src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
            } \
        } \
        dst+=dstStride;\
        src+=srcStride;\
    }while(--h);\
}\
\
/* Vertical pass: filters w columns of h pixels; reads 2 rows above/3 below. */\
static inline void OPNAME ## h264_qpel_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    do{\
        int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
        uint8_t *s = src-2*srcStride,*d=dst;\
        srcB = *s; s+=srcStride;\
        srcA = *s; s+=srcStride;\
        src0 = *s; s+=srcStride;\
        src1 = *s; s+=srcStride;\
        src2 = *s; s+=srcStride;\
        src3 = *s; s+=srcStride;\
        OP(*d, (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));d+=dstStride;\
        src4 = *s; s+=srcStride;\
        OP(*d, (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));d+=dstStride;\
        src5 = *s; s+=srcStride;\
        OP(*d, (src2+src3)*20 - (src1+src4)*5 + (src0+src5));d+=dstStride;\
        src6 = *s; s+=srcStride;\
        OP(*d, (src3+src4)*20 - (src2+src5)*5 + (src1+src6));d+=dstStride;\
        if (h>4) { /* unrolled continuation for the 8-tall case */ \
            int src7,src8,src9,src10; \
            src7 = *s; s+=srcStride;\
            OP(*d, (src4+src5)*20 - (src3+src6)*5 + (src2+src7));d+=dstStride;\
            src8 = *s; s+=srcStride;\
            OP(*d, (src5+src6)*20 - (src4+src7)*5 + (src3+src8));d+=dstStride;\
            src9 = *s; s+=srcStride;\
            OP(*d, (src6+src7)*20 - (src5+src8)*5 + (src4+src9));d+=dstStride;\
            src10 = *s; s+=srcStride;\
            OP(*d, (src7+src8)*20 - (src6+src9)*5 + (src5+src10));d+=dstStride;\
            if (h>8) { /* unrolled continuation for the 16-tall case */ \
                int src11,src12,src13,src14,src15,src16,src17,src18; \
                src11 = *s; s+=srcStride;\
                OP(*d , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));d+=dstStride;\
                src12 = *s; s+=srcStride;\
                OP(*d , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));d+=dstStride;\
                src13 = *s; s+=srcStride;\
                OP(*d, (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));d+=dstStride;\
                src14 = *s; s+=srcStride;\
                OP(*d, (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));d+=dstStride;\
                src15 = *s; s+=srcStride;\
                OP(*d, (src12+src13)*20 - (src11+src14)*5 + (src10+src15));d+=dstStride;\
                src16 = *s; s+=srcStride;\
                OP(*d, (src13+src14)*20 - (src12+src15)*5 + (src11+src16));d+=dstStride;\
                src17 = *s; s+=srcStride;\
                OP(*d, (src14+src15)*20 - (src13+src16)*5 + (src12+src17));d+=dstStride;\
                src18 = *s; s+=srcStride;\
                OP(*d, (src15+src16)*20 - (src14+src17)*5 + (src13+src18));d+=dstStride;\
            } \
        } \
        dst++;\
        src++;\
    }while(--w);\
}\
\
/* 2-D pass: horizontal filter into 16-bit tmp (h+5 rows, no clipping), \
 * then vertical filter over tmp columns with OP2 doing the final rescale. */\
static inline void OPNAME ## h264_qpel_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride,int w,int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    i= h+5; \
    do {\
        int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
        uint8_t *s = src-2;\
        srcB = *s++;\
        srcA = *s++;\
        src0 = *s++;\
        src1 = *s++;\
        src2 = *s++;\
        src3 = *s++;\
        tmp[0] = ((src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        src4 = *s++;\
        tmp[1] = ((src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        src5 = *s++;\
        tmp[2] = ((src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        src6 = *s++;\
        tmp[3] = ((src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        if (w>4) { /* unrolled continuation for the 8-wide case */ \
            int src7,src8,src9,src10; \
            src7 = *s++;\
            tmp[4] = ((src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
            src8 = *s++;\
            tmp[5] = ((src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
            src9 = *s++;\
            tmp[6] = ((src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
            src10 = *s++;\
            tmp[7] = ((src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
            if (w>8) { /* unrolled continuation for the 16-wide case */ \
                int src11,src12,src13,src14,src15,src16,src17,src18; \
                src11 = *s++;\
                tmp[8] = ((src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
                src12 = *s++;\
                tmp[9] = ((src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
                src13 = *s++;\
                tmp[10] = ((src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
                src14 = *s++;\
                tmp[11] = ((src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
                src15 = *s++;\
                tmp[12] = ((src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
                src16 = *s++;\
                tmp[13] = ((src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
                src17 = *s++;\
                tmp[14] = ((src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
                src18 = *s++;\
                tmp[15] = ((src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
            } \
        } \
        tmp+=tmpStride;\
        src+=srcStride;\
    }while(--i);\
    tmp -= tmpStride*(h+5-2);  /* rewind to row 2 of tmp (first output row context) */ \
    i = w; \
    do {\
        int tmpB,tmpA,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6;\
        int16_t *s = tmp-2*tmpStride; \
        uint8_t *d=dst;\
        tmpB = *s; s+=tmpStride;\
        tmpA = *s; s+=tmpStride;\
        tmp0 = *s; s+=tmpStride;\
        tmp1 = *s; s+=tmpStride;\
        tmp2 = *s; s+=tmpStride;\
        tmp3 = *s; s+=tmpStride;\
        OP2(*d, (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));d+=dstStride;\
        tmp4 = *s; s+=tmpStride;\
        OP2(*d, (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));d+=dstStride;\
        tmp5 = *s; s+=tmpStride;\
        OP2(*d, (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));d+=dstStride;\
        tmp6 = *s; s+=tmpStride;\
        OP2(*d, (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));d+=dstStride;\
        if (h>4) { /* unrolled continuation for the 8-tall case */ \
            int tmp7,tmp8,tmp9,tmp10; \
            tmp7 = *s; s+=tmpStride;\
            OP2(*d, (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));d+=dstStride;\
            tmp8 = *s; s+=tmpStride;\
            OP2(*d, (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));d+=dstStride;\
            tmp9 = *s; s+=tmpStride;\
            OP2(*d, (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));d+=dstStride;\
            tmp10 = *s; s+=tmpStride;\
            OP2(*d, (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));d+=dstStride;\
            if (h>8) { /* unrolled continuation for the 16-tall case */ \
                int tmp11,tmp12,tmp13,tmp14,tmp15,tmp16,tmp17,tmp18; \
                tmp11 = *s; s+=tmpStride;\
                OP2(*d , (tmp8 +tmp9 )*20 - (tmp7 +tmp10)*5 + (tmp6 +tmp11));d+=dstStride;\
                tmp12 = *s; s+=tmpStride;\
                OP2(*d , (tmp9 +tmp10)*20 - (tmp8 +tmp11)*5 + (tmp7 +tmp12));d+=dstStride;\
                tmp13 = *s; s+=tmpStride;\
                OP2(*d, (tmp10+tmp11)*20 - (tmp9 +tmp12)*5 + (tmp8 +tmp13));d+=dstStride;\
                tmp14 = *s; s+=tmpStride;\
                OP2(*d, (tmp11+tmp12)*20 - (tmp10+tmp13)*5 + (tmp9 +tmp14));d+=dstStride;\
                tmp15 = *s; s+=tmpStride;\
                OP2(*d, (tmp12+tmp13)*20 - (tmp11+tmp14)*5 + (tmp10+tmp15));d+=dstStride;\
                tmp16 = *s; s+=tmpStride;\
                OP2(*d, (tmp13+tmp14)*20 - (tmp12+tmp15)*5 + (tmp11+tmp16));d+=dstStride;\
                tmp17 = *s; s+=tmpStride;\
                OP2(*d, (tmp14+tmp15)*20 - (tmp13+tmp16)*5 + (tmp12+tmp17));d+=dstStride;\
                tmp18 = *s; s+=tmpStride;\
                OP2(*d, (tmp15+tmp16)*20 - (tmp14+tmp17)*5 + (tmp13+tmp18));d+=dstStride;\
            } \
        } \
        dst++;\
        tmp++;\
    }while(--i);\
}\
\
/* Fixed-size wrappers binding w/h to 4, 8 and 16. */\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,4,4); \
}\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,8,8); \
}\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,16,16); \
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,4,4); \
}\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,8,8); \
}\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,16,16); \
}\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,4,4); \
}\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,8,8); \
}\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,16,16); \
}\

/*
 * H.264 motion-compensation entry points for one block size.  mcXY names
 * follow the FFmpeg convention of X = horizontal and Y = vertical
 * quarter-pel phase.  mc00 is the full-pel copy.
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
/* (1,0): average of source and horizontal half-pel. */\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
/* (2,0): horizontal half-pel, filtered directly into dst. */\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
/* (3,0): average of src+1 and horizontal half-pel. */\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
/* (0,1): copy SIZE+5 rows (2 above, 3 below) into 'full' so the vertical \
 * filter has its context, then average with the vertical half-pel. */\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
/* (0,2): vertical half-pel into dst. */\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
/* (0,3): like mc01 but averaging with the row below (full_mid+SIZE). */\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
/* Diagonal positions mc11/mc31/mc13/mc33: average of a horizontal and a \
 * vertical half-pel block; the +1 / +stride offsets select which corner. */\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
/* (2,2): two-dimensional 6-tap filter via the hv pass. */\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
/* mc21/mc23: average of a horizontal half-pel and the 2-D (hv) block. */\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
/* mc12/mc32: average of a vertical half-pel and the 2-D (hv) block. */\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\

/*
 * Operators for the H.264 filters.  op_put/op_avg rescale a single-pass
 * sum ((b+16)>>5); op2_put/op2_avg rescale the double-pass (hv) sum,
 * which carries the filter gain twice ((b+512)>>10).
 */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif

/*
 * WMV2 half-pel horizontal lowpass: 4-tap filter (-1,9,9,-1)/16 with
 * rounding, clipped through cm[].  Reads src[-1..8] per row; processes
 * h rows of 8 output pixels, unrolled.
 */
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    do{
        int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
        uint8_t *s = src;
        src_1 = s[-1];
        src0 = *s++;
        src1 = *s++;
        src2 = *s++;
        dst[0]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        src3 = *s++;
        dst[1]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
        src4 = *s++;
        dst[2]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
        src5 = *s++;
        dst[3]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
        src6 = *s++;
        dst[4]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
        src7 = *s++;
        dst[5]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
        src8 = *s++;
        dst[6]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
        src9 = *s++;
        dst[7]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
        dst+=dstStride;
        src+=srcStride;
    }while(--h);
}

/*
 * WMV2 half-pel vertical lowpass: same (-1,9,9,-1)/16 filter applied down
 * each of w columns, producing a fixed 8 output rows per column (unrolled
 * through src9).  Reads one row above src and nine rows from src onward.
 */
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    do{
        int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
        uint8_t *s = src,*d = dst;
        src_1 = *(s-srcStride);
        src0 = *s; s+=srcStride;
        src1 = *s; s+=srcStride;
        src2 = *s; s+=srcStride;
        *d= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; d+=dstStride;
        src3 = *s; s+=srcStride;
        *d= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; d+=dstStride;
        src4 = *s; s+=srcStride;
        *d= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; d+=dstStride;
        src5 = *s; s+=srcStride;
        *d= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; d+=dstStride;
        src6 = *s; s+=srcStride;
        *d= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; d+=dstStride;
        src7 = *s; s+=srcStride;
        *d= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; d+=dstStride;
        src8 = *s; s+=srcStride;
        *d= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; d+=dstStride;
        src9 = *s;
        *d= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; d+=dstStride;
        src++;
        dst++;
    }while(--w);
}

/* Full-pel position: plain 8x8 copy. */
static void put_mspel8_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

/* (1,0): average of source and horizontal half-pel. */
static void put_mspel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);
}

/* (2,0): horizontal half-pel filtered directly into dst. */
static void put_mspel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

/* (3,0): average of src+1 and horizontal half-pel. */
static void put_mspel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);
}

/* (0,2): vertical half-pel directly into dst. */
static void put_mspel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

/* (1,2): average of vertical half-pel (halfV) and the H-then-V block
 * (halfHV).  halfH holds 11 horizontally filtered rows starting one row
 * above src; halfH+8 skips that extra top row for the vertical pass. */
static void put_mspel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);
}

/* (3,2): as mc12 but the vertical half-pel is taken at src+1. */
static void put_mspel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);
}

/* (2,2): horizontal then vertical half-pel filter, no averaging. */
static void put_mspel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}