Libav
vp8dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2010 David Conrad
3  * Copyright (C) 2010 Ronald S. Bultje
4  * Copyright (C) 2014 Peter Ross
5  *
6  * This file is part of Libav.
7  *
8  * Libav is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * Libav is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with Libav; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
28 #include "libavutil/common.h"
29 
30 #include "mathops.h"
31 #include "vp8dsp.h"
32 
/**
 * Generate the two "DC-only add for four blocks" helpers for a codec prefix.
 *
 * name_idct_dc_add4uv_c() hands block[0..3] to name_idct_dc_add_c() for the
 * four 4x4 quadrants of an 8x8 area (left to right, then top to bottom).
 * name_idct_dc_add4y_c() does the same for four 4x4 blocks placed side by
 * side in one 16-pixel-wide strip.
 * name_idct_dc_add_c() must already be defined where the macro is expanded.
 */
#define MK_IDCT_DC_ADD4_C(name)                                               \
static void name ## _idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16],     \
                                      ptrdiff_t stride)                       \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < 4; n++)                                                   \
        name ## _idct_dc_add_c(dst + stride * ((n >> 1) * 4) + (n & 1) * 4,   \
                               block[n], stride);                             \
}                                                                             \
                                                                              \
static void name ## _idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16],      \
                                     ptrdiff_t stride)                        \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < 4; n++)                                                   \
        name ## _idct_dc_add_c(dst + 4 * n, block[n], stride);                \
}
51 
52 #if CONFIG_VP7_DECODER
/**
 * Two-pass inverse transform of the 16 VP7 luma DC coefficients.
 *
 * The first pass works on the rows of dc and keeps the result >> 14 in a
 * temporary; the second pass works on the columns of that temporary, rounds
 * with 0x20000 and shifts by 18. Each result is written to coefficient 0 of
 * one of the 16 sub-blocks in block, and dc is cleared.
 *
 * @param block per-sub-block coefficient arrays; only element [..][..][0]
 *              of each is written
 * @param dc    the 16 input coefficients, zeroed on return
 */
static void vp7_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
{
    int i;
    /* Unsigned on purpose: each individual product fits in an int, but the
     * butterfly sums a1 +/- d1 and b1 +/- c1 can exceed INT_MAX for extreme
     * coefficients. Unsigned arithmetic wraps instead of being undefined. */
    unsigned a1, b1, c1, d1;
    int16_t tmp[16];

    for (i = 0; i < 4; i++) {
        a1 = (dc[i * 4 + 0] + dc[i * 4 + 2]) * 23170;
        b1 = (dc[i * 4 + 0] - dc[i * 4 + 2]) * 23170;
        c1 = dc[i * 4 + 1] * 12540 - dc[i * 4 + 3] * 30274;
        d1 = dc[i * 4 + 1] * 30274 + dc[i * 4 + 3] * 12540;
        /* Back to signed before shifting so negative results keep their sign. */
        tmp[i * 4 + 0] = (int)(a1 + d1) >> 14;
        tmp[i * 4 + 3] = (int)(a1 - d1) >> 14;
        tmp[i * 4 + 1] = (int)(b1 + c1) >> 14;
        tmp[i * 4 + 2] = (int)(b1 - c1) >> 14;
    }

    for (i = 0; i < 4; i++) {
        a1 = (tmp[i + 0] + tmp[i + 8]) * 23170;
        b1 = (tmp[i + 0] - tmp[i + 8]) * 23170;
        c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274;
        d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540;
        dc[i * 4 + 0] = 0;
        dc[i * 4 + 1] = 0;
        dc[i * 4 + 2] = 0;
        dc[i * 4 + 3] = 0;
        block[0][i][0] = (int)(a1 + d1 + 0x20000) >> 18;
        block[3][i][0] = (int)(a1 - d1 + 0x20000) >> 18;
        block[1][i][0] = (int)(b1 + c1 + 0x20000) >> 18;
        block[2][i][0] = (int)(b1 - c1 + 0x20000) >> 18;
    }
}
84 
/**
 * DC-only variant of the VP7 luma DC transform: only dc[0] is read.
 *
 * The single value is scaled in two steps (>> 14, then rounded >> 18) and
 * stored in coefficient 0 of all 16 sub-blocks; dc[0] is cleared.
 */
static void vp7_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
{
    const int first_pass = (23170 * dc[0]) >> 14;
    const int val        = (23170 * first_pass + 0x20000) >> 18;
    int row, col;

    dc[0] = 0;

    for (row = 0; row < 4; row++)
        for (col = 0; col < 4; col++)
            block[row][col][0] = val;
}
97 
/**
 * VP7 4x4 inverse transform of block, added onto the pixels at dst.
 *
 * The first pass transforms each row of block into tmp (>> 14) and clears
 * the coefficients as they are consumed. The second pass transforms the
 * columns of tmp, rounds (+0x20000, >> 18), adds the result to the existing
 * pixel and stores it through av_clip_uint8().
 *
 * @param dst    top-left pixel of the 4x4 destination area
 * @param block  16 coefficients, zeroed on return
 * @param stride distance between destination rows
 */
static void vp7_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    int i;
    /* Unsigned on purpose: the individual products fit in an int, but the
     * butterfly sums can exceed INT_MAX for extreme coefficients, which would
     * be undefined behaviour with signed operands. */
    unsigned a1, b1, c1, d1;
    int16_t tmp[16];

    for (i = 0; i < 4; i++) {
        a1 = (block[i * 4 + 0] + block[i * 4 + 2]) * 23170;
        b1 = (block[i * 4 + 0] - block[i * 4 + 2]) * 23170;
        c1 = block[i * 4 + 1] * 12540 - block[i * 4 + 3] * 30274;
        d1 = block[i * 4 + 1] * 30274 + block[i * 4 + 3] * 12540;
        block[i * 4 + 0] = 0;
        block[i * 4 + 1] = 0;
        block[i * 4 + 2] = 0;
        block[i * 4 + 3] = 0;
        /* Back to signed before shifting so negative results keep their sign. */
        tmp[i * 4 + 0] = (int)(a1 + d1) >> 14;
        tmp[i * 4 + 3] = (int)(a1 - d1) >> 14;
        tmp[i * 4 + 1] = (int)(b1 + c1) >> 14;
        tmp[i * 4 + 2] = (int)(b1 - c1) >> 14;
    }

    for (i = 0; i < 4; i++) {
        a1 = (tmp[i + 0] + tmp[i + 8]) * 23170;
        b1 = (tmp[i + 0] - tmp[i + 8]) * 23170;
        c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274;
        d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540;
        dst[0 * stride + i] = av_clip_uint8(dst[0 * stride + i] +
                                            ((int)(a1 + d1 + 0x20000) >> 18));
        dst[3 * stride + i] = av_clip_uint8(dst[3 * stride + i] +
                                            ((int)(a1 - d1 + 0x20000) >> 18));
        dst[1 * stride + i] = av_clip_uint8(dst[1 * stride + i] +
                                            ((int)(b1 + c1 + 0x20000) >> 18));
        dst[2 * stride + i] = av_clip_uint8(dst[2 * stride + i] +
                                            ((int)(b1 - c1 + 0x20000) >> 18));
    }
}
133 
134 static void vp7_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
135 {
136  int i, dc = (23170 * (23170 * block[0] >> 14) + 0x20000) >> 18;
137  block[0] = 0;
138 
139  for (i = 0; i < 4; i++) {
140  dst[0] = av_clip_uint8(dst[0] + dc);
141  dst[1] = av_clip_uint8(dst[1] + dc);
142  dst[2] = av_clip_uint8(dst[2] + dc);
143  dst[3] = av_clip_uint8(dst[3] + dc);
144  dst += stride;
145  }
146 }
147 
149 #endif /* CONFIG_VP7_DECODER */
150 
151 // TODO: Maybe add dequant
152 #if CONFIG_VP8_DECODER
/**
 * Two-pass add/subtract butterfly over the 16 VP8 luma DC coefficients.
 *
 * The first pass combines the four entries of each column of dc in place.
 * The second pass combines the four entries of each row, adds a rounding
 * term of 3, shifts right by 3 and stores each result in coefficient 0 of
 * one sub-block of block. dc is zeroed on return.
 */
static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
{
    int n;

    /* column pass, written back into dc */
    for (n = 0; n < 4; n++) {
        int16_t *col = dc + n;
        const int sum03  = col[0] + col[12];
        const int sum12  = col[4] + col[8];
        const int diff12 = col[4] - col[8];
        const int diff03 = col[0] - col[12];

        col[0]  = sum03  + sum12;
        col[4]  = diff03 + diff12;
        col[8]  = sum03  - sum12;
        col[12] = diff03 - diff12;
    }

    /* row pass, output scattered to the sub-blocks */
    for (n = 0; n < 4; n++) {
        int16_t *row = dc + 4 * n;
        const int sum03  = row[0] + row[3] + 3; /* includes rounding */
        const int sum12  = row[1] + row[2];
        const int diff12 = row[1] - row[2];
        const int diff03 = row[0] - row[3] + 3; /* includes rounding */

        row[0] = row[1] = row[2] = row[3] = 0;

        block[n][0][0] = (sum03  + sum12)  >> 3;
        block[n][1][0] = (diff03 + diff12) >> 3;
        block[n][2][0] = (sum03  - sum12)  >> 3;
        block[n][3][0] = (diff03 - diff12) >> 3;
    }
}
185 
/**
 * DC-only variant of the VP8 luma DC transform: only dc[0] is read.
 *
 * (dc[0] + 3) >> 3 is stored in coefficient 0 of all 16 sub-blocks and
 * dc[0] is cleared.
 */
static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
{
    const int val = (dc[0] + 3) >> 3;
    int row, col;

    dc[0] = 0;

    for (row = 0; row < 4; row++)
        for (col = 0; col < 4; col++)
            block[row][col][0] = val;
}
198 
/* Fixed-point multipliers used by the VP8 inverse transform below:
 * MUL_20091(a) is a + a * 20091 / 65536, MUL_35468(a) is a * 35468 / 65536
 * (both with a right shift instead of a division). */
#define MUL_20091(a) ((((a) * 20091) >> 16) + (a))
#define MUL_35468(a) (((a) * 35468) >> 16)

/**
 * VP8 4x4 inverse transform of block, added onto the pixels at dst.
 *
 * The first pass transforms each column of block, clears the consumed
 * coefficients and stores the result transposed in tmp. The second pass
 * transforms tmp the same way, rounds (+4, >> 3), adds the result to one
 * destination row and stores it through av_clip_uint8().
 *
 * @param dst    top-left pixel of the 4x4 destination area
 * @param block  16 coefficients, zeroed on return
 * @param stride distance between destination rows
 */
static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    int16_t tmp[16];
    int n;

    for (n = 0; n < 4; n++) {
        const int c0 = block[0 * 4 + n];
        const int c1 = block[1 * 4 + n];
        const int c2 = block[2 * 4 + n];
        const int c3 = block[3 * 4 + n];
        const int even_sum  = c0 + c2;
        const int even_diff = c0 - c2;
        const int odd_lo    = MUL_35468(c1) - MUL_20091(c3);
        const int odd_hi    = MUL_20091(c1) + MUL_35468(c3);

        block[0 * 4 + n] = 0;
        block[1 * 4 + n] = 0;
        block[2 * 4 + n] = 0;
        block[3 * 4 + n] = 0;

        tmp[n * 4 + 0] = even_sum  + odd_hi;
        tmp[n * 4 + 1] = even_diff + odd_lo;
        tmp[n * 4 + 2] = even_diff - odd_lo;
        tmp[n * 4 + 3] = even_sum  - odd_hi;
    }

    for (n = 0; n < 4; n++) {
        const int c0 = tmp[0 * 4 + n];
        const int c1 = tmp[1 * 4 + n];
        const int c2 = tmp[2 * 4 + n];
        const int c3 = tmp[3 * 4 + n];
        const int even_sum  = c0 + c2;
        const int even_diff = c0 - c2;
        const int odd_lo    = MUL_35468(c1) - MUL_20091(c3);
        const int odd_hi    = MUL_20091(c1) + MUL_35468(c3);

        dst[0] = av_clip_uint8(dst[0] + ((even_sum  + odd_hi + 4) >> 3));
        dst[1] = av_clip_uint8(dst[1] + ((even_diff + odd_lo + 4) >> 3));
        dst[2] = av_clip_uint8(dst[2] + ((even_diff - odd_lo + 4) >> 3));
        dst[3] = av_clip_uint8(dst[3] + ((even_sum  - odd_hi + 4) >> 3));
        dst += stride;
    }
}
236 
237 static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
238 {
239  int i, dc = (block[0] + 4) >> 3;
240  block[0] = 0;
241 
242  for (i = 0; i < 4; i++) {
243  dst[0] = av_clip_uint8(dst[0] + dc);
244  dst[1] = av_clip_uint8(dst[1] + dc);
245  dst[2] = av_clip_uint8(dst[2] + dc);
246  dst[3] = av_clip_uint8(dst[3] + dc);
247  dst += stride;
248  }
249 }
250 
252 #endif /* CONFIG_VP8_DECODER */
253 
/*
 * Declares the eight pixels around an edge as local ints so the filter
 * helpers only need (p, stride) as parameters: p3..p0 are the four samples
 * before p (p3 the farthest), q0..q3 the four samples starting at p.
 * Expects `p` and `stride` in the enclosing scope. All declarations are
 * marked av_unused because most users read only a few of them.
 */
#define LOAD_PIXELS                                                           \
    int av_unused p3 = p[-4 * stride];                                        \
    int av_unused p2 = p[-3 * stride];                                        \
    int av_unused p1 = p[-2 * stride];                                        \
    int av_unused p0 = p[-stride];                                            \
    int av_unused q0 = p[0];                                                  \
    int av_unused q1 = p[stride];                                             \
    int av_unused q2 = p[2 * stride];                                         \
    int av_unused q3 = p[3 * stride];
264 
/* Looks n up in the crop table through a +0x80 bias and removes the bias
 * again. Needs a `cm` table pointer in scope; when cm clamps its index to
 * 0..255 the result is n clamped to -128..127. The argument is parenthesized
 * so that expressions with low-precedence operators index correctly. */
#define clip_int8(n) (cm[(n) + 0x80] - 0x80)
266 
267 static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride,
268  int is4tap, int is_vp7)
269 {
271  int a, f1, f2;
272  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
273 
274  a = 3 * (q0 - p0);
275 
276  if (is4tap)
277  a += clip_int8(p1 - q1);
278 
279  a = clip_int8(a);
280 
281  // We deviate from the spec here with c(a+3) >> 3
282  // since that's what libvpx does.
283  f1 = FFMIN(a + 4, 127) >> 3;
284 
285  if (is_vp7)
286  f2 = f1 - ((a & 7) == 4);
287  else
288  f2 = FFMIN(a + 3, 127) >> 3;
289 
290  // Despite what the spec says, we do need to clamp here to
291  // be bitexact with libvpx.
292  p[-1 * stride] = cm[p0 + f2];
293  p[ 0 * stride] = cm[q0 - f1];
294 
295  // only used for _inner on blocks without high edge variance
296  if (!is4tap) {
297  a = (f1 + 1) >> 1;
298  p[-2 * stride] = cm[p1 + a];
299  p[ 1 * stride] = cm[q1 - a];
300  }
301 }
302 
303 static av_always_inline void vp7_filter_common(uint8_t *p, ptrdiff_t stride,
304  int is4tap)
305 {
306  filter_common(p, stride, is4tap, IS_VP7);
307 }
308 
309 static av_always_inline void vp8_filter_common(uint8_t *p, ptrdiff_t stride,
310  int is4tap)
311 {
312  filter_common(p, stride, is4tap, IS_VP8);
313 }
314 
315 static av_always_inline int vp7_simple_limit(uint8_t *p, ptrdiff_t stride,
316  int flim)
317 {
319  return FFABS(p0 - q0) <= flim;
320 }
321 
322 static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride,
323  int flim)
324 {
326  return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim;
327 }
328 
333 #define NORMAL_LIMIT(vpn) \
334 static av_always_inline int vp ## vpn ## _normal_limit(uint8_t *p, \
335  ptrdiff_t stride, \
336  int E, int I) \
337 { \
338  LOAD_PIXELS \
339  return vp ## vpn ## _simple_limit(p, stride, E) && \
340  FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \
341  FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \
342  FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \
343 }
344 
345 NORMAL_LIMIT(7)
346 NORMAL_LIMIT(8)
347 
348 // high edge variance
349 static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
350 {
352  return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh;
353 }
354 
355 static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
356 {
357  int a0, a1, a2, w;
358  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
359 
361 
362  w = clip_int8(p1 - q1);
363  w = clip_int8(w + 3 * (q0 - p0));
364 
365  a0 = (27 * w + 63) >> 7;
366  a1 = (18 * w + 63) >> 7;
367  a2 = (9 * w + 63) >> 7;
368 
369  p[-3 * stride] = cm[p2 + a2];
370  p[-2 * stride] = cm[p1 + a1];
371  p[-1 * stride] = cm[p0 + a0];
372  p[ 0 * stride] = cm[q0 - a0];
373  p[ 1 * stride] = cm[q1 - a1];
374  p[ 2 * stride] = cm[q2 - a2];
375 }
376 
/**
 * Generate <vpn>_<dir>_loop_filter<size>_c() and its _inner_c() sibling.
 *
 * Both walk `size` positions along an edge, stepping by stridea, and filter
 * across the edge with step strideb. A position is filtered only when
 * <vpn>_normal_limit() accepts it. The edge version then applies
 * <vpn>_filter_common(.., 1) on high edge variance and filter_mbedge()
 * otherwise. The inner version always applies <vpn>_filter_common(), with
 * is4tap = 1 on high edge variance and 0 otherwise.
 * maybe_inline is pasted after `static` (may be empty).
 */
#define LOOP_FILTER(vpn, dir, size, stridea, strideb, maybe_inline)           \
static maybe_inline                                                           \
void vpn ## _ ## dir ## _loop_filter ## size ## _c(uint8_t *dst,              \
                                                   ptrdiff_t stride,          \
                                                   int flim_E, int flim_I,    \
                                                   int hev_thresh)            \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < size; n++) {                                              \
        uint8_t *edge = dst + n * stridea;                                    \
        if (!vpn ## _normal_limit(edge, strideb, flim_E, flim_I))             \
            continue;                                                         \
        if (hev(edge, strideb, hev_thresh))                                   \
            vpn ## _filter_common(edge, strideb, 1);                          \
        else                                                                  \
            filter_mbedge(edge, strideb);                                     \
    }                                                                         \
}                                                                             \
                                                                              \
static maybe_inline                                                           \
void vpn ## _ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst,        \
                                                         ptrdiff_t stride,    \
                                                         int flim_E,          \
                                                         int flim_I,          \
                                                         int hev_thresh)      \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < size; n++) {                                              \
        uint8_t *edge = dst + n * stridea;                                    \
        if (!vpn ## _normal_limit(edge, strideb, flim_E, flim_I))             \
            continue;                                                         \
        if (hev(edge, strideb, hev_thresh))                                   \
            vpn ## _filter_common(edge, strideb, 1);                          \
        else                                                                  \
            vpn ## _filter_common(edge, strideb, 0);                          \
    }                                                                         \
}
413 
/**
 * Generate the chroma loop filters for one direction.
 *
 * Instantiates the 8-position LOOP_FILTER pair as av_always_inline helpers
 * and wraps them in <vpn>_<dir>_loop_filter8uv_c() and
 * <vpn>_<dir>_loop_filter8uv_inner_c(), which run the helper on dstU first
 * and then on dstV with identical limits.
 */
#define UV_LOOP_FILTER(vpn, dir, stridea, strideb)                            \
LOOP_FILTER(vpn, dir, 8, stridea, strideb, av_always_inline)                  \
static void vpn ## _ ## dir ## _loop_filter8uv_c(uint8_t *dstU,               \
                                                 uint8_t *dstV,               \
                                                 ptrdiff_t stride, int fE,    \
                                                 int fI, int hev_thresh)      \
{                                                                             \
    vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);     \
    vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);     \
}                                                                             \
                                                                              \
static void vpn ## _ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU,         \
                                                       uint8_t *dstV,         \
                                                       ptrdiff_t stride,      \
                                                       int fE, int fI,        \
                                                       int hev_thresh)        \
{                                                                             \
    vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride,                    \
                                             fE, fI, hev_thresh);             \
    vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride,                    \
                                             fE, fI, hev_thresh);             \
}
436 
/**
 * Generate <vpn>_v_loop_filter_simple_c() and <vpn>_h_loop_filter_simple_c().
 *
 * Each visits 16 positions along the edge and applies
 * <vpn>_filter_common(.., 1) wherever <vpn>_simple_limit() accepts the
 * position. The v variant steps by one byte and filters across rows
 * (step `stride`); the h variant steps by `stride` and filters across
 * columns (step 1).
 */
#define LOOP_FILTER_SIMPLE(vpn)                                               \
static void vpn ## _v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride,    \
                                           int flim)                          \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < 16; n++) {                                                \
        uint8_t *edge = dst + n;                                              \
        if (!vpn ## _simple_limit(edge, stride, flim))                        \
            continue;                                                         \
        vpn ## _filter_common(edge, stride, 1);                               \
    }                                                                         \
}                                                                             \
                                                                              \
static void vpn ## _h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride,    \
                                           int flim)                          \
{                                                                             \
    int n;                                                                    \
    for (n = 0; n < 16; n++) {                                                \
        uint8_t *edge = dst + n * stride;                                     \
        if (!vpn ## _simple_limit(edge, 1, flim))                             \
            continue;                                                         \
        vpn ## _filter_common(edge, 1, 1);                                    \
    }                                                                         \
}
455 
/*
 * Instantiate every loop filter of one codec prefix: the 16-position luma
 * filters and the 8-position chroma filters for both directions, plus the
 * simple filters. For v filters successive positions are one byte apart and
 * the filter steps by `stride`; for h filters it is the other way round.
 * The last line deliberately has no line continuation, so the macro ends here
 * regardless of what follows it.
 */
#define LOOP_FILTERS(vpn)                                                     \
    LOOP_FILTER(vpn, v, 16, 1, stride, )                                      \
    LOOP_FILTER(vpn, h, 16, stride, 1, )                                      \
    UV_LOOP_FILTER(vpn, v, 1, stride)                                         \
    UV_LOOP_FILTER(vpn, h, stride, 1)                                         \
    LOOP_FILTER_SIMPLE(vpn)
462 
/*
 * Sub-pixel interpolation taps, one row per fractional position 1..7
 * (users index with mx - 1 or my - 1). Taps 1 and 4 are stored as magnitudes
 * and subtracted by FILTER_6TAP / FILTER_4TAP; with those signs every row
 * sums to 128, which matches the >> 7 in the filters.
 */
static const uint8_t subpel_filters[7][6] = {
    /* 1 */ { 0,  6, 123,  12,  1, 0 },
    /* 2 */ { 2, 11, 108,  36,  8, 1 },
    /* 3 */ { 0,  9,  93,  50,  6, 0 },
    /* 4 */ { 3, 16,  77,  77, 16, 3 },
    /* 5 */ { 0,  6,  50,  93,  9, 0 },
    /* 6 */ { 1,  8,  36, 108, 11, 2 },
    /* 7 */ { 0,  1,  12, 123,  6, 0 },
};
472 
/**
 * Generate put_vp8_pixels<WIDTH>_c(): copy h rows of WIDTH bytes from src to
 * dst, advancing each pointer by its own stride. The x and y arguments are
 * accepted but not used.
 */
#define PUT_PIXELS(WIDTH)                                                     \
static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride,  \
                                          uint8_t *src, ptrdiff_t srcstride,  \
                                          int h, int x, int y)                \
{                                                                             \
    while (h-- > 0) {                                                         \
        memcpy(dst, src, WIDTH);                                              \
        dst += dststride;                                                     \
        src += srcstride;                                                     \
    }                                                                         \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
486 
/*
 * Sub-pixel filter kernels. Both expect an index `x` and a crop table `cm`
 * in the enclosing scope. `src` is the sample array, `F` a row of six taps
 * and `stride` the distance between neighbouring taps. Taps 1 and 4 are
 * subtracted, the others added; the sum is rounded (+64), shifted right by 7
 * and passed through cm. FILTER_4TAP leaves out the outermost taps F[0] and
 * F[5]. All macro arguments are parenthesized so expressions can be passed
 * safely.
 */
#define FILTER_6TAP(src, F, stride)                                           \
    cm[((F)[2] * (src)[x + 0 * (stride)] - (F)[1] * (src)[x - 1 * (stride)] + \
        (F)[0] * (src)[x - 2 * (stride)] + (F)[3] * (src)[x + 1 * (stride)] - \
        (F)[4] * (src)[x + 2 * (stride)] + (F)[5] * (src)[x + 3 * (stride)] + \
        64) >> 7]

#define FILTER_4TAP(src, F, stride)                                           \
    cm[((F)[2] * (src)[x + 0 * (stride)] - (F)[1] * (src)[x - 1 * (stride)] + \
        (F)[3] * (src)[x + 1 * (stride)] - (F)[4] * (src)[x + 2 * (stride)] + \
        64) >> 7]
495 
/**
 * Generate put_vp8_epel<SIZE>_h<TAPS>_c(): horizontal sub-pixel filter.
 *
 * Filters h rows of SIZE pixels with FILTER_<TAPS>TAP along the row
 * (tap distance 1), using the tap row subpel_filters[mx - 1]. my is unused.
 */
#define VP8_EPEL_H(SIZE, TAPS)                                                \
static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst,            \
                                                     ptrdiff_t dststride,     \
                                                     uint8_t *src,            \
                                                     ptrdiff_t srcstride,     \
                                                     int h, int mx, int my)   \
{                                                                             \
    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
    const uint8_t *h_taps = subpel_filters[mx - 1];                           \
    int x;                                                                    \
                                                                              \
    while (h-- > 0) {                                                         \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## TAPS ## TAP(src, h_taps, 1);                  \
        dst += dststride;                                                     \
        src += srcstride;                                                     \
    }                                                                         \
}
513 
/**
 * Generate put_vp8_epel<SIZE>_v<TAPS>_c(): vertical sub-pixel filter.
 *
 * Filters h rows of SIZE pixels with FILTER_<TAPS>TAP down the column
 * (tap distance srcstride), using the tap row subpel_filters[my - 1].
 * mx is unused.
 */
#define VP8_EPEL_V(SIZE, TAPS)                                                \
static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst,            \
                                                     ptrdiff_t dststride,     \
                                                     uint8_t *src,            \
                                                     ptrdiff_t srcstride,     \
                                                     int h, int mx, int my)   \
{                                                                             \
    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
    const uint8_t *v_taps = subpel_filters[my - 1];                           \
    int x;                                                                    \
                                                                              \
    while (h-- > 0) {                                                         \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## TAPS ## TAP(src, v_taps, srcstride);          \
        dst += dststride;                                                     \
        src += srcstride;                                                     \
    }                                                                         \
}
531 
/**
 * Generate put_vp8_epel<SIZE>_h<HTAPS>v<VTAPS>_c(): separable 2-D filter.
 *
 * Pass 1 filters h + VTAPS - 1 source rows horizontally (tap row
 * subpel_filters[mx - 1]) into a SIZE-wide scratch buffer, starting two rows
 * above the block for a 6-tap vertical filter and one row above for a 4-tap
 * one. Pass 2 filters the scratch rows vertically (tap row
 * subpel_filters[my - 1]) into dst. The scratch buffer holds
 * 2 * SIZE + VTAPS - 1 rows, so h must not exceed 2 * SIZE.
 */
#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS)                                       \
static void                                                                   \
put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst,         \
                                                        ptrdiff_t dststride,  \
                                                        uint8_t *src,         \
                                                        ptrdiff_t srcstride,  \
                                                        int h, int mx,        \
                                                        int my)               \
{                                                                             \
    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
    const uint8_t *h_taps = subpel_filters[mx - 1];                           \
    const uint8_t *v_taps = subpel_filters[my - 1];                           \
    const int lead        = 2 - (VTAPS == 4);                                 \
    uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE];                         \
    uint8_t *tmp = tmp_array;                                                 \
    int x, y;                                                                 \
                                                                              \
    src -= lead * srcstride;                                                  \
    for (y = 0; y < h + VTAPS - 1; y++, tmp += SIZE, src += srcstride)        \
        for (x = 0; x < SIZE; x++)                                            \
            tmp[x] = FILTER_ ## HTAPS ## TAP(src, h_taps, 1);                 \
                                                                              \
    tmp = tmp_array + lead * SIZE;                                            \
    for (y = 0; y < h; y++, tmp += SIZE, dst += dststride)                    \
        for (x = 0; x < SIZE; x++)                                            \
            dst[x] = FILTER_ ## VTAPS ## TAP(tmp, v_taps, SIZE);              \
}
564 
565 VP8_EPEL_H(16, 4)
566 VP8_EPEL_H(8, 4)
567 VP8_EPEL_H(4, 4)
568 VP8_EPEL_H(16, 6)
569 VP8_EPEL_H(8, 6)
570 VP8_EPEL_H(4, 6)
571 VP8_EPEL_V(16, 4)
572 VP8_EPEL_V(8, 4)
573 VP8_EPEL_V(4, 4)
574 VP8_EPEL_V(16, 6)
575 VP8_EPEL_V(8, 6)
576 VP8_EPEL_V(4, 6)
577 
578 VP8_EPEL_HV(16, 4, 4)
579 VP8_EPEL_HV(8, 4, 4)
580 VP8_EPEL_HV(4, 4, 4)
581 VP8_EPEL_HV(16, 4, 6)
582 VP8_EPEL_HV(8, 4, 6)
583 VP8_EPEL_HV(4, 4, 6)
584 VP8_EPEL_HV(16, 6, 4)
585 VP8_EPEL_HV(8, 6, 4)
586 VP8_EPEL_HV(4, 6, 4)
587 VP8_EPEL_HV(16, 6, 6)
588 VP8_EPEL_HV(8, 6, 6)
589 VP8_EPEL_HV(4, 6, 6)
590 
/**
 * Generate the bilinear interpolators for one block width.
 *
 * put_vp8_bilinear<SIZE>_h_c() blends each pixel with its right neighbour
 * using weights (8 - mx, mx); _v_c() blends each pixel with the one a row
 * below using weights (8 - my, my); _hv_c() runs the horizontal blend on
 * h + 1 rows into a scratch buffer and then the vertical blend on that.
 * Every blend rounds with +4 and shifts right by 3. The scratch buffer has
 * 2 * SIZE + 1 rows, so h must not exceed 2 * SIZE.
 */
#define VP8_BILINEAR(SIZE)                                                    \
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \
                                             uint8_t *src, ptrdiff_t sstride, \
                                             int h, int mx, int my)           \
{                                                                             \
    const int w_near = 8 - mx, w_far = mx;                                    \
    int row, col;                                                             \
                                                                              \
    for (row = 0; row < h; row++, dst += dstride, src += sstride)             \
        for (col = 0; col < SIZE; col++)                                      \
            dst[col] = (w_near * src[col] + w_far * src[col + 1] + 4) >> 3;   \
}                                                                             \
                                                                              \
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \
                                             uint8_t *src, ptrdiff_t sstride, \
                                             int h, int mx, int my)           \
{                                                                             \
    const int w_near = 8 - my, w_far = my;                                    \
    int row, col;                                                             \
                                                                              \
    for (row = 0; row < h; row++, dst += dstride, src += sstride)             \
        for (col = 0; col < SIZE; col++)                                      \
            dst[col] = (w_near * src[col] +                                   \
                        w_far  * src[col + sstride] + 4) >> 3;                \
}                                                                             \
                                                                              \
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst,                   \
                                              ptrdiff_t dstride,              \
                                              uint8_t *src,                   \
                                              ptrdiff_t sstride,              \
                                              int h, int mx, int my)          \
{                                                                             \
    const int hw_near = 8 - mx, hw_far = mx;                                  \
    const int vw_near = 8 - my, vw_far = my;                                  \
    uint8_t tmp_array[(2 * SIZE + 1) * SIZE];                                 \
    uint8_t *line = tmp_array;                                                \
    int row, col;                                                             \
                                                                              \
    for (row = 0; row < h + 1; row++, line += SIZE, src += sstride)           \
        for (col = 0; col < SIZE; col++)                                      \
            line[col] = (hw_near * src[col] +                                 \
                         hw_far  * src[col + 1] + 4) >> 3;                    \
                                                                              \
    line = tmp_array;                                                         \
    for (row = 0; row < h; row++, line += SIZE, dst += dstride)               \
        for (col = 0; col < SIZE; col++)                                      \
            dst[col] = (vw_near * line[col] +                                 \
                        vw_far  * line[col + SIZE] + 4) >> 3;                 \
}

VP8_BILINEAR(16)
VP8_BILINEAR(8)
VP8_BILINEAR(4)
649 
/*
 * Fill dsp->put_vp8_epel_pixels_tab[IDX] with the C functions for block
 * width SIZE. The second index selects the vertical filter and the third the
 * horizontal one (0: none, 1: 4-tap, 2: 6-tap). Expects a `dsp` pointer in
 * scope. Wrapped in do { } while (0) so the macro is a single statement
 * wherever it is used.
 */
#define VP78_MC_FUNC(IDX, SIZE)                                               \
    do {                                                                      \
        dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c; \
    } while (0)
660 
/*
 * Fill dsp->put_vp8_bilinear_pixels_tab[IDX] with the C functions for block
 * width SIZE. The table has the same 3x3 layout as the epel table, so
 * entries 1 and 2 of each index share one bilinear function. Expects a `dsp`
 * pointer in scope. Wrapped in do { } while (0) so the macro is a single
 * statement wherever it is used.
 */
#define VP78_BILINEAR_MC_FUNC(IDX, SIZE)                                      \
    do {                                                                      \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
    } while (0)
671 
673 {
674  VP78_MC_FUNC(0, 16);
675  VP78_MC_FUNC(1, 8);
676  VP78_MC_FUNC(2, 4);
677 
678  VP78_BILINEAR_MC_FUNC(0, 16);
679  VP78_BILINEAR_MC_FUNC(1, 8);
680  VP78_BILINEAR_MC_FUNC(2, 4);
681 
682  if (ARCH_ARM)
683  ff_vp78dsp_init_arm(dsp);
684  if (ARCH_PPC)
685  ff_vp78dsp_init_ppc(dsp);
686  if (ARCH_X86)
687  ff_vp78dsp_init_x86(dsp);
688 }
689 
690 #if CONFIG_VP7_DECODER
691 LOOP_FILTERS(vp7)
692 
694 {
695  dsp->vp8_luma_dc_wht = vp7_luma_dc_wht_c;
696  dsp->vp8_luma_dc_wht_dc = vp7_luma_dc_wht_dc_c;
697  dsp->vp8_idct_add = vp7_idct_add_c;
698  dsp->vp8_idct_dc_add = vp7_idct_dc_add_c;
699  dsp->vp8_idct_dc_add4y = vp7_idct_dc_add4y_c;
700  dsp->vp8_idct_dc_add4uv = vp7_idct_dc_add4uv_c;
701 
702  dsp->vp8_v_loop_filter16y = vp7_v_loop_filter16_c;
703  dsp->vp8_h_loop_filter16y = vp7_h_loop_filter16_c;
704  dsp->vp8_v_loop_filter8uv = vp7_v_loop_filter8uv_c;
705  dsp->vp8_h_loop_filter8uv = vp7_h_loop_filter8uv_c;
706 
707  dsp->vp8_v_loop_filter16y_inner = vp7_v_loop_filter16_inner_c;
708  dsp->vp8_h_loop_filter16y_inner = vp7_h_loop_filter16_inner_c;
709  dsp->vp8_v_loop_filter8uv_inner = vp7_v_loop_filter8uv_inner_c;
710  dsp->vp8_h_loop_filter8uv_inner = vp7_h_loop_filter8uv_inner_c;
711 
712  dsp->vp8_v_loop_filter_simple = vp7_v_loop_filter_simple_c;
713  dsp->vp8_h_loop_filter_simple = vp7_h_loop_filter_simple_c;
714 }
715 #endif /* CONFIG_VP7_DECODER */
716 
717 #if CONFIG_VP8_DECODER
718 LOOP_FILTERS(vp8)
719 
721 {
722  dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c;
723  dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c;
724  dsp->vp8_idct_add = vp8_idct_add_c;
725  dsp->vp8_idct_dc_add = vp8_idct_dc_add_c;
726  dsp->vp8_idct_dc_add4y = vp8_idct_dc_add4y_c;
727  dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c;
728 
729  dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c;
730  dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c;
731  dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c;
732  dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c;
733 
734  dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c;
735  dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c;
736  dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c;
737  dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c;
738 
739  dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c;
740  dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c;
741 
742  if (ARCH_ARM)
743  ff_vp8dsp_init_arm(dsp);
744  if (ARCH_X86)
745  ff_vp8dsp_init_x86(dsp);
746 }
747 #endif /* CONFIG_VP8_DECODER */
void ff_vp7dsp_init(VP8DSPContext *c)
#define VP8_EPEL_V(SIZE, TAPS)
Definition: vp8dsp.c:514
av_cold void ff_vp78dsp_init_ppc(VP8DSPContext *c)
#define MAX_NEG_CROP
Definition: mathops.h:30
static av_always_inline void vp7_filter_common(uint8_t *p, ptrdiff_t stride, int is4tap)
Definition: vp8dsp.c:303
av_cold void ff_vp8dsp_init_arm(VP8DSPContext *dsp)
int stride
Definition: mace.c:144
av_cold void ff_vp78dsp_init_arm(VP8DSPContext *dsp)
uint8_t
#define av_cold
Definition: attributes.h:66
VP8 compatible video decoder.
static const uint8_t subpel_filters[7][6]
Definition: vp8dsp.c:463
#define cm
Definition: dvbsubdec.c:34
#define VP8_EPEL_H(SIZE, TAPS)
Definition: vp8dsp.c:496
av_cold void ff_vp78dsp_init(VP8DSPContext *dsp)
Definition: vp8dsp.c:672
#define MK_IDCT_DC_ADD4_C(name)
Definition: vp8dsp.c:33
static av_always_inline void vp8_filter_common(uint8_t *p, ptrdiff_t stride, int is4tap)
Definition: vp8dsp.c:309
#define VP8_BILINEAR(SIZE)
Definition: vp8dsp.c:591
#define FFMIN(a, b)
Definition: common.h:57
#define VP78_BILINEAR_MC_FUNC(IDX, SIZE)
Definition: vp8dsp.c:661
#define FFABS(a)
Definition: common.h:52
#define VP78_MC_FUNC(IDX, SIZE)
Definition: vp8dsp.c:650
#define ARCH_PPC
Definition: config.h:24
#define ARCH_ARM
Definition: config.h:14
#define LOOP_FILTERS(vpn)
Definition: vp8dsp.c:456
#define IS_VP8
Definition: vp8dsp.h:103
#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS)
Definition: vp8dsp.c:532
void ff_vp78dsp_init_x86(VP8DSPContext *c)
Definition: vp8dsp_init.c:318
static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
Definition: vp8dsp.c:322
static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, int is4tap, int is_vp7)
Definition: vp8dsp.c:267
static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
Definition: vp8dsp.c:355
void ff_vp8dsp_init_x86(VP8DSPContext *c)
Definition: vp8dsp_init.c:368
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_dlog(ac->avr,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> dc
#define NORMAL_LIMIT(vpn)
E - limit at the macroblock edge I - limit for interior difference.
Definition: vp8dsp.c:333
static av_always_inline int vp7_simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
Definition: vp8dsp.c:315
common internal and external API header
#define ARCH_X86
Definition: config.h:33
#define PUT_PIXELS(WIDTH)
Definition: vp8dsp.c:473
#define ff_crop_tab
#define IS_VP7
Definition: vp8dsp.h:102
#define av_always_inline
Definition: attributes.h:40
#define LOAD_PIXELS
Definition: vp8dsp.c:255
#define clip_int8(n)
Definition: vp8dsp.c:265
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
Definition: vp8dsp.c:349
void ff_vp8dsp_init(VP8DSPContext *c)
static int16_t block[64]
Definition: dct-test.c:88