Libav

libavcodec/h264.c

Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include "internal.h"
00029 #include "dsputil.h"
00030 #include "avcodec.h"
00031 #include "mpegvideo.h"
00032 #include "h264.h"
00033 #include "h264data.h"
00034 #include "h264_mvpred.h"
00035 #include "h264_parser.h"
00036 #include "golomb.h"
00037 #include "mathops.h"
00038 #include "rectangle.h"
00039 #include "vdpau_internal.h"
00040 
00041 #include "cabac.h"
00042 
00043 //#undef NDEBUG
00044 #include <assert.h>
00045 
/* Precomputed qp%6 and qp/6 for qp in [0,51]; used by the dequant table
 * initialization below to avoid runtime division. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
00053 
00054 void ff_h264_write_back_intra_pred_mode(H264Context *h){
00055     int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
00056 
00057     AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
00058     mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
00059     mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
00060     mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
00061 }
00062 
/**
 * Check the cached intra 4x4 prediction modes of the current macroblock
 * against the availability of the top/left neighboring samples.
 *
 * Modes that need unavailable neighbors are either replaced with an
 * equivalent DC mode (positive table entry) or rejected (-1 entry).
 *
 * @return 0 on success, -1 if a mode requires an unavailable neighbor
 */
int ff_h264_check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* Indexed by cached mode: -1 = invalid without that neighbor,
     * 0 = keep as-is, >0 = substitute DC mode. */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    int i;

    if(!(h->top_samples_available&0x8000)){
        /* top row of 4x4 blocks */
        for(i=0; i<4; i++){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
            if(status<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                return -1;
            } else if(status){
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
            }
        }
    }

    if((h->left_samples_available&0x8888)!=0x8888){
        /* one availability bit per left 4x4 block row */
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
        for(i=0; i<4; i++){
            if(!(h->left_samples_available&mask[i])){
                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                if(status<0){
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                    return -1;
                } else if(status){
                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
                }
            }
        }
    }

    return 0;
} //FIXME cleanup like ff_h264_check_intra_pred_mode
00101 
/**
 * Check an intra 16x16 / chroma prediction mode against the availability
 * of the neighboring samples, substituting a DC variant where needed.
 *
 * @param mode requested prediction mode
 * @return the (possibly substituted) mode, or -1 on error
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    /* Indexed by mode: -1 = invalid without that neighbor, otherwise the
     * substitute mode to use when the neighbor is missing. */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    /* unsigned compare: also catches negative mode values */
    if(mode > 6U) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            /* only one of the two left field halves is available */
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}
00136 
/**
 * Decode one NAL unit: parse the NAL header byte into h->nal_ref_idc and
 * h->nal_unit_type, then strip the 0x000003 emulation-prevention escapes
 * from the payload.
 *
 * @param src         input buffer, starting at the NAL header byte
 * @param dst_length  set to the length of the unescaped payload
 * @param consumed    set to the number of input bytes consumed (incl. header)
 * @param length      number of input bytes available
 * @return pointer to the unescaped payload (src itself when no escapes were
 *         found, otherwise an internal rbsp buffer), or NULL on OOM
 */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;                //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;
#if 0
    for(i=0; i<length; i++)
        printf("%2X ", src[i]);
#endif

    /* Fast scan for a zero byte (the start of a 00 00 0x sequence) using
     * word-at-a-time tests where unaligned loads are cheap. RS is the
     * rewind applied when the word test hits. */
#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
        i-= RS;
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    /* everything before the first escape can be copied verbatim */
    memcpy(dst, src, i);
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;
        }

        dst[di++]= src[si++];
    }
    while(si<length)
        dst[di++]= src[si++];
nsc:

    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}
00226 
00227 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
00228     int v= *src;
00229     int r;
00230 
00231     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
00232 
00233     for(r=1; r<9; r++){
00234         if(v&1) return r;
00235         v>>=1;
00236     }
00237     return 0;
00238 }
00239 
/**
 * 4x4 inverse Hadamard transform and dequantization of the luma DC
 * coefficients, operating in place on the DC positions of the 16x16
 * coefficient block (spread at multiples of stride).
 *
 * @param qmul dequant multiplier; the result is rounded with +128 >> 8
 */
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
#define stride 16
    int i;
    int temp[16]; //FIXME check if this is a good idea
    /* offsets of the DC coefficients of each 4x4 sub-block */
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

//memset(block, 64, 2*256);
//return;
    /* horizontal butterflies */
    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    /* vertical butterflies + dequant */
    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
    }
}
00279 
00280 #if 0
00281 
00285 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
00286 //    const int qmul= dequant_coeff[qp][0];
00287     int i;
00288     int temp[16]; //FIXME check if this is a good idea
00289     static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
00290     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
00291 
00292     for(i=0; i<4; i++){
00293         const int offset= y_offset[i];
00294         const int z0= block[offset+stride*0] + block[offset+stride*4];
00295         const int z1= block[offset+stride*0] - block[offset+stride*4];
00296         const int z2= block[offset+stride*1] - block[offset+stride*5];
00297         const int z3= block[offset+stride*1] + block[offset+stride*5];
00298 
00299         temp[4*i+0]= z0+z3;
00300         temp[4*i+1]= z1+z2;
00301         temp[4*i+2]= z1-z2;
00302         temp[4*i+3]= z0-z3;
00303     }
00304 
00305     for(i=0; i<4; i++){
00306         const int offset= x_offset[i];
00307         const int z0= temp[4*0+i] + temp[4*2+i];
00308         const int z1= temp[4*0+i] - temp[4*2+i];
00309         const int z2= temp[4*1+i] - temp[4*3+i];
00310         const int z3= temp[4*1+i] + temp[4*3+i];
00311 
00312         block[stride*0 +offset]= (z0 + z3)>>1;
00313         block[stride*2 +offset]= (z1 + z2)>>1;
00314         block[stride*8 +offset]= (z1 - z2)>>1;
00315         block[stride*10+offset]= (z0 - z3)>>1;
00316     }
00317 }
00318 #endif
00319 
00320 #undef xStride
00321 #undef stride
00322 
00323 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
00324     const int stride= 16*2;
00325     const int xStride= 16;
00326     int a,b,c,d,e;
00327 
00328     a= block[stride*0 + xStride*0];
00329     b= block[stride*0 + xStride*1];
00330     c= block[stride*1 + xStride*0];
00331     d= block[stride*1 + xStride*1];
00332 
00333     e= a-b;
00334     a= a+b;
00335     b= c-d;
00336     c= c+d;
00337 
00338     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
00339     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
00340     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
00341     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
00342 }
00343 
#if 0
/* Disabled forward chroma DC transform (encoder-side counterpart of
 * chroma_dc_dequant_idct_c above); kept for reference only. */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
00366 
/**
 * Motion compensation for one partition in one prediction direction.
 *
 * Fetches the luma and chroma reference pixels for the motion vector cached
 * at scan8[n] in list @p list, emulating picture edges when the reference
 * area lies (partially) outside the picture, and applies the given qpel /
 * chroma MC functions to dest_y/dest_cb/dest_cr.
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2); /* quarter-pel phase selects the qpel function */
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    /* sub-pel interpolation reads 3 extra pixels on each side */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        /* second half of a rectangular partition */
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        /* edge buffer is reused: cb was consumed by the call above */
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}
00424 
/**
 * Standard (unweighted) motion compensation for one partition.
 *
 * Runs mc_dir_part() for list 0 and/or list 1; when both are used, the
 * first direction is written with the "put" functions and the second is
 * blended in with the "avg" functions.
 */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* advance the destinations to the partition's position */
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);

        /* bi-prediction: the second direction averages into the first */
        qpix_op=  qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);
    }
}
00458 
/**
 * Weighted motion compensation for one partition.
 *
 * Bi-directional partitions predict both references into separate buffers
 * and combine them with implicit (h->use_weight == 2) or explicit weights;
 * uni-directional partitions predict in place and then apply the explicit
 * weight/offset for the used list.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;

    /* advance the destinations to the partition's position */
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        /* list 0 goes to the destination, list 1 to the scratchpad */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            /* implicit weights always sum to 64, denominator is 5 */
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            /* explicit per-reference weights and offsets */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}
00525 
00526 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
00527                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00528                            int x_offset, int y_offset,
00529                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00530                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00531                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00532                            int list0, int list1){
00533     if((h->use_weight==2 && list0 && list1
00534         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
00535        || h->use_weight==1)
00536         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00537                          x_offset, y_offset, qpix_put, chroma_put,
00538                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
00539     else
00540         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00541                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
00542 }
00543 
/**
 * Prefetch reference pixels for the given list based on the partition-0
 * motion vector, ahead of the actual motion compensation.
 */
static inline void prefetch_motion(H264Context *h, int list){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    if(refn >= 0){
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src= h->ref_list[list][refn].data;
        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        /* presumably src[2]-src[1] (gap between cb and cr planes) is used
         * as the stride so one call covers both chroma planes — verify */
        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}
00559 
/**
 * Perform motion compensation for the whole inter macroblock, dispatching
 * to mc_part() per partition according to the macroblock / sub-macroblock
 * partition type (16x16, 16x8, 8x16, or 8x8 with 8x8/8x4/4x8/4x4 subs).
 */
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    prefetch_motion(h, 0);

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    }else if(IS_16X8(mb_type)){
        /* top and bottom 16x8 halves */
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else if(IS_8X16(mb_type)){
        /* left and right 8x16 halves */
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else{
        int i;

        assert(IS_8X8(mb_type));

        /* one 8x8 quadrant per iteration, each with its own sub type */
        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                }
            }
        }
    }

    prefetch_motion(h, 1);
}
00646 
00647 
/**
 * Free all per-context decoding tables and per-thread buffers allocated
 * by ff_h264_alloc_tables() and friends. Thread contexts other than the
 * main one (index 0) are freed as well.
 */
static void free_tables(H264Context *h){
    int i;
    H264Context *hx;
    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    h->slice_table= NULL; /* points into slice_table_base, now freed */
    av_freep(&h->list_counts);

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2br_xy);

    for(i = 0; i < MAX_THREADS; i++) {
        hx = h->thread_context[i];
        if(!hx) continue;
        av_freep(&hx->top_borders[1]);
        av_freep(&hx->top_borders[0]);
        av_freep(&hx->s.obmc_scratchpad);
        av_freep(&hx->rbsp_buffer[1]);
        av_freep(&hx->rbsp_buffer[0]);
        hx->rbsp_buffer_size[0] = 0;
        hx->rbsp_buffer_size[1] = 0;
        if (i) av_freep(&h->thread_context[i]);
    }
}
00678 
/**
 * Build the 8x8 dequantization tables for all 52 qp values from the PPS
 * scaling matrices. When both 8x8 scaling matrices are identical the
 * second table aliases the first instead of being recomputed.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int i,q,x;
    /* non-C idct8 implementations expect transposed coefficients */
    const int transpose = (h->h264dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
    h->dequant8_coeff[0] = h->dequant8_buffer[0];
    h->dequant8_coeff[1] = h->dequant8_buffer[1];

    for(i=0; i<2; i++ ){
        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
            h->dequant8_coeff[1] = h->dequant8_buffer[0];
            break;
        }

        for(q=0; q<52; q++){
            int shift = div6[q];
            int idx = rem6[q];
            for(x=0; x<64; x++)
                h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
00701 
/**
 * Build the 4x4 dequantization tables for all 52 qp values from the PPS
 * scaling matrices. A table whose scaling matrix equals an earlier one
 * aliases that earlier table instead of being recomputed.
 */
static void init_dequant4_coeff_table(H264Context *h){
    int i,j,q,x;
    /* non-C idct implementations expect transposed coefficients */
    const int transpose = (h->h264dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        }
        if(j<i)
            continue; /* aliased to an earlier identical table */

        for(q=0; q<52; q++){
            int shift = div6[q] + 2;
            int idx = rem6[q];
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                    h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}
00726 
/**
 * Initialize all dequantization tables; the 8x8 tables only when the PPS
 * enables 8x8 transforms. In lossless (transform bypass) mode the qp=0
 * entries are forced to a flat scale of 1<<6.
 */
static void init_dequant_tables(H264Context *h){
    int i,x;
    init_dequant4_coeff_table(h);
    if(h->pps.transform_8x8_mode)
        init_dequant8_coeff_table(h);
    if(h->sps.transform_bypass){
        for(i=0; i<6; i++)
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][0][x] = 1<<6;
        if(h->pps.transform_8x8_mode)
            for(i=0; i<2; i++)
                for(x=0; x<64; x++)
                    h->dequant8_coeff[i][0][x] = 1<<6;
    }
}
00742 
00743 
/**
 * Allocate the per-sequence decoder tables shared by all slice threads.
 * Sizes are derived from the macroblock geometry in MpegEncContext, so the
 * SPS must have been parsed (mb_width/mb_height/mb_stride valid) first.
 * @return 0 on success, -1 on allocation failure; partially allocated
 *         tables are released via free_tables().
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* one extra mb row as guard band for edge macroblocks */
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    /* per-row tables: two mb rows (for MBAFF pairs) per slice thread */
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 32 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    /* -1 marks macroblocks no slice has decoded yet; slice_table is offset
     * past the guard row so that neighbor lookups at the top edge are safe */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    /* precompute mb address -> motion-vector/ref table index mappings */
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            /* without FMO the per-row buffers wrap every two mb rows */
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    /* allocated lazily in ff_h264_frame_start() once linesize is known */
    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h);
    return -1;
}
00787 
/**
 * Share the per-sequence tables of @p src with slice-thread context @p dst.
 * No copies are made: the big per-picture tables are aliased directly, and
 * the per-row tables (intra4x4_pred_mode, mvd_table) are offset by thread
 * index @p i so each thread writes its own pair of macroblock rows.
 * src remains the owner of all memory; dst must never free these pointers.
 */
static void clone_tables(H264Context *dst, H264Context *src, int i){
    MpegEncContext * const s = &src->s;
    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
    dst->non_zero_count           = src->non_zero_count;
    dst->slice_table              = src->slice_table;
    dst->cbp_table                = src->cbp_table;
    dst->mb2b_xy                  = src->mb2b_xy;
    dst->mb2br_xy                 = src->mb2br_xy;
    dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
    dst->mvd_table[0]             = src->mvd_table[0] + i*8*2*s->mb_stride;
    dst->mvd_table[1]             = src->mvd_table[1] + i*8*2*s->mb_stride;
    dst->direct_table             = src->direct_table;
    dst->list_counts              = src->list_counts;

    /* per-thread scratchpad is allocated lazily in ff_h264_frame_start() */
    dst->s.obmc_scratchpad = NULL;
    ff_h264_pred_init(&dst->hpc, src->s.codec_id);
}
00808 
/**
 * Initialize per-slice-context state: the deblock/intra top-border scratch
 * rows (luma 16 + chroma 8+8 bytes per mb) and the ref_cache entries that
 * are permanently unavailable for 8x8 sub-partitions.
 * @return 0 on success, -1 on allocation failure.
 */
static int context_init(H264Context *h){
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)

    h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

    return 0;
fail:
    return -1; // free_tables will clean up for us
}
00824 
00825 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
00826 
/**
 * One-time initialization shared by the decoder (and encoder) init paths:
 * wires up DSP/prediction function tables and sets safe defaults before
 * any bitstream headers have been parsed.
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    ff_h264dsp_init(&h->h264dsp);
    ff_h264_pred_init(&h->hpc, s->codec_id);

    /* -1 = no PPS-driven dequant tables built yet */
    h->dequant_coeff_pps= -1;
    s->unrestricted_mv=1;
    s->decode=1; //FIXME

    dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early

    /* flat default scaling matrices (value 16 == unity) until an SPS/PPS
     * provides real ones */
    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
00846 
00847 av_cold int ff_h264_decode_init(AVCodecContext *avctx){
00848     H264Context *h= avctx->priv_data;
00849     MpegEncContext * const s = &h->s;
00850 
00851     MPV_decode_defaults(s);
00852 
00853     s->avctx = avctx;
00854     common_init(h);
00855 
00856     s->out_format = FMT_H264;
00857     s->workaround_bugs= avctx->workaround_bugs;
00858 
00859     // set defaults
00860 //    s->decode_mb= ff_h263_decode_mb;
00861     s->quarter_sample = 1;
00862     if(!avctx->has_b_frames)
00863     s->low_delay= 1;
00864 
00865     avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
00866 
00867     ff_h264_decode_init_vlc();
00868 
00869     h->thread_context[0] = h;
00870     h->outputed_poc = INT_MIN;
00871     h->prev_poc_msb= 1<<16;
00872     h->x264_build = -1;
00873     ff_h264_reset_sei(h);
00874     if(avctx->codec_id == CODEC_ID_H264){
00875         if(avctx->ticks_per_frame == 1){
00876             s->avctx->time_base.den *=2;
00877         }
00878         avctx->ticks_per_frame = 2;
00879     }
00880 
00881     if(avctx->extradata_size > 0 && avctx->extradata && *(char *)avctx->extradata == 1){
00882         int i, cnt, nalsize;
00883         unsigned char *p = avctx->extradata;
00884 
00885         h->is_avc = 1;
00886 
00887         if(avctx->extradata_size < 7) {
00888             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
00889             return -1;
00890         }
00891         /* sps and pps in the avcC always have length coded with 2 bytes,
00892            so put a fake nal_length_size = 2 while parsing them */
00893         h->nal_length_size = 2;
00894         // Decode sps from avcC
00895         cnt = *(p+5) & 0x1f; // Number of sps
00896         p += 6;
00897         for (i = 0; i < cnt; i++) {
00898             nalsize = AV_RB16(p) + 2;
00899             if(decode_nal_units(h, p, nalsize) < 0) {
00900                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
00901                 return -1;
00902             }
00903             p += nalsize;
00904         }
00905         // Decode pps from avcC
00906         cnt = *(p++); // Number of pps
00907         for (i = 0; i < cnt; i++) {
00908             nalsize = AV_RB16(p) + 2;
00909             if(decode_nal_units(h, p, nalsize)  != nalsize) {
00910                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
00911                 return -1;
00912             }
00913             p += nalsize;
00914         }
00915         // Now store right nal length size, that will be use to parse all other nals
00916         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
00917     } else {
00918         h->is_avc = 0;
00919         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
00920             return -1;
00921     }
00922     if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
00923         s->avctx->has_b_frames = h->sps.num_reorder_frames;
00924         s->low_delay = 0;
00925     }
00926 
00927     return 0;
00928 }
00929 
/**
 * Start decoding a new frame: acquire a picture via MPV_frame_start(),
 * reset per-frame picture flags, precompute block pixel offsets for the
 * current linesizes, and lazily allocate per-thread scratch buffers.
 * @return 0 on success, -1 if MPV_frame_start() failed.
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->key_frame= 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    /* block_offset[0..23]: frame-coded offsets; [24..47]: field-coded
     * (doubled stride) offsets for the same blocks */
    for(i=0; i<16; i++){
        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    for(i=0; i<4; i++){
        h->block_offset[16+i]=
        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+16+i]=
        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < s->avctx->thread_count; i++)
        if(!h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);

    /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    /* POCs are filled in per field once slice headers are parsed */
    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;
    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}
00985 
/**
 * Save the bottom pixel rows of the current macroblock into the
 * top_borders scratch rows, so intra prediction / deblocking of the mb
 * below can read them after this mb has been deblocked in place.
 * src_y/src_cb/src_cr point at the top-left of the current mb.
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;

    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            /* bottom mb of a pair: only save when the pair is frame-coded */
            if(!MB_MBAFF){
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                    AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                }
            }
        }else if(MB_MBAFF){
            top_idx = 0;
        }else
            return;
    }

    top_border = h->top_borders[top_idx][s->mb_x];
    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    AV_COPY128(top_border, src_y + 16*linesize);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        AV_COPY64(top_border+16, src_cb+8*uvlinesize);
        AV_COPY64(top_border+24, src_cr+8*uvlinesize);
    }
}
01021 
/**
 * Swap (xchg=1) or copy (xchg=0) the saved top-border rows with the pixel
 * rows above the current macroblock, so intra prediction sees undeblocked
 * neighbor samples. Called in pairs around intra prediction: once to swap
 * the undeblocked samples in, once to restore the deblocked ones.
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
    MpegEncContext * const s = &h->s;
    int deblock_left;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;
    uint8_t *top_border;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF)
                return;
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    if(h->deblocking_filter == 2) {
        /* filter crosses slice boundaries only where neighbors exist */
        deblock_left = h->left_type[0];
        deblock_top  = h->top_type;
    } else {
        deblock_left = (s->mb_x > 0);
        deblock_top =  (s->mb_y > !!MB_FIELD);
    }

    /* step back one row and one column so the top-left corner sample is
     * included in the exchange */
    src_y  -=   linesize + 1;
    src_cb -= uvlinesize + 1;
    src_cr -= uvlinesize + 1;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border    = h->top_borders[top_idx][s->mb_x];

#define XCHG(a,b,xchg)\
if (xchg) AV_SWAP64(b,a);\
else      AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_left){
            XCHG(top_border_m1+8, src_y -7, 1);
        }
        XCHG(top_border+0, src_y +1, xchg);
        XCHG(top_border+8, src_y +9, 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
        }
    }

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(deblock_top){
            if(deblock_left){
                XCHG(top_border_m1+16, src_cb -7, 1);
                XCHG(top_border_m1+24, src_cr -7, 1);
            }
            XCHG(top_border+16, src_cb+1, 1);
            XCHG(top_border+24, src_cr+1, 1);
        }
    }
}
01080 
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * followed by the inverse transform and residual add, for luma and chroma.
 * @param simple non-zero selects the fast path that assumes no MBAFF,
 *        no transform bypass, no PCM, no gray-only decode and H.264 (not
 *        SVQ3); constant-folded away via av_always_inline at the two
 *        call sites (hl_decode_mb_simple / hl_decode_mb_complex).
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 should always be true if SVQ3 is disabled. */
    const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);

    dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
    dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);

    h->list_counts[mb_xy]= h->list_count;

    /* field-coded mb in MBAFF: double the strides and, for the bottom mb
     * of the pair, rewind dest to interleave the field lines */
    if (!simple && MB_FIELD) {
        linesize   = h->mb_linesize   = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[24];
        if(mb_y&1){ //FIXME move out of this function?
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
        if(FRAME_MBAFF) {
            int list;
            /* retag cached refs so field refs of the two parities differ */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* PCM mb: raw samples were stored in h->mb, just copy them out */
        for (i=0; i<16; i++) {
            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
        }
        for (i=0; i<8; i++) {
            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* temporarily swap in undeblocked neighbor samples */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);

            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            if(IS_INTRA4x4(mb_type)){
                if(simple || !s->encoding){
                    if(IS_8x8DCT(mb_type)){
                        if(transform_bypass){
                            idct_dc_add =
                            idct_add    = s->dsp.add_pixels8;
                        }else{
                            idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                            idct_add    = h->h264dsp.h264_idct8_add;
                        }
                        for(i=0; i<16; i+=4){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                            /* profile 244 (High 4:4:4) lossless: prediction
                             * and residual add are fused for V/H modes */
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
                            }else{
                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                            (h->topright_samples_available<<i)&0x4000, linesize);
                                if(nnz){
                                    if(nnz == 1 && h->mb[i*16])
                                        idct_dc_add(ptr, h->mb + i*16, linesize);
                                    else
                                        idct_add   (ptr, h->mb + i*16, linesize);
                                }
                            }
                        }
                    }else{
                        if(transform_bypass){
                            idct_dc_add =
                            idct_add    = s->dsp.add_pixels4;
                        }else{
                            idct_dc_add = h->h264dsp.h264_idct_dc_add;
                            idct_add    = h->h264dsp.h264_idct_add;
                        }
                        for(i=0; i<16; i++){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
                            }else{
                                uint8_t *topright;
                                int nnz, tr;
                                if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                                    assert(mb_y || linesize <= block_offset[i]);
                                    if(!topright_avail){
                                        /* replicate the rightmost available
                                         * sample into a fake top-right row */
                                        tr= ptr[3 - linesize]*0x01010101;
                                        topright= (uint8_t*) &tr;
                                    }else
                                        topright= ptr + 4 - linesize;
                                }else
                                    topright= NULL;

                                h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                                nnz = h->non_zero_count_cache[ scan8[i] ];
                                if(nnz){
                                    if(is_h264){
                                        if(nnz == 1 && h->mb[i*16])
                                            idct_dc_add(ptr, h->mb + i*16, linesize);
                                        else
                                            idct_add   (ptr, h->mb + i*16, linesize);
                                    }else
                                        ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                                }
                            }
                        }
                    }
                }
            }else{
                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
                if(is_h264){
                    if(!transform_bypass)
                        h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
                }else
                    ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
            }
            /* restore the deblocked border samples */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
        }else if(is_h264){
            hl_motion(h, dest_y, dest_cb, dest_cr,
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
        }


        /* add the luma residual (intra 4x4 already added it above) */
        if(!IS_INTRA4x4(mb_type)){
            if(is_h264){
                if(IS_INTRA16x16(mb_type)){
                    if(transform_bypass){
                        if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
                        }else{
                            for(i=0; i<16; i++){
                                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
                            }
                        }
                    }else{
                         h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                    }
                }else if(h->cbp&15){
                    if(transform_bypass){
                        const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                        for(i=0; i<16; i+=di){
                            if(h->non_zero_count_cache[ scan8[i] ]){
                                idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                            }
                        }
                    }else{
                        if(IS_8x8DCT(mb_type)){
                            h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                        }else{
                            h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                        }
                    }
                }
            }else{
                for(i=0; i<16; i++){
                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                        uint8_t * const ptr= dest_y + block_offset[i];
                        ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                    }
                }
            }
        }

        /* chroma residual: cbp bits 4-5 signal coded chroma */
        if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    }
                }
            }else{
                chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                if(is_h264){
                    idct_add = h->h264dsp.h264_idct_add;
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ])
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                        else if(h->mb[i*16])
                            idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    }
                }else{
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                            uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
                            ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2);
                        }
                    }
                }
            }
        }
    }
    if(h->cbp || IS_INTRA(mb_type))
        s->dsp.clear_blocks(h->mb);
}
01325 
/**
 * Fast macroblock reconstruction path (simple=1 constant-folds away the
 * MBAFF/PCM/bypass/SVQ3 handling in hl_decode_mb_internal).
 */
static void hl_decode_mb_simple(H264Context *h){
    hl_decode_mb_internal(h, 1);
}
01332 
/**
 * Fully general macroblock reconstruction path (simple=0); av_noinline
 * keeps this rarely-taken path out of the caller's hot code.
 */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0);
}
01339 
01340 void ff_h264_hl_decode_mb(H264Context *h){
01341     MpegEncContext * const s = &h->s;
01342     const int mb_xy= h->mb_xy;
01343     const int mb_type= s->current_picture.mb_type[mb_xy];
01344     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
01345 
01346     if (is_complex)
01347         hl_decode_mb_complex(h);
01348     else hl_decode_mb_simple(h);
01349 }
01350 
/**
 * Parse the pred_weight_table() slice-header syntax (explicit weighted
 * prediction): per-reference luma/chroma weights and offsets for list 0,
 * and for list 1 in B slices. Sets h->use_weight / h->use_weight_chroma
 * when any weight differs from the default.
 * @return 0 (always succeeds; golomb reads consume from s->gb).
 */
static int pred_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list, i;
    int luma_def, chroma_def;

    h->use_weight= 0;
    h->use_weight_chroma= 0;
    h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
    h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
    /* default weight = 1.0 in fixed point at the coded denominator */
    luma_def = 1<<h->luma_log2_weight_denom;
    chroma_def = 1<<h->chroma_log2_weight_denom;

    for(list=0; list<2; list++){
        h->luma_weight_flag[list]   = 0;
        h->chroma_weight_flag[list] = 0;
        for(i=0; i<h->ref_count[list]; i++){
            int luma_weight_flag, chroma_weight_flag;

            luma_weight_flag= get_bits1(&s->gb);
            if(luma_weight_flag){
                h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
                h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
                if(   h->luma_weight[i][list][0] != luma_def
                   || h->luma_weight[i][list][1] != 0) {
                    h->use_weight= 1;
                    h->luma_weight_flag[list]= 1;
                }
            }else{
                h->luma_weight[i][list][0]= luma_def;
                h->luma_weight[i][list][1]= 0;
            }

            if(CHROMA){
                chroma_weight_flag= get_bits1(&s->gb);
                if(chroma_weight_flag){
                    int j;
                    for(j=0; j<2; j++){
                        h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
                        h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
                        if(   h->chroma_weight[i][list][j][0] != chroma_def
                           || h->chroma_weight[i][list][j][1] != 0) {
                            h->use_weight_chroma= 1;
                            h->chroma_weight_flag[list]= 1;
                        }
                    }
                }else{
                    int j;
                    for(j=0; j<2; j++){
                        h->chroma_weight[i][list][j][0]= chroma_def;
                        h->chroma_weight[i][list][j][1]= 0;
                    }
                }
            }
        }
        /* list 1 is only present in B slices */
        if(h->slice_type_nos != FF_B_TYPE) break;
    }
    h->use_weight= h->use_weight || h->use_weight_chroma;
    return 0;
}
01410 
/**
 * Initialize implicit weighted-prediction weights (B slices with
 * weighted_bipred_idc == 2): weights are derived from POC distances per
 * H.264 spec 8.4.2.3.2 (w = 64 - dist_scale_factor, default 32/32).
 * @param field -1 for frame coding, 0/1 for the top/bottom field of a
 *        field pair (ref indices then start at 16 and come in parity pairs).
 */
static void implicit_weight_table(H264Context *h, int field){
    MpegEncContext * const s = &h->s;
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i]   = 0;
        h->chroma_weight_flag[i] = 0;
    }

    if(field < 0){
        cur_poc = s->current_picture_ptr->poc;
    /* shortcut: single ref each side, equidistant in POC -> plain average,
     * no implicit weighting needed */
    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
       && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
        h->use_weight= 0;
        h->use_weight_chroma= 0;
        return;
    }
        ref_start= 0;
        ref_count0= h->ref_count[0];
        ref_count1= h->ref_count[1];
    }else{
        cur_poc = s->current_picture_ptr->field_poc[field];
        ref_start= 16;
        ref_count0= 16+2*h->ref_count[0];
        ref_count1= 16+2*h->ref_count[1];
    }

    /* use_weight == 2 selects the implicit (table-driven) weighting mode */
    h->use_weight= 2;
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;

    for(ref0=ref_start; ref0 < ref_count0; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=ref_start; ref1 < ref_count1; ref1++){
            int w = 32;
            /* long-term refs always use the default equal weight */
            if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                int poc1 = h->ref_list[1][ref1].poc;
                int td = av_clip(poc1 - poc0, -128, 127);
                if(td){
                    int tb = av_clip(cur_poc - poc0, -128, 127);
                    int tx = (16384 + (FFABS(td) >> 1)) / td;
                    int dist_scale_factor = (tb*tx + 32) >> 8;
                    if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
                        w = 64 - dist_scale_factor;
                }
            }
            if(field<0){
                h->implicit_weight[ref0][ref1][0]=
                h->implicit_weight[ref0][ref1][1]= w;
            }else{
                h->implicit_weight[ref0][ref1][field]=w;
            }
        }
    }
}
01472 
01476 static void idr(H264Context *h){
01477     ff_h264_remove_all_refs(h);
01478     h->prev_frame_num= 0;
01479     h->prev_frame_num_offset= 0;
01480     h->prev_poc_msb=
01481     h->prev_poc_lsb= 0;
01482 }
01483 
01484 /* forget old pics after a seek */
01485 static void flush_dpb(AVCodecContext *avctx){
01486     H264Context *h= avctx->priv_data;
01487     int i;
01488     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
01489         if(h->delayed_pic[i])
01490             h->delayed_pic[i]->reference= 0;
01491         h->delayed_pic[i]= NULL;
01492     }
01493     h->outputed_poc= INT_MIN;
01494     h->prev_interlaced_frame = 1;
01495     idr(h);
01496     if(h->s.current_picture_ptr)
01497         h->s.current_picture_ptr->reference= 0;
01498     h->s.first_field= 0;
01499     ff_h264_reset_sei(h);
01500     ff_mpeg_flush(avctx);
01501 }
01502 
/* Derive the picture order count for the current picture; mirrors the
 * three poc_type modes of H.264 spec section 8.2.1 clause by clause.
 * Writes cur->field_poc[] (only the fields present in this picture
 * structure) and cur->poc. Always returns 0. */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    /* frame_num wrapped since the previous picture -> advance the offset. */
    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        /* Reconstruct poc_msb from the transmitted lsb and the previous
         * msb/lsb pair (spec 8.2.1.1 wrap detection). */
        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;
//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        /* POC derived from frame_num plus SPS-signalled cycle offsets
         * (spec 8.2.1.2). */
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        /* Non-reference pictures sit between the cycle entries. */
        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            /* abs_frame_num > 0 implies poc_cycle_length != 0, so the
             * division below cannot trap. */
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        /* poc_type == 2: POC follows decoding order (spec 8.2.1.3). */
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;
    }

    /* Only store the POC of the field(s) actually present. */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}
01579 
01580 
01584 static void init_scan_tables(H264Context *h){
01585     int i;
01586     if(h->h264dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
01587         memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
01588         memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
01589     }else{
01590         for(i=0; i<16; i++){
01591 #define T(x) (x>>2) | ((x<<2) & 0xF)
01592             h->zigzag_scan[i] = T(zigzag_scan[i]);
01593             h-> field_scan[i] = T( field_scan[i]);
01594 #undef T
01595         }
01596     }
01597     if(h->h264dsp.h264_idct8_add == ff_h264_idct8_add_c){
01598         memcpy(h->zigzag_scan8x8,       ff_zigzag_direct,     64*sizeof(uint8_t));
01599         memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
01600         memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
01601         memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
01602     }else{
01603         for(i=0; i<64; i++){
01604 #define T(x) (x>>3) | ((x&7)<<3)
01605             h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
01606             h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
01607             h->field_scan8x8[i]        = T(field_scan8x8[i]);
01608             h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
01609 #undef T
01610         }
01611     }
01612     if(h->sps.transform_bypass){ //FIXME same ugly
01613         h->zigzag_scan_q0          = zigzag_scan;
01614         h->zigzag_scan8x8_q0       = ff_zigzag_direct;
01615         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
01616         h->field_scan_q0           = field_scan;
01617         h->field_scan8x8_q0        = field_scan8x8;
01618         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
01619     }else{
01620         h->zigzag_scan_q0          = h->zigzag_scan;
01621         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
01622         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
01623         h->field_scan_q0           = h->field_scan;
01624         h->field_scan8x8_q0        = h->field_scan8x8;
01625         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
01626     }
01627 }
01628 
/* Finish decoding of the current field (or frame): run reference picture
 * marking, notify the hwaccel/VDPAU backends, run error concealment for
 * full frames, and close the MPV frame. Call order matters: ref marking
 * must precede backend completion, which must precede MPV_frame_end(). */
static void field_end(H264Context *h){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    s->mb_y= 0;

    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
    s->current_picture_ptr->pict_type= s->pict_type;

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    /* Droppable (non-reference) pictures must not advance the POC /
     * frame_num prediction state or the reference marking. */
    if(!s->dropable) {
        ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb= h->poc_msb;
        h->prev_poc_lsb= h->poc_lsb;
    }
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num= h->frame_num;

    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /*
     * FIXME: Error handling code does not seem to support interlaced
     * when slices span multiple rows
     * The ff_er_add_slice calls don't work right for bottom
     * fields; they cause massive erroneous error concealing
     * Error marking covers both fields (top and bottom).
     * This causes a mismatched s->error_count
     * and a bad error table. Further, the error count goes to
     * INT_MAX when called for bottom field, because mb_y is
     * past end by one (callers fault) and resync_mb_y != 0
     * causes problems for the first MB line, too.
     */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice=0;
}
01675 
01679 static void clone_slice(H264Context *dst, H264Context *src)
01680 {
01681     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
01682     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
01683     dst->s.current_picture      = src->s.current_picture;
01684     dst->s.linesize             = src->s.linesize;
01685     dst->s.uvlinesize           = src->s.uvlinesize;
01686     dst->s.first_field          = src->s.first_field;
01687 
01688     dst->prev_poc_msb           = src->prev_poc_msb;
01689     dst->prev_poc_lsb           = src->prev_poc_lsb;
01690     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
01691     dst->prev_frame_num         = src->prev_frame_num;
01692     dst->short_ref_count        = src->short_ref_count;
01693 
01694     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
01695     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
01696     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
01697     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
01698 
01699     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
01700     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
01701 }
01702 
01712 static int decode_slice_header(H264Context *h, H264Context *h0){
01713     MpegEncContext * const s = &h->s;
01714     MpegEncContext * const s0 = &h0->s;
01715     unsigned int first_mb_in_slice;
01716     unsigned int pps_id;
01717     int num_ref_idx_active_override_flag;
01718     unsigned int slice_type, tmp, i, j;
01719     int default_ref_list_done = 0;
01720     int last_pic_structure;
01721 
01722     s->dropable= h->nal_ref_idc == 0;
01723 
01724     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
01725         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
01726         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
01727     }else{
01728         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
01729         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
01730     }
01731 
01732     first_mb_in_slice= get_ue_golomb(&s->gb);
01733 
01734     if(first_mb_in_slice == 0){ //FIXME better field boundary detection
01735         if(h0->current_slice && FIELD_PICTURE){
01736             field_end(h);
01737         }
01738 
01739         h0->current_slice = 0;
01740         if (!s0->first_field)
01741             s->current_picture_ptr= NULL;
01742     }
01743 
01744     slice_type= get_ue_golomb_31(&s->gb);
01745     if(slice_type > 9){
01746         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
01747         return -1;
01748     }
01749     if(slice_type > 4){
01750         slice_type -= 5;
01751         h->slice_type_fixed=1;
01752     }else
01753         h->slice_type_fixed=0;
01754 
01755     slice_type= golomb_to_pict_type[ slice_type ];
01756     if (slice_type == FF_I_TYPE
01757         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
01758         default_ref_list_done = 1;
01759     }
01760     h->slice_type= slice_type;
01761     h->slice_type_nos= slice_type & 3;
01762 
01763     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
01764 
01765     pps_id= get_ue_golomb(&s->gb);
01766     if(pps_id>=MAX_PPS_COUNT){
01767         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
01768         return -1;
01769     }
01770     if(!h0->pps_buffers[pps_id]) {
01771         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
01772         return -1;
01773     }
01774     h->pps= *h0->pps_buffers[pps_id];
01775 
01776     if(!h0->sps_buffers[h->pps.sps_id]) {
01777         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
01778         return -1;
01779     }
01780     h->sps = *h0->sps_buffers[h->pps.sps_id];
01781 
01782     s->avctx->profile = h->sps.profile_idc;
01783     s->avctx->level   = h->sps.level_idc;
01784     s->avctx->refs    = h->sps.ref_frame_count;
01785 
01786     if(h == h0 && h->dequant_coeff_pps != pps_id){
01787         h->dequant_coeff_pps = pps_id;
01788         init_dequant_tables(h);
01789     }
01790 
01791     s->mb_width= h->sps.mb_width;
01792     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
01793 
01794     h->b_stride=  s->mb_width*4;
01795 
01796     s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
01797     if(h->sps.frame_mbs_only_flag)
01798         s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
01799     else
01800         s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
01801 
01802     if (s->context_initialized
01803         && (   s->width != s->avctx->width || s->height != s->avctx->height
01804             || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
01805         if(h != h0)
01806             return -1;   // width / height changed during parallelized decoding
01807         free_tables(h);
01808         flush_dpb(s->avctx);
01809         MPV_common_end(s);
01810     }
01811     if (!s->context_initialized) {
01812         if(h != h0)
01813             return -1;  // we cant (re-)initialize context during parallel decoding
01814 
01815         avcodec_set_dimensions(s->avctx, s->width, s->height);
01816         s->avctx->sample_aspect_ratio= h->sps.sar;
01817         if(!s->avctx->sample_aspect_ratio.den)
01818             s->avctx->sample_aspect_ratio.den = 1;
01819 
01820         if(h->sps.video_signal_type_present_flag){
01821             s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
01822             if(h->sps.colour_description_present_flag){
01823                 s->avctx->color_primaries = h->sps.color_primaries;
01824                 s->avctx->color_trc       = h->sps.color_trc;
01825                 s->avctx->colorspace      = h->sps.colorspace;
01826             }
01827         }
01828 
01829         if(h->sps.timing_info_present_flag){
01830             int64_t den= h->sps.time_scale;
01831             if(h->x264_build < 44U)
01832                 den *= 2;
01833             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
01834                       h->sps.num_units_in_tick, den, 1<<30);
01835         }
01836         s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts);
01837         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
01838 
01839         if (MPV_common_init(s) < 0)
01840             return -1;
01841         s->first_field = 0;
01842         h->prev_interlaced_frame = 1;
01843 
01844         init_scan_tables(h);
01845         if (ff_h264_alloc_tables(h) < 0) {
01846             av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n");
01847             return AVERROR(ENOMEM);
01848         }
01849 
01850         for(i = 1; i < s->avctx->thread_count; i++) {
01851             H264Context *c;
01852             c = h->thread_context[i] = av_malloc(sizeof(H264Context));
01853             memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
01854             memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
01855             c->h264dsp = h->h264dsp;
01856             c->sps = h->sps;
01857             c->pps = h->pps;
01858             init_scan_tables(c);
01859             clone_tables(c, h, i);
01860         }
01861 
01862         for(i = 0; i < s->avctx->thread_count; i++)
01863             if(context_init(h->thread_context[i]) < 0)
01864                 return -1;
01865     }
01866 
01867     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
01868 
01869     h->mb_mbaff = 0;
01870     h->mb_aff_frame = 0;
01871     last_pic_structure = s0->picture_structure;
01872     if(h->sps.frame_mbs_only_flag){
01873         s->picture_structure= PICT_FRAME;
01874     }else{
01875         if(get_bits1(&s->gb)) { //field_pic_flag
01876             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
01877         } else {
01878             s->picture_structure= PICT_FRAME;
01879             h->mb_aff_frame = h->sps.mb_aff;
01880         }
01881     }
01882     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
01883 
01884     if(h0->current_slice == 0){
01885         while(h->frame_num !=  h->prev_frame_num &&
01886               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
01887             av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
01888             if (ff_h264_frame_start(h) < 0)
01889                 return -1;
01890             h->prev_frame_num++;
01891             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
01892             s->current_picture_ptr->frame_num= h->prev_frame_num;
01893             ff_h264_execute_ref_pic_marking(h, NULL, 0);
01894         }
01895 
01896         /* See if we have a decoded first field looking for a pair... */
01897         if (s0->first_field) {
01898             assert(s0->current_picture_ptr);
01899             assert(s0->current_picture_ptr->data[0]);
01900             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
01901 
01902             /* figure out if we have a complementary field pair */
01903             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
01904                 /*
01905                  * Previous field is unmatched. Don't display it, but let it
01906                  * remain for reference if marked as such.
01907                  */
01908                 s0->current_picture_ptr = NULL;
01909                 s0->first_field = FIELD_PICTURE;
01910 
01911             } else {
01912                 if (h->nal_ref_idc &&
01913                         s0->current_picture_ptr->reference &&
01914                         s0->current_picture_ptr->frame_num != h->frame_num) {
01915                     /*
01916                      * This and previous field were reference, but had
01917                      * different frame_nums. Consider this field first in
01918                      * pair. Throw away previous field except for reference
01919                      * purposes.
01920                      */
01921                     s0->first_field = 1;
01922                     s0->current_picture_ptr = NULL;
01923 
01924                 } else {
01925                     /* Second field in complementary pair */
01926                     s0->first_field = 0;
01927                 }
01928             }
01929 
01930         } else {
01931             /* Frame or first field in a potentially complementary pair */
01932             assert(!s0->current_picture_ptr);
01933             s0->first_field = FIELD_PICTURE;
01934         }
01935 
01936         if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) {
01937             s0->first_field = 0;
01938             return -1;
01939         }
01940     }
01941     if(h != h0)
01942         clone_slice(h, h0);
01943 
01944     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
01945 
01946     assert(s->mb_num == s->mb_width * s->mb_height);
01947     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
01948        first_mb_in_slice                    >= s->mb_num){
01949         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
01950         return -1;
01951     }
01952     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
01953     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
01954     if (s->picture_structure == PICT_BOTTOM_FIELD)
01955         s->resync_mb_y = s->mb_y = s->mb_y + 1;
01956     assert(s->mb_y < s->mb_height);
01957 
01958     if(s->picture_structure==PICT_FRAME){
01959         h->curr_pic_num=   h->frame_num;
01960         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
01961     }else{
01962         h->curr_pic_num= 2*h->frame_num + 1;
01963         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
01964     }
01965 
01966     if(h->nal_unit_type == NAL_IDR_SLICE){
01967         get_ue_golomb(&s->gb); /* idr_pic_id */
01968     }
01969 
01970     if(h->sps.poc_type==0){
01971         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
01972 
01973         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
01974             h->delta_poc_bottom= get_se_golomb(&s->gb);
01975         }
01976     }
01977 
01978     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
01979         h->delta_poc[0]= get_se_golomb(&s->gb);
01980 
01981         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
01982             h->delta_poc[1]= get_se_golomb(&s->gb);
01983     }
01984 
01985     init_poc(h);
01986 
01987     if(h->pps.redundant_pic_cnt_present){
01988         h->redundant_pic_count= get_ue_golomb(&s->gb);
01989     }
01990 
01991     //set defaults, might be overridden a few lines later
01992     h->ref_count[0]= h->pps.ref_count[0];
01993     h->ref_count[1]= h->pps.ref_count[1];
01994 
01995     if(h->slice_type_nos != FF_I_TYPE){
01996         if(h->slice_type_nos == FF_B_TYPE){
01997             h->direct_spatial_mv_pred= get_bits1(&s->gb);
01998         }
01999         num_ref_idx_active_override_flag= get_bits1(&s->gb);
02000 
02001         if(num_ref_idx_active_override_flag){
02002             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
02003             if(h->slice_type_nos==FF_B_TYPE)
02004                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
02005 
02006             if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
02007                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
02008                 h->ref_count[0]= h->ref_count[1]= 1;
02009                 return -1;
02010             }
02011         }
02012         if(h->slice_type_nos == FF_B_TYPE)
02013             h->list_count= 2;
02014         else
02015             h->list_count= 1;
02016     }else
02017         h->list_count= 0;
02018 
02019     if(!default_ref_list_done){
02020         ff_h264_fill_default_ref_list(h);
02021     }
02022 
02023     if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0)
02024         return -1;
02025 
02026     if(h->slice_type_nos!=FF_I_TYPE){
02027         s->last_picture_ptr= &h->ref_list[0][0];
02028         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
02029     }
02030     if(h->slice_type_nos==FF_B_TYPE){
02031         s->next_picture_ptr= &h->ref_list[1][0];
02032         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
02033     }
02034 
02035     if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
02036        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
02037         pred_weight_table(h);
02038     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
02039         implicit_weight_table(h, -1);
02040     }else {
02041         h->use_weight = 0;
02042         for (i = 0; i < 2; i++) {
02043             h->luma_weight_flag[i]   = 0;
02044             h->chroma_weight_flag[i] = 0;
02045         }
02046     }
02047 
02048     if(h->nal_ref_idc)
02049         ff_h264_decode_ref_pic_marking(h0, &s->gb);
02050 
02051     if(FRAME_MBAFF){
02052         ff_h264_fill_mbaff_ref_list(h);
02053 
02054         if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
02055             implicit_weight_table(h, 0);
02056             implicit_weight_table(h, 1);
02057         }
02058     }
02059 
02060     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
02061         ff_h264_direct_dist_scale_factor(h);
02062     ff_h264_direct_ref_list_init(h);
02063 
02064     if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
02065         tmp = get_ue_golomb_31(&s->gb);
02066         if(tmp > 2){
02067             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
02068             return -1;
02069         }
02070         h->cabac_init_idc= tmp;
02071     }
02072 
02073     h->last_qscale_diff = 0;
02074     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
02075     if(tmp>51){
02076         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
02077         return -1;
02078     }
02079     s->qscale= tmp;
02080     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
02081     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
02082     //FIXME qscale / qp ... stuff
02083     if(h->slice_type == FF_SP_TYPE){
02084         get_bits1(&s->gb); /* sp_for_switch_flag */
02085     }
02086     if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
02087         get_se_golomb(&s->gb); /* slice_qs_delta */
02088     }
02089 
02090     h->deblocking_filter = 1;
02091     h->slice_alpha_c0_offset = 52;
02092     h->slice_beta_offset = 52;
02093     if( h->pps.deblocking_filter_parameters_present ) {
02094         tmp= get_ue_golomb_31(&s->gb);
02095         if(tmp > 2){
02096             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
02097             return -1;
02098         }
02099         h->deblocking_filter= tmp;
02100         if(h->deblocking_filter < 2)
02101             h->deblocking_filter^= 1; // 1<->0
02102 
02103         if( h->deblocking_filter ) {
02104             h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
02105             h->slice_beta_offset     += get_se_golomb(&s->gb) << 1;
02106             if(   h->slice_alpha_c0_offset > 104U
02107                || h->slice_beta_offset     > 104U){
02108                 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
02109                 return -1;
02110             }
02111         }
02112     }
02113 
02114     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
02115        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
02116        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
02117        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
02118         h->deblocking_filter= 0;
02119 
02120     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
02121         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
02122             /* Cheat slightly for speed:
02123                Do not bother to deblock across slices. */
02124             h->deblocking_filter = 2;
02125         } else {
02126             h0->max_contexts = 1;
02127             if(!h0->single_decode_warning) {
02128                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
02129                 h0->single_decode_warning = 1;
02130             }
02131             if(h != h0)
02132                 return 1; // deblocking switched inside frame
02133         }
02134     }
02135     h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
02136 
02137 #if 0 //FMO
02138     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
02139         slice_group_change_cycle= get_bits(&s->gb, ?);
02140 #endif
02141 
02142     h0->last_slice_type = slice_type;
02143     h->slice_num = ++h0->current_slice;
02144     if(h->slice_num >= MAX_SLICES){
02145         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
02146     }
02147 
02148     for(j=0; j<2; j++){
02149         int id_list[16];
02150         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
02151         for(i=0; i<16; i++){
02152             id_list[i]= 60;
02153             if(h->ref_list[j][i].data[0]){
02154                 int k;
02155                 uint8_t *base= h->ref_list[j][i].base[0];
02156                 for(k=0; k<h->short_ref_count; k++)
02157                     if(h->short_ref[k]->base[0] == base){
02158                         id_list[i]= k;
02159                         break;
02160                     }
02161                 for(k=0; k<h->long_ref_count; k++)
02162                     if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
02163                         id_list[i]= h->short_ref_count + k;
02164                         break;
02165                     }
02166             }
02167         }
02168 
02169         ref2frm[0]=
02170         ref2frm[1]= -1;
02171         for(i=0; i<16; i++)
02172             ref2frm[i+2]= 4*id_list[i]
02173                           +(h->ref_list[j][i].reference&3);
02174         ref2frm[18+0]=
02175         ref2frm[18+1]= -1;
02176         for(i=16; i<48; i++)
02177             ref2frm[i+4]= 4*id_list[(i-16)>>1]
02178                           +(h->ref_list[j][i].reference&3);
02179     }
02180 
02181     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
02182     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
02183 
02184     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
02185         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
02186                h->slice_num,
02187                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
02188                first_mb_in_slice,
02189                av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
02190                pps_id, h->frame_num,
02191                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
02192                h->ref_count[0], h->ref_count[1],
02193                s->qscale,
02194                h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
02195                h->use_weight,
02196                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
02197                h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
02198                );
02199     }
02200 
02201     return 0;
02202 }
02203 
02204 int ff_h264_get_slice_type(const H264Context *h)
02205 {
02206     switch (h->slice_type) {
02207     case FF_P_TYPE:  return 0;
02208     case FF_B_TYPE:  return 1;
02209     case FF_I_TYPE:  return 2;
02210     case FF_SP_TYPE: return 3;
02211     case FF_SI_TYPE: return 4;
02212     default:         return -1;
02213     }
02214 }
02215 
/**
 * Fill the caches (non_zero_count_cache, mv_cache, ref_cache and the
 * top/left neighbour info) with the values needed by the loop filter for
 * the macroblock at h->mb_xy.
 *
 * @param mb_type the mb_type of the current macroblock
 * @return non-zero if deblocking can be skipped entirely for this MB
 *         (its qp and its neighbours' qp are below the filter threshold),
 *         0 otherwise
 */
static int fill_filter_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int top_xy, left_xy[2];
    int top_type, left_type[2];

    /* In a field MB the vertical neighbour is two mb_stride rows away. */
    top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);

    //FIXME deblocking could skip the intra and nnz parts.

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    /* left_xy[0] is the top half of the left neighbour pair,
     * left_xy[1] the bottom half; they only differ under MBAFF when the
     * left pair's field flag differs from the current pair's. */
    left_xy[1] = left_xy[0] = mb_xy-1;
    if(FRAME_MBAFF){
        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
        if(s->mb_y&1){
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[0] -= s->mb_stride;
            }
        }else{
            if(curr_mb_field_flag){
                /* mask trick: subtract one row unless the top MB pair is
                 * itself a field pair (bit 7 of its mb_type set) */
                top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
            }
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    {
        //for sufficiently low qp, filtering wouldn't do anything
        //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
        int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
        int qp = s->current_picture.qscale_table[mb_xy];
        if(qp <= qp_thresh
           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
           && (top_xy   < 0 || ((qp + s->current_picture.qscale_table[top_xy    ] + 1)>>1) <= qp_thresh)){
            if(!FRAME_MBAFF)
                return 1;
            /* NOTE(review): the guard tests left_xy[0] but the table read
             * uses left_xy[1]; this matches upstream behavior — confirm
             * before "fixing". */
            if(   (left_xy[0]< 0            || ((qp + s->current_picture.qscale_table[left_xy[1]             ] + 1)>>1) <= qp_thresh)
               && (top_xy    < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy    -s->mb_stride] + 1)>>1) <= qp_thresh))
                return 1;
        }
    }

    top_type     = s->current_picture.mb_type[top_xy]    ;
    left_type[0] = s->current_picture.mb_type[left_xy[0]];
    left_type[1] = s->current_picture.mb_type[left_xy[1]];
    /* deblocking_filter==2: do not filter across slice boundaries, so any
     * neighbour from a different slice is treated as unavailable; otherwise
     * only 0xFFFF (no slice at all) marks a neighbour unavailable. */
    if(h->deblocking_filter == 2){
        if(h->slice_table[top_xy     ] != h->slice_num) top_type= 0;
        if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
    }else{
        if(h->slice_table[top_xy     ] == 0xFFFF) top_type= 0;
        if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
    }
    h->top_type    = top_type    ;
    h->left_type[0]= left_type[0];
    h->left_type[1]= left_type[1];

    /* intra MBs always get the strongest filtering; no mv/nnz data needed */
    if(IS_INTRA(mb_type))
        return 0;

    /* copy the current MB's non-zero-count values into the cache layout */
    AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
    AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
    AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
    AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
    AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);

    h->cbp= h->cbp_table[mb_xy];

    /* fill mv_cache / ref_cache for the current MB, per reference list */
    {
        int list;
        for(list=0; list<h->list_count; list++){
            int8_t *ref;
            int y, b_stride;
            int16_t (*mv_dst)[2];
            int16_t (*mv_src)[2];

            if(!USES_LIST(mb_type, list)){
                /* list unused: zero MVs, mark all refs as LIST_NOT_USED */
                fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                continue;
            }

            ref = &s->current_picture.ref_index[list][4*mb_xy];
            {
                /* ref2frm maps per-slice reference indices to frame numbers
                 * so indices from different slices can be compared */
                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                ref += 2;
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
            }

            /* copy the 4x4 motion vectors of the current MB row by row */
            b_stride = h->b_stride;
            mv_dst   = &h->mv_cache[list][scan8[0]];
            mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
            for(y=0; y<4; y++){
                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
            }

        }
    }


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    /* fill the neighbour (top / left) non-zero-count cache entries */
    if(top_type){
        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
    }

    if(left_type[0]){
        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
    }

    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
    if(!CABAC && h->pps.transform_8x8_mode){
        /* override nnz with cbp bits for 8x8-transform MBs */
        if(IS_8x8DCT(top_type)){
            h->non_zero_count_cache[4+8*0]=
            h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
        }
        if(IS_8x8DCT(left_type[0])){
            h->non_zero_count_cache[3+8*1]=
            h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
        }
        if(IS_8x8DCT(left_type[1])){
            h->non_zero_count_cache[3+8*3]=
            h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
        }

        if(IS_8x8DCT(mb_type)){
            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;

            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;

            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;

            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
        }
    }

    /* fill neighbour motion vectors / reference indices (needed to decide
     * filter strength across MB edges for inter MBs) */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= 4*top_xy + 2;
                /* note: ref2frm uses the NEIGHBOUR's slice_num here */
                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
            }else{
                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
            }

            /* only fill the left column when current and left MB share the
             * same field/frame coding (otherwise MBAFF-specific code is
             * needed and the fast path does not apply) */
            if(!IS_INTERLACED(mb_type^left_type[0])){
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    const int b8_xy= 4*left_xy[0] + 1;
                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
                    h->ref_cache[list][scan8[0] - 1 +16 ]=
                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
                }else{
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
                    h->ref_cache[list][scan8[0] - 1 + 0  ]=
                    h->ref_cache[list][scan8[0] - 1 + 8  ]=
                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
                }
            }
        }
    }

    return 0;
}
02431 
/**
 * Run the in-loop deblocking filter over the macroblock row(s) that were
 * just decoded (one row, or a row pair under MBAFF).
 *
 * Temporarily overwrites per-MB state (slice_num, list_count, mb_x/mb_y,
 * chroma_qp, ...) while iterating; slice_type and mb_x/mb_y/chroma_qp are
 * restored before returning.
 */
static void loop_filter(H264Context *h){
    MpegEncContext * const s = &h->s;
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    /* under MBAFF two rows (an MB pair) are filtered per call */
    const int end_mb_y= s->mb_y + FRAME_MBAFF;
    const int old_slice_type= h->slice_type;

    if(h->deblocking_filter) {
        for(mb_x= 0; mb_x<s->mb_width; mb_x++){
            for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                int mb_xy, mb_type;
                mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                /* filtering may cross slices, so reload per-MB slice state */
                h->slice_num= h->slice_table[mb_xy];
                mb_type= s->current_picture.mb_type[mb_xy];
                h->list_count= h->list_counts[mb_xy];

                if(FRAME_MBAFF)
                    h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x= mb_x;
                s->mb_y= mb_y;
                dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
                dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
                dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
                    //FIXME simplify above

                if (MB_FIELD) {
                    /* field MB: double the stride and, for the bottom field,
                     * step back to the field's first line */
                    linesize   = h->mb_linesize   = s->linesize * 2;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                    if(mb_y&1){ //FIXME move out of this function?
                        dest_y -= s->linesize*15;
                        dest_cb-= s->uvlinesize*7;
                        dest_cr-= s->uvlinesize*7;
                    }
                } else {
                    linesize   = h->mb_linesize   = s->linesize;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                }
                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
                /* non-zero return means filtering can be skipped for this MB */
                if(fill_filter_caches(h, mb_type))
                    continue;
                h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);

                if (FRAME_MBAFF) {
                    ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                } else {
                    /* fast path is only valid for non-MBAFF content */
                    ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                }
            }
        }
    }
    /* restore the state clobbered by the loop above */
    h->slice_type= old_slice_type;
    s->mb_x= 0;
    s->mb_y= end_mb_y - FRAME_MBAFF;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
}
02490 
02491 static void predict_field_decoding_flag(H264Context *h){
02492     MpegEncContext * const s = &h->s;
02493     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
02494     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
02495                 ? s->current_picture.mb_type[mb_xy-1]
02496                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
02497                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
02498                 : 0;
02499     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
02500 }
02501 
02502 static int decode_slice(struct AVCodecContext *avctx, void *arg){
02503     H264Context *h = *(void**)arg;
02504     MpegEncContext * const s = &h->s;
02505     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
02506 
02507     s->mb_skip_run= -1;
02508 
02509     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
02510                     (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
02511 
02512     if( h->pps.cabac ) {
02513         /* realign */
02514         align_get_bits( &s->gb );
02515 
02516         /* init cabac */
02517         ff_init_cabac_states( &h->cabac);
02518         ff_init_cabac_decoder( &h->cabac,
02519                                s->gb.buffer + get_bits_count(&s->gb)/8,
02520                                (get_bits_left(&s->gb) + 7)/8);
02521 
02522         ff_h264_init_cabac_states(h);
02523 
02524         for(;;){
02525 //START_TIMER
02526             int ret = ff_h264_decode_mb_cabac(h);
02527             int eos;
02528 //STOP_TIMER("decode_mb_cabac")
02529 
02530             if(ret>=0) ff_h264_hl_decode_mb(h);
02531 
02532             if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
02533                 s->mb_y++;
02534 
02535                 ret = ff_h264_decode_mb_cabac(h);
02536 
02537                 if(ret>=0) ff_h264_hl_decode_mb(h);
02538                 s->mb_y--;
02539             }
02540             eos = get_cabac_terminate( &h->cabac );
02541 
02542             if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
02543                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
02544                 return 0;
02545             }
02546             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
02547                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
02548                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
02549                 return -1;
02550             }
02551 
02552             if( ++s->mb_x >= s->mb_width ) {
02553                 s->mb_x = 0;
02554                 loop_filter(h);
02555                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
02556                 ++s->mb_y;
02557                 if(FIELD_OR_MBAFF_PICTURE) {
02558                     ++s->mb_y;
02559                     if(FRAME_MBAFF && s->mb_y < s->mb_height)
02560                         predict_field_decoding_flag(h);
02561                 }
02562             }
02563 
02564             if( eos || s->mb_y >= s->mb_height ) {
02565                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
02566                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
02567                 return 0;
02568             }
02569         }
02570 
02571     } else {
02572         for(;;){
02573             int ret = ff_h264_decode_mb_cavlc(h);
02574 
02575             if(ret>=0) ff_h264_hl_decode_mb(h);
02576 
02577             if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
02578                 s->mb_y++;
02579                 ret = ff_h264_decode_mb_cavlc(h);
02580 
02581                 if(ret>=0) ff_h264_hl_decode_mb(h);
02582                 s->mb_y--;
02583             }
02584 
02585             if(ret<0){
02586                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
02587                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
02588 
02589                 return -1;
02590             }
02591 
02592             if(++s->mb_x >= s->mb_width){
02593                 s->mb_x=0;
02594                 loop_filter(h);
02595                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
02596                 ++s->mb_y;
02597                 if(FIELD_OR_MBAFF_PICTURE) {
02598                     ++s->mb_y;
02599                     if(FRAME_MBAFF && s->mb_y < s->mb_height)
02600                         predict_field_decoding_flag(h);
02601                 }
02602                 if(s->mb_y >= s->mb_height){
02603                     tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
02604 
02605                     if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
02606                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
02607 
02608                         return 0;
02609                     }else{
02610                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
02611 
02612                         return -1;
02613                     }
02614                 }
02615             }
02616 
02617             if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
02618                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
02619                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
02620                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
02621 
02622                     return 0;
02623                 }else{
02624                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
02625 
02626                     return -1;
02627                 }
02628             }
02629         }
02630     }
02631 
02632 #if 0
02633     for(;s->mb_y < s->mb_height; s->mb_y++){
02634         for(;s->mb_x < s->mb_width; s->mb_x++){
02635             int ret= decode_mb(h);
02636 
02637             ff_h264_hl_decode_mb(h);
02638 
02639             if(ret<0){
02640                 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
02641                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
02642 
02643                 return -1;
02644             }
02645 
02646             if(++s->mb_x >= s->mb_width){
02647                 s->mb_x=0;
02648                 if(++s->mb_y >= s->mb_height){
02649                     if(get_bits_count(s->gb) == s->gb.size_in_bits){
02650                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
02651 
02652                         return 0;
02653                     }else{
02654                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
02655 
02656                         return -1;
02657                     }
02658                 }
02659             }
02660 
02661             if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
02662                 if(get_bits_count(s->gb) == s->gb.size_in_bits){
02663                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
02664 
02665                     return 0;
02666                 }else{
02667                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
02668 
02669                     return -1;
02670                 }
02671             }
02672         }
02673         s->mb_x=0;
02674         ff_draw_horiz_band(s, 16*s->mb_y, 16);
02675     }
02676 #endif
02677     return -1; //not reached
02678 }
02679 
02686 static void execute_decode_slices(H264Context *h, int context_count){
02687     MpegEncContext * const s = &h->s;
02688     AVCodecContext * const avctx= s->avctx;
02689     H264Context *hx;
02690     int i;
02691 
02692     if (s->avctx->hwaccel)
02693         return;
02694     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
02695         return;
02696     if(context_count == 1) {
02697         decode_slice(avctx, &h);
02698     } else {
02699         for(i = 1; i < context_count; i++) {
02700             hx = h->thread_context[i];
02701             hx->s.error_recognition = avctx->error_recognition;
02702             hx->s.error_count = 0;
02703         }
02704 
02705         avctx->execute(avctx, (void *)decode_slice,
02706                        h->thread_context, NULL, context_count, sizeof(void*));
02707 
02708         /* pull back stuff from slices to master context */
02709         hx = h->thread_context[context_count - 1];
02710         s->mb_x = hx->s.mb_x;
02711         s->mb_y = hx->s.mb_y;
02712         s->dropable = hx->s.dropable;
02713         s->picture_structure = hx->s.picture_structure;
02714         for(i = 1; i < context_count; i++)
02715             h->s.error_count += h->thread_context[i]->s.error_count;
02716     }
02717 }
02718 
02719 
02720 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
02721     MpegEncContext * const s = &h->s;
02722     AVCodecContext * const avctx= s->avctx;
02723     int buf_index=0;
02724     H264Context *hx; 
02725     int context_count = 0;
02726     int next_avc= h->is_avc ? 0 : buf_size;
02727 
02728     h->max_contexts = avctx->thread_count;
02729 #if 0
02730     int i;
02731     for(i=0; i<50; i++){
02732         av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
02733     }
02734 #endif
02735     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
02736         h->current_slice = 0;
02737         if (!s->first_field)
02738             s->current_picture_ptr= NULL;
02739         ff_h264_reset_sei(h);
02740     }
02741 
02742     for(;;){
02743         int consumed;
02744         int dst_length;
02745         int bit_length;
02746         const uint8_t *ptr;
02747         int i, nalsize = 0;
02748         int err;
02749 
02750         if(buf_index >= next_avc) {
02751             if(buf_index >= buf_size) break;
02752             nalsize = 0;
02753             for(i = 0; i < h->nal_length_size; i++)
02754                 nalsize = (nalsize << 8) | buf[buf_index++];
02755             if(nalsize <= 1 || nalsize > buf_size - buf_index){
02756                 if(nalsize == 1){
02757                     buf_index++;
02758                     continue;
02759                 }else{
02760                     av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
02761                     break;
02762                 }
02763             }
02764             next_avc= buf_index + nalsize;
02765         } else {
02766             // start code prefix search
02767             for(; buf_index + 3 < next_avc; buf_index++){
02768                 // This should always succeed in the first iteration.
02769                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
02770                     break;
02771             }
02772 
02773             if(buf_index+3 >= buf_size) break;
02774 
02775             buf_index+=3;
02776             if(buf_index >= next_avc) continue;
02777         }
02778 
02779         hx = h->thread_context[context_count];
02780 
02781         ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
02782         if (ptr==NULL || dst_length < 0){
02783             return -1;
02784         }
02785         i= buf_index + consumed;
02786         if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
02787            buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
02788             s->workaround_bugs |= FF_BUG_TRUNCATED;
02789 
02790         if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
02791         while(ptr[dst_length - 1] == 0 && dst_length > 0)
02792             dst_length--;
02793         }
02794         bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
02795 
02796         if(s->avctx->debug&FF_DEBUG_STARTCODE){
02797             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
02798         }
02799 
02800         if (h->is_avc && (nalsize != consumed) && nalsize){
02801             av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
02802         }
02803 
02804         buf_index += consumed;
02805 
02806         if(  (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
02807            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
02808             continue;
02809 
02810       again:
02811         err = 0;
02812         switch(hx->nal_unit_type){
02813         case NAL_IDR_SLICE:
02814             if (h->nal_unit_type != NAL_IDR_SLICE) {
02815                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
02816                 return -1;
02817             }
02818             idr(h); //FIXME ensure we don't loose some frames if there is reordering
02819         case NAL_SLICE:
02820             init_get_bits(&hx->s.gb, ptr, bit_length);
02821             hx->intra_gb_ptr=
02822             hx->inter_gb_ptr= &hx->s.gb;
02823             hx->s.data_partitioning = 0;
02824 
02825             if((err = decode_slice_header(hx, h)))
02826                break;
02827 
02828             if (h->current_slice == 1) {
02829                 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
02830                     return -1;
02831                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
02832                     ff_vdpau_h264_picture_start(s);
02833             }
02834 
02835             s->current_picture_ptr->key_frame |=
02836                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
02837                     (h->sei_recovery_frame_cnt >= 0);
02838             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
02839                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
02840                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
02841                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
02842                && avctx->skip_frame < AVDISCARD_ALL){
02843                 if(avctx->hwaccel) {
02844                     if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
02845                         return -1;
02846                 }else
02847                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
02848                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
02849                     ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
02850                     ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
02851                 }else
02852                     context_count++;
02853             }
02854             break;
02855         case NAL_DPA:
02856             init_get_bits(&hx->s.gb, ptr, bit_length);
02857             hx->intra_gb_ptr=
02858             hx->inter_gb_ptr= NULL;
02859 
02860             if ((err = decode_slice_header(hx, h)) < 0)
02861                 break;
02862 
02863             hx->s.data_partitioning = 1;
02864 
02865             break;
02866         case NAL_DPB:
02867             init_get_bits(&hx->intra_gb, ptr, bit_length);
02868             hx->intra_gb_ptr= &hx->intra_gb;
02869             break;
02870         case NAL_DPC:
02871             init_get_bits(&hx->inter_gb, ptr, bit_length);
02872             hx->inter_gb_ptr= &hx->inter_gb;
02873 
02874             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
02875                && s->context_initialized
02876                && s->hurry_up < 5
02877                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
02878                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
02879                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
02880                && avctx->skip_frame < AVDISCARD_ALL)
02881                 context_count++;
02882             break;
02883         case NAL_SEI:
02884             init_get_bits(&s->gb, ptr, bit_length);
02885             ff_h264_decode_sei(h);
02886             break;
02887         case NAL_SPS:
02888             init_get_bits(&s->gb, ptr, bit_length);
02889             ff_h264_decode_seq_parameter_set(h);
02890 
02891             if(s->flags& CODEC_FLAG_LOW_DELAY)
02892                 s->low_delay=1;
02893 
02894             if(avctx->has_b_frames < 2)
02895                 avctx->has_b_frames= !s->low_delay;
02896             break;
02897         case NAL_PPS:
02898             init_get_bits(&s->gb, ptr, bit_length);
02899 
02900             ff_h264_decode_picture_parameter_set(h, bit_length);
02901 
02902             break;
02903         case NAL_AUD:
02904         case NAL_END_SEQUENCE:
02905         case NAL_END_STREAM:
02906         case NAL_FILLER_DATA:
02907         case NAL_SPS_EXT:
02908         case NAL_AUXILIARY_SLICE:
02909             break;
02910         default:
02911             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
02912         }
02913 
02914         if(context_count == h->max_contexts) {
02915             execute_decode_slices(h, context_count);
02916             context_count = 0;
02917         }
02918 
02919         if (err < 0)
02920             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
02921         else if(err == 1) {
02922             /* Slice could not be decoded in parallel mode, copy down
02923              * NAL unit stuff to context 0 and restart. Note that
02924              * rbsp_buffer is not transferred, but since we no longer
02925              * run in parallel mode this should not be an issue. */
02926             h->nal_unit_type = hx->nal_unit_type;
02927             h->nal_ref_idc   = hx->nal_ref_idc;
02928             hx = h;
02929             goto again;
02930         }
02931     }
02932     if(context_count)
02933         execute_decode_slices(h, context_count);
02934     return buf_index;
02935 }
02936 
02940 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
02941         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
02942         if(pos+10>buf_size) pos=buf_size; // oops ;)
02943 
02944         return pos;
02945 }
02946 
/**
 * Decode one access unit from an AVPacket and, when a frame is ready in
 * display order, copy it into *pict and set *data_size.
 *
 * A zero-sized packet signals end of stream: the remaining pictures in the
 * delayed-picture buffer are flushed one per call.
 *
 * @return number of bytes consumed from the packet, or -1 on error
 */
02947 static int decode_frame(AVCodecContext *avctx,
02948                              void *data, int *data_size,
02949                              AVPacket *avpkt)
02950 {
02951     const uint8_t *buf = avpkt->data;
02952     int buf_size = avpkt->size;
02953     H264Context *h = avctx->priv_data;
02954     MpegEncContext *s = &h->s;
02955     AVFrame *pict = data;
02956     int buf_index;
02957 
02958     s->flags= avctx->flags;
02959     s->flags2= avctx->flags2;
02960 
02961    /* end of stream, output what is still in the buffers */
02962     if (buf_size == 0) {
02963         Picture *out;
02964         int i, out_idx;
02965 
02966 //FIXME factorize this with the output code below
          /* pick the delayed picture with the smallest POC, stopping the
           * scan at the first key frame / MMCO reset boundary */
02967         out = h->delayed_pic[0];
02968         out_idx = 0;
02969         for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
02970             if(h->delayed_pic[i]->poc < out->poc){
02971                 out = h->delayed_pic[i];
02972                 out_idx = i;
02973             }
02974 
          /* remove the chosen picture from the delayed list */
02975         for(i=out_idx; h->delayed_pic[i]; i++)
02976             h->delayed_pic[i] = h->delayed_pic[i+1];
02977 
02978         if(out){
02979             *data_size = sizeof(AVFrame);
02980             *pict= *(AVFrame*)out;
02981         }
02982 
02983         return 0;
02984     }
02985 
02986     buf_index=decode_nal_units(h, buf, buf_size);
02987     if(buf_index < 0)
02988         return -1;
02989 
      /* without CODEC_FLAG2_CHUNKS a packet must have produced a picture,
       * unless frames are being deliberately skipped */
02990     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
02991         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
02992         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
02993         return -1;
02994     }
02995 
02996     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
02997         Picture *out = s->current_picture_ptr;
02998         Picture *cur = s->current_picture_ptr;
02999         int i, pics, out_of_order, out_idx;
03000 
          /* finish the current field/frame (see field_end()) */
03001         field_end(h);
03002 
03003         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
03004             /* Wait for second field. */
03005             *data_size = 0;
03006 
03007         } else {
03008             cur->interlaced_frame = 0;
03009             cur->repeat_pict = 0;
03010 
03011             /* Signal interlacing information externally. */
03012             /* Prioritize picture timing SEI information over used decoding process if it exists. */
03013 
03014             if(h->sps.pic_struct_present_flag){
03015                 switch (h->sei_pic_struct)
03016                 {
03017                 case SEI_PIC_STRUCT_FRAME:
03018                     break;
03019                 case SEI_PIC_STRUCT_TOP_FIELD:
03020                 case SEI_PIC_STRUCT_BOTTOM_FIELD:
03021                     cur->interlaced_frame = 1;
03022                     break;
03023                 case SEI_PIC_STRUCT_TOP_BOTTOM:
03024                 case SEI_PIC_STRUCT_BOTTOM_TOP:
03025                     if (FIELD_OR_MBAFF_PICTURE)
03026                         cur->interlaced_frame = 1;
03027                     else
03028                         // try to flag soft telecine progressive
03029                         cur->interlaced_frame = h->prev_interlaced_frame;
03030                     break;
03031                 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
03032                 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
03033                     // Signal the possibility of telecined film externally (pic_struct 5,6)
03034                     // From these hints, let the applications decide if they apply deinterlacing.
03035                     cur->repeat_pict = 1;
03036                     break;
03037                 case SEI_PIC_STRUCT_FRAME_DOUBLING:
03038                     // Force progressive here, as doubling interlaced frame is a bad idea.
03039                     cur->repeat_pict = 2;
03040                     break;
03041                 case SEI_PIC_STRUCT_FRAME_TRIPLING:
03042                     cur->repeat_pict = 4;
03043                     break;
03044                 }
03045 
03046                 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
03047                     cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
03048             }else{
03049                 /* Derive interlacing flag from used decoding process. */
03050                 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
03051             }
03052             h->prev_interlaced_frame = cur->interlaced_frame;
03053 
03054             if (cur->field_poc[0] != cur->field_poc[1]){
03055                 /* Derive top_field_first from field pocs. */
03056                 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
03057             }else{
03058                 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
03059                     /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
03060                     if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
03061                       || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
03062                         cur->top_field_first = 1;
03063                     else
03064                         cur->top_field_first = 0;
03065                 }else{
03066                     /* Most likely progressive */
03067                     cur->top_field_first = 0;
03068                 }
03069             }
03070 
03071         //FIXME do something with unavailable reference frames
03072 
03073             /* Sort B-frames into display order */
03074 
              /* honor the stream's own reorder-depth hint, if present */
03075             if(h->sps.bitstream_restriction_flag
03076                && s->avctx->has_b_frames < h->sps.num_reorder_frames){
03077                 s->avctx->has_b_frames = h->sps.num_reorder_frames;
03078                 s->low_delay = 0;
03079             }
03080 
              /* strict compliance without a restriction hint: assume worst-case delay */
03081             if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
03082                && !h->sps.bitstream_restriction_flag){
03083                 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
03084                 s->low_delay= 0;
03085             }
03086 
03087             pics = 0;
03088             while(h->delayed_pic[pics]) pics++;
03089 
03090             assert(pics <= MAX_DELAYED_PIC_COUNT);
03091 
              /* append the current picture and keep it referenced while delayed */
03092             h->delayed_pic[pics++] = cur;
03093             if(cur->reference == 0)
03094                 cur->reference = DELAYED_PIC_REF;
03095 
03096             out = h->delayed_pic[0];
03097             out_idx = 0;
03098             for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
03099                 if(h->delayed_pic[i]->poc < out->poc){
03100                     out = h->delayed_pic[i];
03101                     out_idx = i;
03102                 }
03103             if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
03104                 h->outputed_poc= INT_MIN;
03105             out_of_order = out->poc < h->outputed_poc;
03106 
              /* grow the assumed reorder depth when the stream proves it is deeper
               * than what we are currently buffering */
03107             if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
03108                 { }
03109             else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
03110                || (s->low_delay &&
03111                 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
03112                  || cur->pict_type == FF_B_TYPE)))
03113             {
03114                 s->low_delay = 0;
03115                 s->avctx->has_b_frames++;
03116             }
03117 
03118             if(out_of_order || pics > s->avctx->has_b_frames){
03119                 out->reference &= ~DELAYED_PIC_REF;
03120                 for(i=out_idx; h->delayed_pic[i]; i++)
03121                     h->delayed_pic[i] = h->delayed_pic[i+1];
03122             }
03123             if(!out_of_order && pics > s->avctx->has_b_frames){
03124                 *data_size = sizeof(AVFrame);
03125 
03126                 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
03127                     h->outputed_poc = INT_MIN;
03128                 } else
03129                     h->outputed_poc = out->poc;
03130                 *pict= *(AVFrame*)out;
03131             }else{
03132                 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
03133             }
03134         }
03135     }
03136 
03137     assert(pict->data[0] || !*data_size);
03138     ff_print_debug_info(s, pict);
03139 //printf("out %d\n", (int)pict->data[0]);
03140 
03141     return get_consumed_bytes(s, buf_index, buf_size);
03142 }
03143 #if 0
/* NOTE(review): disabled code kept for reference — presumably superseded by
 * the neighbour caching in fill_caches(); confirm before deleting. */
/* Compute availability flags for the macroblocks neighbouring the current
 * one; a neighbour is available only when it belongs to the same slice.
 * Index meaning (from the mb_xy offsets): 0 = top-left, 1 = top,
 * 2 = top-right, 3 = left; 4 and 5 are fixed placeholders. */
03144 static inline void fill_mb_avail(H264Context *h){
03145     MpegEncContext * const s = &h->s;
03146     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
03147 
03148     if(s->mb_y){
03149         h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
03150         h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
03151         h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
03152     }else{
          /* first macroblock row: no neighbours above */
03153         h->mb_avail[0]=
03154         h->mb_avail[1]=
03155         h->mb_avail[2]= 0;
03156     }
03157     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
03158     h->mb_avail[4]= 1; //FIXME move out
03159     h->mb_avail[5]= 0; //FIXME move out
03160 }
03161 #endif
03162 
03163 #ifdef TEST
03164 #undef printf
03165 #undef random
03166 #define COUNT 8000
03167 #define SIZE (COUNT*40)
/* Standalone self-test / micro-benchmark, built only with -DTEST.
 * Round-trips Exp-Golomb coding (set_*_golomb vs get_*_golomb) and times
 * each call with START_TIMER/STOP_TIMER; the DCT/quantizer/NAL sections
 * below are currently disabled with #if 0. Returns 0 on success. */
03168 int main(void){
03169     int i;
03170     uint8_t temp[SIZE];
03171     PutBitContext pb;
03172     GetBitContext gb;
03173 //    int int_temp[10000];
03174     DSPContext dsp;
03175     AVCodecContext avctx;
03176 
03177     dsputil_init(&dsp, &avctx);
03178 
      /* write COUNT unsigned Exp-Golomb codes, then read them back and
       * verify each decoded value matches what was written */
03179     init_put_bits(&pb, temp, SIZE);
03180     printf("testing unsigned exp golomb\n");
03181     for(i=0; i<COUNT; i++){
03182         START_TIMER
03183         set_ue_golomb(&pb, i);
03184         STOP_TIMER("set_ue_golomb");
03185     }
03186     flush_put_bits(&pb);
03187 
03188     init_get_bits(&gb, temp, 8*SIZE);
03189     for(i=0; i<COUNT; i++){
03190         int j, s;
03191 
03192         s= show_bits(&gb, 24);
03193 
03194         START_TIMER
03195         j= get_ue_golomb(&gb);
03196         if(j != i){
03197             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
03198 //            return -1;
03199         }
03200         STOP_TIMER("get_ue_golomb");
03201     }
03202 
03203 
      /* same round-trip for signed Exp-Golomb, centred around zero */
03204     init_put_bits(&pb, temp, SIZE);
03205     printf("testing signed exp golomb\n");
03206     for(i=0; i<COUNT; i++){
03207         START_TIMER
03208         set_se_golomb(&pb, i - COUNT/2);
03209         STOP_TIMER("set_se_golomb");
03210     }
03211     flush_put_bits(&pb);
03212 
03213     init_get_bits(&gb, temp, 8*SIZE);
03214     for(i=0; i<COUNT; i++){
03215         int j, s;
03216 
03217         s= show_bits(&gb, 24);
03218 
03219         START_TIMER
03220         j= get_se_golomb(&gb);
03221         if(j != i - COUNT/2){
03222             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
03223 //            return -1;
03224         }
03225         STOP_TIMER("get_se_golomb");
03226     }
03227 
03228 #if 0
03229     printf("testing 4x4 (I)DCT\n");
03230 
03231     DCTELEM block[16];
03232     uint8_t src[16], ref[16];
03233     uint64_t error= 0, max_error=0;
03234 
03235     for(i=0; i<COUNT; i++){
03236         int j;
03237 //        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
03238         for(j=0; j<16; j++){
03239             ref[j]= random()%255;
03240             src[j]= random()%255;
03241         }
03242 
03243         h264_diff_dct_c(block, src, ref, 4);
03244 
03245         //normalize
03246         for(j=0; j<16; j++){
03247 //            printf("%d ", block[j]);
03248             block[j]= block[j]*4;
03249             if(j&1) block[j]= (block[j]*4 + 2)/5;
03250             if(j&4) block[j]= (block[j]*4 + 2)/5;
03251         }
03252 //        printf("\n");
03253 
03254         h->h264dsp.h264_idct_add(ref, block, 4);
03255 /*        for(j=0; j<16; j++){
03256             printf("%d ", ref[j]);
03257         }
03258         printf("\n");*/
03259 
03260         for(j=0; j<16; j++){
03261             int diff= FFABS(src[j] - ref[j]);
03262 
03263             error+= diff*diff;
03264             max_error= FFMAX(max_error, diff);
03265         }
03266     }
03267     printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
03268     printf("testing quantizer\n");
03269     for(qp=0; qp<52; qp++){
03270         for(i=0; i<16; i++)
03271             src1_block[i]= src2_block[i]= random()%255;
03272 
03273     }
03274     printf("Testing NAL layer\n");
03275 
03276     uint8_t bitstream[COUNT];
03277     uint8_t nal[COUNT*2];
03278     H264Context h;
03279     memset(&h, 0, sizeof(H264Context));
03280 
03281     for(i=0; i<COUNT; i++){
03282         int zeros= i;
03283         int nal_length;
03284         int consumed;
03285         int out_length;
03286         uint8_t *out;
03287         int j;
03288 
03289         for(j=0; j<COUNT; j++){
03290             bitstream[j]= (random() % 255) + 1;
03291         }
03292 
03293         for(j=0; j<zeros; j++){
03294             int pos= random() % COUNT;
03295             while(bitstream[pos] == 0){
03296                 pos++;
03297                 pos %= COUNT;
03298             }
03299             bitstream[pos]=0;
03300         }
03301 
03302         START_TIMER
03303 
03304         nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
03305         if(nal_length<0){
03306             printf("encoding failed\n");
03307             return -1;
03308         }
03309 
03310         out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
03311 
03312         STOP_TIMER("NAL")
03313 
03314         if(out_length != COUNT){
03315             printf("incorrect length %d %d\n", out_length, COUNT);
03316             return -1;
03317         }
03318 
03319         if(consumed != nal_length){
03320             printf("incorrect consumed length %d %d\n", nal_length, consumed);
03321             return -1;
03322         }
03323 
03324         if(memcmp(bitstream, out, COUNT)){
03325             printf("mismatch\n");
03326             return -1;
03327         }
03328     }
03329 #endif
03330 
03331     printf("Testing RBSP\n");
03332 
03333 
03334     return 0;
03335 }
03336 #endif /* TEST */
03337 
03338 
03339 av_cold void ff_h264_free_context(H264Context *h)
03340 {
03341     int i;
03342 
03343     free_tables(h); //FIXME cleanup init stuff perhaps
03344 
03345     for(i = 0; i < MAX_SPS_COUNT; i++)
03346         av_freep(h->sps_buffers + i);
03347 
03348     for(i = 0; i < MAX_PPS_COUNT; i++)
03349         av_freep(h->pps_buffers + i);
03350 }
03351 
03352 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
03353 {
03354     H264Context *h = avctx->priv_data;
03355     MpegEncContext *s = &h->s;
03356 
03357     ff_h264_free_context(h);
03358 
03359     MPV_common_end(s);
03360 
03361 //    memset(h, 0, sizeof(H264Context));
03362 
03363     return 0;
03364 }
03365 
03366 
03367 AVCodec h264_decoder = {
03368     "h264",
03369     AVMEDIA_TYPE_VIDEO,
03370     CODEC_ID_H264,
03371     sizeof(H264Context),
03372     ff_h264_decode_init,
03373     NULL,
03374     ff_h264_decode_end,
03375     decode_frame,
03376     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
03377     .flush= flush_dpb,
03378     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
03379     .pix_fmts= ff_hwaccel_pixfmt_list_420,
03380 };
03381 
#if CONFIG_H264_VDPAU_DECODER
/* VDPAU-accelerated H.264 decoder entry; same callbacks as the software
 * decoder but advertises hardware capability and VDPAU pixel formats. */
AVCodec h264_vdpau_decoder = {
    .name           = "h264_vdpau",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .close          = ff_h264_decode_end,
    .decode         = decode_frame,
    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush          = flush_dpb,
    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .pix_fmts       = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
};
#endif
03397 #endif