00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
/*
 * Selects how the fast rate control ranks macroblocks: when nonzero they are
 * ranked by the value computed in dnxhd_mb_var_thread(), when zero by the
 * SSD-per-bit ratio computed in dnxhd_encode_fast(). It also gates whether
 * dnxhd_calc_bits_thread() computes SSD outside of RD mode.
 */
#define RC_VARIANCE 1 // use variance or ssd for fast rc
00026
00027 #include "avcodec.h"
00028 #include "dsputil.h"
00029 #include "mpegvideo.h"
00030 #include "dnxhddata.h"
00031
/* Sort key used by the fast rate control: one entry per macroblock. */
typedef struct {
    uint16_t mb;    /* macroblock index (mb_y * mb_width + mb_x) */
    int value;      /* ranking value; entries are sorted by descending value */
} RCCMPEntry;
00036
/* Rate/distortion measurement of one macroblock at one qscale. */
typedef struct {
    int ssd;    /* sum of squared differences; stays 0 when the SSD pass is skipped */
    int bits;   /* bit cost of the macroblock at this qscale */
} RCEntry;
00041
/* Defined outside this file; installed as the quantizer in dnxhd_encode_init()
 * when ff_dct_common_init() leaves m.dct_quantize unset. */
int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
00043
/* Private state of the DNxHD encoder; one copy exists per worker thread. */
typedef struct DNXHDEncContext {
    /* Must stay the first member: the context is cast to MpegEncContext*
     * when calling m.dct_quantize(). */
    MpegEncContext m;

    AVFrame frame;                  /* exposed to the caller as avctx->coded_frame */
    int cid;                        /* value returned by ff_dnxhd_find_cid() */
    const CIDEntry *cid_table;      /* entry of ff_dnxhd_cid_table selected for cid */
    uint8_t *msip;                  /* points at buf + 0x170, where per-row slice offsets are written */
    uint32_t *slice_size;           /* per macroblock row: slice size in bytes */

    struct DNXHDEncContext *thread[MAX_THREADS];    /* thread[0] is this context, the rest are copies */

    unsigned dct_y_offset;          /* luma byte offset of the lower 8 rows (linesize * 8) */
    unsigned dct_uv_offset;         /* chroma byte offset of the lower 8 rows (uvlinesize * 8) */
    int interlaced;                 /* set when CODEC_FLAG_INTERLACED_DCT is requested */
    int cur_field;                  /* field currently being encoded */

    DECLARE_ALIGNED_16(DCTELEM, blocks[8][64]);     /* the 8 blocks of the current macroblock */

    /* quantization matrices, indexed by qscale */
    int      (*qmatrix_c)     [64];
    int      (*qmatrix_l)     [64];
    uint16_t (*qmatrix_l16)[2][64];
    uint16_t (*qmatrix_c16)[2][64];

    unsigned frame_bits;            /* payload bit budget of one coding unit */
    uint8_t *src[3];                /* plane pointers of the field/frame being encoded */

    /* VLC lookup tables built by dnxhd_init_vlc() */
    uint16_t *table_vlc_codes;
    uint8_t  *table_vlc_bits;
    uint16_t *table_run_codes;
    uint8_t  *table_run_bits;

    unsigned slice_bits;            /* not referenced by the code visible in this file */
    unsigned qscale;                /* qscale being evaluated / last chosen by the fast rate control */
    unsigned lambda;                /* lambda last chosen by the RD rate control, fixed point */

    unsigned thread_size;           /* not referenced by the code visible in this file */

    uint16_t *mb_bits;              /* per macroblock: bit cost at its chosen qscale */
    uint8_t  *mb_qscale;            /* per macroblock: chosen qscale */

    RCCMPEntry *mb_cmp;             /* fast rate control sort array (not allocated in RD mode) */
    RCEntry   (*mb_rc)[8160];       /* [qscale][mb] rate/distortion measurements */
} DNXHDEncContext;
00088
/* Number of fractional bits in the fixed-point lambda used by dnxhd_encode_rdo(). */
#define LAMBDA_FRAC_BITS 10
00090
00091 static int dnxhd_init_vlc(DNXHDEncContext *ctx)
00092 {
00093 int i;
00094
00095 CHECKED_ALLOCZ(ctx->table_vlc_codes, 449*2);
00096 CHECKED_ALLOCZ(ctx->table_vlc_bits, 449);
00097 CHECKED_ALLOCZ(ctx->table_run_codes, 63*2);
00098 CHECKED_ALLOCZ(ctx->table_run_bits, 63);
00099
00100 for (i = 0; i < 257; i++) {
00101 int level = ctx->cid_table->ac_level[i] +
00102 (ctx->cid_table->ac_run_flag[i] << 7) + (ctx->cid_table->ac_index_flag[i] << 8);
00103 assert(level < 449);
00104 if (ctx->cid_table->ac_level[i] == 64 && ctx->cid_table->ac_index_flag[i])
00105 level -= 64;
00106 ctx->table_vlc_codes[level] = ctx->cid_table->ac_codes[i];
00107 ctx->table_vlc_bits [level] = ctx->cid_table->ac_bits[i];
00108 }
00109 for (i = 0; i < 62; i++) {
00110 int run = ctx->cid_table->run[i];
00111 assert(run < 63);
00112 ctx->table_run_codes[run] = ctx->cid_table->run_codes[i];
00113 ctx->table_run_bits [run] = ctx->cid_table->run_bits[i];
00114 }
00115 return 0;
00116 fail:
00117 return -1;
00118 }
00119
00120 static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
00121 {
00122
00123 uint16_t weight_matrix[64] = {1,};
00124 int qscale, i;
00125
00126 CHECKED_ALLOCZ(ctx->qmatrix_l, (ctx->m.avctx->qmax+1) * 64 * sizeof(int));
00127 CHECKED_ALLOCZ(ctx->qmatrix_c, (ctx->m.avctx->qmax+1) * 64 * sizeof(int));
00128 CHECKED_ALLOCZ(ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t));
00129 CHECKED_ALLOCZ(ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t));
00130
00131 for (i = 1; i < 64; i++) {
00132 int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
00133 weight_matrix[j] = ctx->cid_table->luma_weight[i];
00134 }
00135 ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
00136 ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
00137 for (i = 1; i < 64; i++) {
00138 int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
00139 weight_matrix[j] = ctx->cid_table->chroma_weight[i];
00140 }
00141 ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
00142 ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
00143 for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
00144 for (i = 0; i < 64; i++) {
00145 ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2;
00146 ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
00147 ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
00148 }
00149 }
00150 return 0;
00151 fail:
00152 return -1;
00153 }
00154
00155 static int dnxhd_init_rc(DNXHDEncContext *ctx)
00156 {
00157 CHECKED_ALLOCZ(ctx->mb_rc, 8160*ctx->m.avctx->qmax*sizeof(RCEntry));
00158 if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD)
00159 CHECKED_ALLOCZ(ctx->mb_cmp, ctx->m.mb_num*sizeof(RCCMPEntry));
00160
00161 ctx->frame_bits = (ctx->cid_table->coding_unit_size - 640 - 4) * 8;
00162 ctx->qscale = 1;
00163 ctx->lambda = 2<<LAMBDA_FRAC_BITS;
00164 return 0;
00165 fail:
00166 return -1;
00167 }
00168
00169 static int dnxhd_encode_init(AVCodecContext *avctx)
00170 {
00171 DNXHDEncContext *ctx = avctx->priv_data;
00172 int i, index;
00173
00174 ctx->cid = ff_dnxhd_find_cid(avctx);
00175 if (!ctx->cid || avctx->pix_fmt != PIX_FMT_YUV422P) {
00176 av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD\n");
00177 return -1;
00178 }
00179 av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid);
00180
00181 index = ff_dnxhd_get_cid_table(ctx->cid);
00182 ctx->cid_table = &ff_dnxhd_cid_table[index];
00183
00184 ctx->m.avctx = avctx;
00185 ctx->m.mb_intra = 1;
00186 ctx->m.h263_aic = 1;
00187
00188 dsputil_init(&ctx->m.dsp, avctx);
00189 ff_dct_common_init(&ctx->m);
00190 if (!ctx->m.dct_quantize)
00191 ctx->m.dct_quantize = dct_quantize_c;
00192
00193 ctx->m.mb_height = (avctx->height + 15) / 16;
00194 ctx->m.mb_width = (avctx->width + 15) / 16;
00195
00196 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
00197 ctx->interlaced = 1;
00198 ctx->m.mb_height /= 2;
00199 }
00200
00201 ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;
00202
00203 if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
00204 ctx->m.intra_quant_bias = avctx->intra_quant_bias;
00205 if (dnxhd_init_qmat(ctx, ctx->m.intra_quant_bias, 0) < 0)
00206 return -1;
00207
00208 if (dnxhd_init_vlc(ctx) < 0)
00209 return -1;
00210 if (dnxhd_init_rc(ctx) < 0)
00211 return -1;
00212
00213 CHECKED_ALLOCZ(ctx->slice_size, ctx->m.mb_height*sizeof(uint32_t));
00214 CHECKED_ALLOCZ(ctx->mb_bits, ctx->m.mb_num *sizeof(uint16_t));
00215 CHECKED_ALLOCZ(ctx->mb_qscale, ctx->m.mb_num *sizeof(uint8_t));
00216
00217 ctx->frame.key_frame = 1;
00218 ctx->frame.pict_type = FF_I_TYPE;
00219 ctx->m.avctx->coded_frame = &ctx->frame;
00220
00221 if (avctx->thread_count > MAX_THREADS || (avctx->thread_count > ctx->m.mb_height)) {
00222 av_log(avctx, AV_LOG_ERROR, "too many threads\n");
00223 return -1;
00224 }
00225
00226 ctx->thread[0] = ctx;
00227 for (i = 1; i < avctx->thread_count; i++) {
00228 ctx->thread[i] = av_malloc(sizeof(DNXHDEncContext));
00229 memcpy(ctx->thread[i], ctx, sizeof(DNXHDEncContext));
00230 }
00231
00232 for (i = 0; i < avctx->thread_count; i++) {
00233 ctx->thread[i]->m.start_mb_y = (ctx->m.mb_height*(i ) + avctx->thread_count/2) / avctx->thread_count;
00234 ctx->thread[i]->m.end_mb_y = (ctx->m.mb_height*(i+1) + avctx->thread_count/2) / avctx->thread_count;
00235 }
00236
00237 return 0;
00238 fail:
00239 return -1;
00240 }
00241
00242 static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
00243 {
00244 DNXHDEncContext *ctx = avctx->priv_data;
00245 const uint8_t header_prefix[5] = { 0x00,0x00,0x02,0x80,0x01 };
00246
00247 memcpy(buf, header_prefix, 5);
00248 buf[5] = ctx->interlaced ? ctx->cur_field+2 : 0x01;
00249 buf[6] = 0x80;
00250 buf[7] = 0xa0;
00251 AV_WB16(buf + 0x18, avctx->height);
00252 AV_WB16(buf + 0x1a, avctx->width);
00253 AV_WB16(buf + 0x1d, avctx->height);
00254
00255 buf[0x21] = 0x38;
00256 buf[0x22] = 0x88 + (ctx->frame.interlaced_frame<<2);
00257 AV_WB32(buf + 0x28, ctx->cid);
00258 buf[0x2c] = ctx->interlaced ? 0 : 0x80;
00259
00260 buf[0x5f] = 0x01;
00261
00262 buf[0x167] = 0x02;
00263 AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4);
00264 buf[0x16d] = ctx->m.mb_height;
00265 buf[0x16f] = 0x10;
00266
00267 ctx->msip = buf + 0x170;
00268 return 0;
00269 }
00270
00271 static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
00272 {
00273 int nbits;
00274 if (diff < 0) {
00275 nbits = av_log2_16bit(-2*diff);
00276 diff--;
00277 } else {
00278 nbits = av_log2_16bit(2*diff);
00279 }
00280 put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits,
00281 (ctx->cid_table->dc_codes[nbits]<<nbits) + (diff & ((1 << nbits) - 1)));
00282 }
00283
00284 static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, DCTELEM *block, int last_index, int n)
00285 {
00286 int last_non_zero = 0;
00287 int offset = 0;
00288 int slevel, i, j;
00289
00290 dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]);
00291 ctx->m.last_dc[n] = block[0];
00292
00293 for (i = 1; i <= last_index; i++) {
00294 j = ctx->m.intra_scantable.permutated[i];
00295 slevel = block[j];
00296 if (slevel) {
00297 int run_level = i - last_non_zero - 1;
00298 int sign;
00299 MASK_ABS(sign, slevel);
00300 if (slevel > 64) {
00301 offset = (slevel-1) >> 6;
00302 slevel = 256 | (slevel & 63);
00303 }
00304 if (run_level)
00305 slevel |= 128;
00306 put_bits(&ctx->m.pb, ctx->table_vlc_bits[slevel]+1, (ctx->table_vlc_codes[slevel]<<1)|(sign&1));
00307 if (offset) {
00308 put_bits(&ctx->m.pb, 4, offset);
00309 offset = 0;
00310 }
00311 if (run_level)
00312 put_bits(&ctx->m.pb, ctx->table_run_bits[run_level], ctx->table_run_codes[run_level]);
00313 last_non_zero = i;
00314 }
00315 }
00316 put_bits(&ctx->m.pb, ctx->table_vlc_bits[0], ctx->table_vlc_codes[0]);
00317 }
00318
00319 static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *block, int n, int qscale, int last_index)
00320 {
00321 const uint8_t *weight_matrix;
00322 int level;
00323 int i;
00324
00325 weight_matrix = (n&2) ? ctx->cid_table->chroma_weight : ctx->cid_table->luma_weight;
00326
00327 for (i = 1; i <= last_index; i++) {
00328 int j = ctx->m.intra_scantable.permutated[i];
00329 level = block[j];
00330 if (level) {
00331 if (level < 0) {
00332 level = (1-2*level) * qscale * weight_matrix[i];
00333 if (weight_matrix[i] != 32)
00334 level += 32;
00335 level >>= 6;
00336 level = -level;
00337 } else {
00338 level = (2*level+1) * qscale * weight_matrix[i];
00339 if (weight_matrix[i] != 32)
00340 level += 32;
00341 level >>= 6;
00342 }
00343 block[j] = level;
00344 }
00345 }
00346 }
00347
00348 static av_always_inline int dnxhd_ssd_block(DCTELEM *qblock, DCTELEM *block)
00349 {
00350 int score = 0;
00351 int i;
00352 for (i = 0; i < 64; i++)
00353 score += (block[i]-qblock[i])*(block[i]-qblock[i]);
00354 return score;
00355 }
00356
00357 static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *block, int last_index)
00358 {
00359 int last_non_zero = 0;
00360 int bits = 0;
00361 int i, j, level;
00362 for (i = 1; i <= last_index; i++) {
00363 j = ctx->m.intra_scantable.permutated[i];
00364 level = block[j];
00365 if (level) {
00366 int run_level = i - last_non_zero - 1;
00367 level = FFABS(level);
00368 if (level > 64) {
00369 level = 256 | (level & 63);
00370 bits += 4;
00371 }
00372 level |= (!!run_level)<<7;
00373 bits += ctx->table_vlc_bits[level]+1 + ctx->table_run_bits[run_level];
00374 last_non_zero = i;
00375 }
00376 }
00377 return bits;
00378 }
00379
00380 static av_always_inline void dnxhd_get_pixels_4x8(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00381 {
00382 int i;
00383 for (i = 0; i < 4; i++) {
00384 block[0] = pixels[0];
00385 block[1] = pixels[1];
00386 block[2] = pixels[2];
00387 block[3] = pixels[3];
00388 block[4] = pixels[4];
00389 block[5] = pixels[5];
00390 block[6] = pixels[6];
00391 block[7] = pixels[7];
00392 pixels += line_size;
00393 block += 8;
00394 }
00395 memcpy(block , block- 8, sizeof(*block)*8);
00396 memcpy(block+ 8, block-16, sizeof(*block)*8);
00397 memcpy(block+16, block-24, sizeof(*block)*8);
00398 memcpy(block+24, block-32, sizeof(*block)*8);
00399 }
00400
00401 static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
00402 {
00403 const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << 4);
00404 const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
00405 const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
00406 DSPContext *dsp = &ctx->m.dsp;
00407
00408 dsp->get_pixels(ctx->blocks[0], ptr_y , ctx->m.linesize);
00409 dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize);
00410 dsp->get_pixels(ctx->blocks[2], ptr_u , ctx->m.uvlinesize);
00411 dsp->get_pixels(ctx->blocks[3], ptr_v , ctx->m.uvlinesize);
00412
00413 if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
00414 if (ctx->interlaced) {
00415 dnxhd_get_pixels_4x8(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize);
00416 dnxhd_get_pixels_4x8(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
00417 dnxhd_get_pixels_4x8(ctx->blocks[6], ptr_u + ctx->dct_uv_offset , ctx->m.uvlinesize);
00418 dnxhd_get_pixels_4x8(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize);
00419 } else
00420 memset(ctx->blocks[4], 0, 4*64*sizeof(DCTELEM));
00421 } else {
00422 dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize);
00423 dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
00424 dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset , ctx->m.uvlinesize);
00425 dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize);
00426 }
00427 }
00428
00429 static av_always_inline int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
00430 {
00431 if (i&2) {
00432 ctx->m.q_intra_matrix16 = ctx->qmatrix_c16;
00433 ctx->m.q_intra_matrix = ctx->qmatrix_c;
00434 return 1 + (i&1);
00435 } else {
00436 ctx->m.q_intra_matrix16 = ctx->qmatrix_l16;
00437 ctx->m.q_intra_matrix = ctx->qmatrix_l;
00438 return 0;
00439 }
00440 }
00441
00442 static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg)
00443 {
00444 DNXHDEncContext *ctx = arg;
00445 int mb_y, mb_x;
00446 int qscale = ctx->thread[0]->qscale;
00447
00448 for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
00449 ctx->m.last_dc[0] =
00450 ctx->m.last_dc[1] =
00451 ctx->m.last_dc[2] = 1024;
00452
00453 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00454 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00455 int ssd = 0;
00456 int ac_bits = 0;
00457 int dc_bits = 0;
00458 int i;
00459
00460 dnxhd_get_blocks(ctx, mb_x, mb_y);
00461
00462 for (i = 0; i < 8; i++) {
00463 DECLARE_ALIGNED_16(DCTELEM, block[64]);
00464 DCTELEM *src_block = ctx->blocks[i];
00465 int overflow, nbits, diff, last_index;
00466 int n = dnxhd_switch_matrix(ctx, i);
00467
00468 memcpy(block, src_block, sizeof(block));
00469 last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
00470 ac_bits += dnxhd_calc_ac_bits(ctx, block, last_index);
00471
00472 diff = block[0] - ctx->m.last_dc[n];
00473 if (diff < 0) nbits = av_log2_16bit(-2*diff);
00474 else nbits = av_log2_16bit( 2*diff);
00475 dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
00476
00477 ctx->m.last_dc[n] = block[0];
00478
00479 if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) {
00480 dnxhd_unquantize_c(ctx, block, i, qscale, last_index);
00481 ctx->m.dsp.idct(block);
00482 ssd += dnxhd_ssd_block(block, src_block);
00483 }
00484 }
00485 ctx->mb_rc[qscale][mb].ssd = ssd;
00486 ctx->mb_rc[qscale][mb].bits = ac_bits+dc_bits+12+8*ctx->table_vlc_bits[0];
00487 }
00488 }
00489 return 0;
00490 }
00491
00492 static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg)
00493 {
00494 DNXHDEncContext *ctx = arg;
00495 int mb_y, mb_x;
00496
00497 for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
00498 ctx->m.last_dc[0] =
00499 ctx->m.last_dc[1] =
00500 ctx->m.last_dc[2] = 1024;
00501 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00502 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00503 int qscale = ctx->mb_qscale[mb];
00504 int i;
00505
00506 put_bits(&ctx->m.pb, 12, qscale<<1);
00507
00508 dnxhd_get_blocks(ctx, mb_x, mb_y);
00509
00510 for (i = 0; i < 8; i++) {
00511 DCTELEM *block = ctx->blocks[i];
00512 int last_index, overflow;
00513 int n = dnxhd_switch_matrix(ctx, i);
00514 last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
00515 dnxhd_encode_block(ctx, block, last_index, n);
00516 }
00517 }
00518 if (put_bits_count(&ctx->m.pb)&31)
00519 put_bits(&ctx->m.pb, 32-(put_bits_count(&ctx->m.pb)&31), 0);
00520 }
00521 flush_put_bits(&ctx->m.pb);
00522 return 0;
00523 }
00524
00525 static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx, uint8_t *buf)
00526 {
00527 int mb_y, mb_x;
00528 int i, offset = 0;
00529 for (i = 0; i < ctx->m.avctx->thread_count; i++) {
00530 int thread_size = 0;
00531 for (mb_y = ctx->thread[i]->m.start_mb_y; mb_y < ctx->thread[i]->m.end_mb_y; mb_y++) {
00532 ctx->slice_size[mb_y] = 0;
00533 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00534 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00535 ctx->slice_size[mb_y] += ctx->mb_bits[mb];
00536 }
00537 ctx->slice_size[mb_y] = (ctx->slice_size[mb_y]+31)&~31;
00538 ctx->slice_size[mb_y] >>= 3;
00539 thread_size += ctx->slice_size[mb_y];
00540 }
00541 init_put_bits(&ctx->thread[i]->m.pb, buf + 640 + offset, thread_size);
00542 offset += thread_size;
00543 }
00544 }
00545
00546 static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg)
00547 {
00548 DNXHDEncContext *ctx = arg;
00549 int mb_y, mb_x;
00550 for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
00551 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00552 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00553 uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4);
00554 int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
00555 int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
00556 ctx->mb_cmp[mb].value = varc;
00557 ctx->mb_cmp[mb].mb = mb;
00558 }
00559 }
00560 return 0;
00561 }
00562
00563 static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx)
00564 {
00565 int lambda, up_step, down_step;
00566 int last_lower = INT_MAX, last_higher = 0;
00567 int x, y, q;
00568
00569 for (q = 1; q < avctx->qmax; q++) {
00570 ctx->qscale = q;
00571 avctx->execute(avctx, dnxhd_calc_bits_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count);
00572 }
00573 up_step = down_step = 2<<LAMBDA_FRAC_BITS;
00574 lambda = ctx->lambda;
00575
00576 for (;;) {
00577 int bits = 0;
00578 int end = 0;
00579 if (lambda == last_higher) {
00580 lambda++;
00581 end = 1;
00582 }
00583 for (y = 0; y < ctx->m.mb_height; y++) {
00584 for (x = 0; x < ctx->m.mb_width; x++) {
00585 unsigned min = UINT_MAX;
00586 int qscale = 1;
00587 int mb = y*ctx->m.mb_width+x;
00588 for (q = 1; q < avctx->qmax; q++) {
00589 unsigned score = ctx->mb_rc[q][mb].bits*lambda+(ctx->mb_rc[q][mb].ssd<<LAMBDA_FRAC_BITS);
00590 if (score < min) {
00591 min = score;
00592 qscale = q;
00593 }
00594 }
00595 bits += ctx->mb_rc[qscale][mb].bits;
00596 ctx->mb_qscale[mb] = qscale;
00597 ctx->mb_bits[mb] = ctx->mb_rc[qscale][mb].bits;
00598 }
00599 bits = (bits+31)&~31;
00600 if (bits > ctx->frame_bits)
00601 break;
00602 }
00603
00604
00605 if (end) {
00606 if (bits > ctx->frame_bits)
00607 return -1;
00608 break;
00609 }
00610 if (bits < ctx->frame_bits) {
00611 last_lower = FFMIN(lambda, last_lower);
00612 if (last_higher != 0)
00613 lambda = (lambda+last_higher)>>1;
00614 else
00615 lambda -= down_step;
00616 down_step *= 5;
00617 up_step = 1<<LAMBDA_FRAC_BITS;
00618 lambda = FFMAX(1, lambda);
00619 if (lambda == last_lower)
00620 break;
00621 } else {
00622 last_higher = FFMAX(lambda, last_higher);
00623 if (last_lower != INT_MAX)
00624 lambda = (lambda+last_lower)>>1;
00625 else
00626 lambda += up_step;
00627 up_step *= 5;
00628 down_step = 1<<LAMBDA_FRAC_BITS;
00629 }
00630 }
00631
00632 ctx->lambda = lambda;
00633 return 0;
00634 }
00635
00636 static int dnxhd_find_qscale(DNXHDEncContext *ctx)
00637 {
00638 int bits = 0;
00639 int up_step = 1;
00640 int down_step = 1;
00641 int last_higher = 0;
00642 int last_lower = INT_MAX;
00643 int qscale;
00644 int x, y;
00645
00646 qscale = ctx->qscale;
00647 for (;;) {
00648 bits = 0;
00649 ctx->qscale = qscale;
00650
00651 ctx->m.avctx->execute(ctx->m.avctx, dnxhd_calc_bits_thread, (void**)&ctx->thread[0], NULL, ctx->m.avctx->thread_count);
00652 for (y = 0; y < ctx->m.mb_height; y++) {
00653 for (x = 0; x < ctx->m.mb_width; x++)
00654 bits += ctx->mb_rc[qscale][y*ctx->m.mb_width+x].bits;
00655 bits = (bits+31)&~31;
00656 if (bits > ctx->frame_bits)
00657 break;
00658 }
00659
00660
00661 if (bits < ctx->frame_bits) {
00662 if (qscale == 1)
00663 return 1;
00664 if (last_higher == qscale - 1) {
00665 qscale = last_higher;
00666 break;
00667 }
00668 last_lower = FFMIN(qscale, last_lower);
00669 if (last_higher != 0)
00670 qscale = (qscale+last_higher)>>1;
00671 else
00672 qscale -= down_step++;
00673 if (qscale < 1)
00674 qscale = 1;
00675 up_step = 1;
00676 } else {
00677 if (last_lower == qscale + 1)
00678 break;
00679 last_higher = FFMAX(qscale, last_higher);
00680 if (last_lower != INT_MAX)
00681 qscale = (qscale+last_lower)>>1;
00682 else
00683 qscale += up_step++;
00684 down_step = 1;
00685 if (qscale >= ctx->m.avctx->qmax)
00686 return -1;
00687 }
00688 }
00689
00690 ctx->qscale = qscale;
00691 return 0;
00692 }
00693
00694 static int dnxhd_rc_cmp(const void *a, const void *b)
00695 {
00696 return ((const RCCMPEntry *)b)->value - ((const RCCMPEntry *)a)->value;
00697 }
00698
00699 static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx)
00700 {
00701 int max_bits = 0;
00702 int ret, x, y;
00703 if ((ret = dnxhd_find_qscale(ctx)) < 0)
00704 return -1;
00705 for (y = 0; y < ctx->m.mb_height; y++) {
00706 for (x = 0; x < ctx->m.mb_width; x++) {
00707 int mb = y*ctx->m.mb_width+x;
00708 int delta_bits;
00709 ctx->mb_qscale[mb] = ctx->qscale;
00710 ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale][mb].bits;
00711 max_bits += ctx->mb_rc[ctx->qscale][mb].bits;
00712 if (!RC_VARIANCE) {
00713 delta_bits = ctx->mb_rc[ctx->qscale][mb].bits-ctx->mb_rc[ctx->qscale+1][mb].bits;
00714 ctx->mb_cmp[mb].mb = mb;
00715 ctx->mb_cmp[mb].value = delta_bits ?
00716 ((ctx->mb_rc[ctx->qscale][mb].ssd-ctx->mb_rc[ctx->qscale+1][mb].ssd)*100)/delta_bits
00717 : INT_MIN;
00718 }
00719 }
00720 max_bits += 31;
00721 }
00722 if (!ret) {
00723 if (RC_VARIANCE)
00724 avctx->execute(avctx, dnxhd_mb_var_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count);
00725 qsort(ctx->mb_cmp, ctx->m.mb_num, sizeof(RCEntry), dnxhd_rc_cmp);
00726 for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) {
00727 int mb = ctx->mb_cmp[x].mb;
00728 max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits;
00729 ctx->mb_qscale[mb] = ctx->qscale+1;
00730 ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale+1][mb].bits;
00731 }
00732 }
00733 return 0;
00734 }
00735
00736 static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame)
00737 {
00738 int i;
00739
00740 for (i = 0; i < 3; i++) {
00741 ctx->frame.data[i] = frame->data[i];
00742 ctx->frame.linesize[i] = frame->linesize[i];
00743 }
00744
00745 for (i = 0; i < ctx->m.avctx->thread_count; i++) {
00746 ctx->thread[i]->m.linesize = ctx->frame.linesize[0]<<ctx->interlaced;
00747 ctx->thread[i]->m.uvlinesize = ctx->frame.linesize[1]<<ctx->interlaced;
00748 ctx->thread[i]->dct_y_offset = ctx->m.linesize *8;
00749 ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8;
00750 }
00751
00752 ctx->frame.interlaced_frame = frame->interlaced_frame;
00753 ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
00754 }
00755
00756 static int dnxhd_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, const void *data)
00757 {
00758 DNXHDEncContext *ctx = avctx->priv_data;
00759 int first_field = 1;
00760 int offset, i, ret;
00761
00762 if (buf_size < ctx->cid_table->frame_size) {
00763 av_log(avctx, AV_LOG_ERROR, "output buffer is too small to compress picture\n");
00764 return -1;
00765 }
00766
00767 dnxhd_load_picture(ctx, data);
00768
00769 encode_coding_unit:
00770 for (i = 0; i < 3; i++) {
00771 ctx->src[i] = ctx->frame.data[i];
00772 if (ctx->interlaced && ctx->cur_field)
00773 ctx->src[i] += ctx->frame.linesize[i];
00774 }
00775
00776 dnxhd_write_header(avctx, buf);
00777
00778 if (avctx->mb_decision == FF_MB_DECISION_RD)
00779 ret = dnxhd_encode_rdo(avctx, ctx);
00780 else
00781 ret = dnxhd_encode_fast(avctx, ctx);
00782 if (ret < 0) {
00783 av_log(avctx, AV_LOG_ERROR, "picture could not fit ratecontrol constraints\n");
00784 return -1;
00785 }
00786
00787 dnxhd_setup_threads_slices(ctx, buf);
00788
00789 offset = 0;
00790 for (i = 0; i < ctx->m.mb_height; i++) {
00791 AV_WB32(ctx->msip + i * 4, offset);
00792 offset += ctx->slice_size[i];
00793 assert(!(ctx->slice_size[i] & 3));
00794 }
00795
00796 avctx->execute(avctx, dnxhd_encode_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count);
00797
00798 AV_WB32(buf + ctx->cid_table->coding_unit_size - 4, 0x600DC0DE);
00799
00800 if (ctx->interlaced && first_field) {
00801 first_field = 0;
00802 ctx->cur_field ^= 1;
00803 buf += ctx->cid_table->coding_unit_size;
00804 buf_size -= ctx->cid_table->coding_unit_size;
00805 goto encode_coding_unit;
00806 }
00807
00808 return ctx->cid_table->frame_size;
00809 }
00810
00811 static int dnxhd_encode_end(AVCodecContext *avctx)
00812 {
00813 DNXHDEncContext *ctx = avctx->priv_data;
00814 int i;
00815
00816 av_freep(&ctx->table_vlc_codes);
00817 av_freep(&ctx->table_vlc_bits);
00818 av_freep(&ctx->table_run_codes);
00819 av_freep(&ctx->table_run_bits);
00820
00821 av_freep(&ctx->mb_bits);
00822 av_freep(&ctx->mb_qscale);
00823 av_freep(&ctx->mb_rc);
00824 av_freep(&ctx->mb_cmp);
00825 av_freep(&ctx->slice_size);
00826
00827 av_freep(&ctx->qmatrix_c);
00828 av_freep(&ctx->qmatrix_l);
00829 av_freep(&ctx->qmatrix_c16);
00830 av_freep(&ctx->qmatrix_l16);
00831
00832 for (i = 1; i < avctx->thread_count; i++)
00833 av_freep(&ctx->thread[i]);
00834
00835 return 0;
00836 }
00837
/*
 * Codec descriptor for the DNxHD encoder. The leading fields are initialized
 * positionally; their meanings below are presumed from the values and
 * function names — confirm against the AVCodec declaration.
 */
AVCodec dnxhd_encoder = {
    "dnxhd",                    /* codec name */
    CODEC_TYPE_VIDEO,
    CODEC_ID_DNXHD,
    sizeof(DNXHDEncContext),    /* size of the private context (avctx->priv_data) */
    dnxhd_encode_init,
    dnxhd_encode_picture,
    dnxhd_encode_end,
    /* only planar YUV 4:2:2 is listed; dnxhd_encode_init() rejects anything else */
    .pix_fmts = (enum PixelFormat[]){PIX_FMT_YUV422P, -1},
};