| /* |
| * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder |
| * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| /** |
| * @file libavcodec/h264.c |
| * H.264 / AVC / MPEG4 part10 codec. |
| * @author Michael Niedermayer <michaelni@gmx.at> |
| */ |
| |
| #include "internal.h" |
| #include "dsputil.h" |
| #include "avcodec.h" |
| #include "mpegvideo.h" |
| #include "h264.h" |
| #include "h264data.h" |
| #include "h264_parser.h" |
| #include "golomb.h" |
| #include "mathops.h" |
| #include "rectangle.h" |
| #include "vdpau_internal.h" |
| |
| #include "cabac.h" |
| #if ARCH_X86 |
| #include "x86/h264_i386.h" |
| #endif |
| |
| //#undef NDEBUG |
| #include <assert.h> |
| |
| /** |
| * Value of Picture.reference when Picture is not a reference picture, but |
| * is held for delayed output. |
| */ |
| #define DELAYED_PIC_REF 4 |
| |
/* CAVLC VLC tables, file-scope so they are built once and shared by all
 * decoder instances. The *_tables arrays are the static storage handed to
 * the VLC init code; the *_size constants give each sub-table's capacity. */

/* coeff_token: 4 context classes selected by the predicted non-zero count */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

/* coeff_token for the 2x2 chroma DC block (single context) */
static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

/* total_zeros: one table per total-coefficient count (1..15) */
static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

/* total_zeros for chroma DC: one table per total-coefficient count (1..3) */
static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

/* run_before: one table per zeros-left count (1..6) */
static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

/* run_before for zeros-left > 6 */
static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/* forward declarations; the svq3_* helpers are presumably shared with the
 * SVQ3 decoder (note the prefix) -- TODO confirm against svq3.c */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static Picture * remove_long(H264Context *h, int i, int ref_mask);
| |
| static av_always_inline uint32_t pack16to32(int a, int b){ |
| #ifdef WORDS_BIGENDIAN |
| return (b&0xFFFF) + (a<<16); |
| #else |
| return (a&0xFFFF) + (b<<16); |
| #endif |
| } |
| |
/* rem6[qp] == qp % 6 for qp in 0..51 -- avoids a runtime modulo in dequant */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

/* div6[qp] == qp / 6 for qp in 0..51 -- avoids a runtime division in dequant */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};

/* Index remaps for the left-neighbor 4x4 blocks; fill_caches() selects a
 * row depending on the MBAFF field/frame pairing of the current MB and its
 * left neighbor (row 0 is the plain non-MBAFF mapping). */
static const uint8_t left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};

/* CAVLC level-code fast path: precomputed (level, consumed-bits) pairs
 * indexed by suffix length and the next LEVEL_TAB_BITS bits of the stream */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
| |
/**
 * Fill the per-macroblock neighbor caches from the already-decoded
 * neighboring macroblocks: intra4x4 prediction modes, non-zero coefficient
 * counts, CBP, motion vectors, reference indices, motion vector differences
 * and B-direct flags.
 *
 * @param h          decoder context
 * @param mb_type    type of the current macroblock
 * @param for_deblock nonzero when caches are filled for the loop filter;
 *                    only the subset the filter needs is set up, and
 *                    neighbors from other slices remain usable
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    const uint8_t * left_block;
    int topleft_partition= -1;
    int i;

    top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
        return;

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    // Default neighbor addresses (raster order, one MB per stride entry).
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block = left_block_options[0];
    if(FRAME_MBAFF){
        // MBAFF: neighbor addresses depend on the field/frame coding of the
        // current MB pair and of each neighboring pair.
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int topleft_pair_xy  = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_field_flag      = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);

        if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
            top_xy -= s->mb_stride;
        }
        if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
            topleft_xy -= s->mb_stride;
        } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
            topleft_xy += s->mb_stride;
            // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
            topleft_partition = 0;
        }
        if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
            topright_xy -= s->mb_stride;
        }
        if (left_mb_field_flag != curr_mb_field_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
                left_block = left_block_options[3];
            } else {
                left_block= left_block_options[2 - bottom];
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    if(for_deblock){
        // Loop-filter path: only top/left neighbor types are needed.
        // NOTE(review): the < 0xFFFF test appears to treat slice_table
        // 0xFFFF as "macroblock not available" while still accepting
        // neighbors from other slices -- confirm against slice_table setup.
        topleft_type = 0;
        topright_type = 0;
        top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(MB_MBAFF && !IS_INTRA(mb_type)){
            int list;
            for(list=0; list<h->list_count; list++){
                //These values were changed for ease of performing MC, we need to change them back
                //FIXME maybe we can make MC and loop filter use the same values or prevent
                //the MC code from changing ref_cache and rather use a temporary array.
                if(USES_LIST(mb_type,list)){
                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                    ref += h->b8_stride;
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                }
            }
        }
    }else{
        // Decode path: a neighbor only counts if it belongs to this slice.
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(IS_INTRA(mb_type)){
            // Compute sample-availability bitmasks for intra prediction.
            // With constrained_intra_pred only intra neighbors count.
            int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
            h->topleft_samples_available=
            h->top_samples_available=
            h->left_samples_available= 0xFFFF;
            h->topright_samples_available= 0xEEEA;

            if(!(top_type & type_mask)){
                h->topleft_samples_available= 0xB3FF;
                h->top_samples_available= 0x33FF;
                h->topright_samples_available= 0x26EA;
            }
            if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
                if(IS_INTERLACED(mb_type)){
                    if(!(left_type[0] & type_mask)){
                        h->topleft_samples_available&= 0xDFFF;
                        h->left_samples_available&= 0x5FFF;
                    }
                    if(!(left_type[1] & type_mask)){
                        h->topleft_samples_available&= 0xFF5F;
                        h->left_samples_available&= 0xFF5F;
                    }
                }else{
                    int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                                    ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                    assert(left_xy[0] == left_xy[1]);
                    if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                        h->topleft_samples_available&= 0xDF5F;
                        h->left_samples_available&= 0x5F5F;
                    }
                }
            }else{
                if(!(left_type[0] & type_mask)){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
                }
            }

            if(!(topleft_type & type_mask))
                h->topleft_samples_available&= 0x7FFF;

            if(!(topright_type & type_mask))
                h->topright_samples_available&= 0xFBFF;

            if(IS_INTRA4x4(mb_type)){
                // Seed the intra4x4 mode cache from the top and left
                // neighbors; -1 marks unavailable, 2 (DC) is the stand-in
                // for available non-4x4-intra neighbors.
                if(IS_INTRA4x4(top_type)){
                    h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                    h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                    h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                    h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                }else{
                    int pred;
                    if(!(top_type & type_mask))
                        pred= -1;
                    else{
                        pred= 2;
                    }
                    h->intra4x4_pred_mode_cache[4+8*0]=
                    h->intra4x4_pred_mode_cache[5+8*0]=
                    h->intra4x4_pred_mode_cache[6+8*0]=
                    h->intra4x4_pred_mode_cache[7+8*0]= pred;
                }
                for(i=0; i<2; i++){
                    if(IS_INTRA4x4(left_type[i])){
                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    }else{
                        int pred;
                        if(!(left_type[i] & type_mask))
                            pred= -1;
                        else{
                            pred= 2;
                        }
                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                    }
                }
            }
        }
    }


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    // Non-zero coefficient counts of the top neighbor (64 = "assume coded"
    // for unavailable neighbors except in the CABAC inter case).
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

    }else{
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    }

    // Non-zero coefficient counts of the two left neighbors.
    for (i=0; i<2; i++) {
        if(left_type[i]){
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
        }else{
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 +   8*i]=
            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
        }
    }

    // CBP context for CABAC (0x1C0: treat unavailable neighbors of an intra
    // MB as fully coded).
    if( h->pps.cabac ) {
        // top_cbp
        if(top_type) {
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->top_cbp = 0x1C0;
        } else {
            h->top_cbp = 0;
        }
        // left_cbp
        if (left_type[0]) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = 0x1C0;
        } else {
            h->left_cbp = 0;
        }
        if (left_type[0]) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
        }
        if (left_type[1]) {
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
        }
    }

#if 1
    // Motion vector / reference index caches for inter and direct MBs.
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
                }*/
                continue;
            }
            h->mv_cache_clean[list]= 0;

            // top row of the cache (4 MVs + 2 ref indices)
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
            }

            // left column of the cache (2 MVs + refs per left neighbor half)
            for(i=0; i<2; i++){
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                }else{
                    *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx  ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
            }

            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
                continue;

            // topleft corner
            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            // topright corner
            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
                continue;

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            if( h->pps.cabac ) {
                /* XXX beurk, Load mvd */
                // MV-difference cache for CABAC context modelling.
                if(USES_LIST(top_type, list)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                }
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                }
                if(USES_LIST(left_type[1], list)){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                }
                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

                // B-direct flags of the neighbors (B slices only).
                if(h->slice_type_nos == FF_B_TYPE){
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    }else{
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                    }

                    if(IS_DIRECT(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                    else if(IS_8X8(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                    if(IS_DIRECT(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    else if(IS_8X8(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;
                }
            }

            // MBAFF: rescale neighbor MVs/refs whose field/frame coding
            // differs from the current MB (field MVs cover half the height).
            if(FRAME_MBAFF){
#define MAP_MVS\
                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
                if(MB_FIELD){
#define MAP_F2F(idx, mb_type)\
                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] <<= 1;\
                        h->mv_cache[list][idx][1] /= 2;\
                        h->mvd_cache[list][idx][1] /= 2;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }else{
#define MAP_F2F(idx, mb_type)\
                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] >>= 1;\
                        h->mv_cache[list][idx][1] <<= 1;\
                        h->mvd_cache[list][idx][1] <<= 1;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }
            }
        }
    }
#endif

    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
}
| |
| static inline void write_back_intra_pred_mode(H264Context *h){ |
| const int mb_xy= h->mb_xy; |
| |
| h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1]; |
| h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2]; |
| h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3]; |
| h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4]; |
| h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4]; |
| h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4]; |
| h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4]; |
| } |
| |
| /** |
| * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. |
| */ |
| static inline int check_intra4x4_pred_mode(H264Context *h){ |
| MpegEncContext * const s = &h->s; |
| static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0}; |
| static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED}; |
| int i; |
| |
| if(!(h->top_samples_available&0x8000)){ |
| for(i=0; i<4; i++){ |
| int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ]; |
| if(status<0){ |
| av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); |
| return -1; |
| } else if(status){ |
| h->intra4x4_pred_mode_cache[scan8[0] + i]= status; |
| } |
| } |
| } |
| |
| if((h->left_samples_available&0x8888)!=0x8888){ |
| static const int mask[4]={0x8000,0x2000,0x80,0x20}; |
| for(i=0; i<4; i++){ |
| if(!(h->left_samples_available&mask[i])){ |
| int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ]; |
| if(status<0){ |
| av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); |
| return -1; |
| } else if(status){ |
| h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status; |
| } |
| } |
| } |
| } |
| |
| return 0; |
| } //FIXME cleanup like next |
| |
| /** |
| * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. |
| */ |
| static inline int check_intra_pred_mode(H264Context *h, int mode){ |
| MpegEncContext * const s = &h->s; |
| static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1}; |
| static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8}; |
| |
| if(mode > 6U) { |
| av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y); |
| return -1; |
| } |
| |
| if(!(h->top_samples_available&0x8000)){ |
| mode= top[ mode ]; |
| if(mode<0){ |
| av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); |
| return -1; |
| } |
| } |
| |
| if((h->left_samples_available&0x8080) != 0x8080){ |
| mode= left[ mode ]; |
| if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred |
| mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8); |
| } |
| if(mode<0){ |
| av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); |
| return -1; |
| } |
| } |
| |
| return mode; |
| } |
| |
| /** |
| * gets the predicted intra4x4 prediction mode. |
| */ |
| static inline int pred_intra_mode(H264Context *h, int n){ |
| const int index8= scan8[n]; |
| const int left= h->intra4x4_pred_mode_cache[index8 - 1]; |
| const int top = h->intra4x4_pred_mode_cache[index8 - 8]; |
| const int min= FFMIN(left, top); |
| |
| tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min); |
| |
| if(min<0) return DC_PRED; |
| else return min; |
| } |
| |
| static inline void write_back_non_zero_count(H264Context *h){ |
| const int mb_xy= h->mb_xy; |
| |
| h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1]; |
| h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2]; |
| h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3]; |
| h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4]; |
| h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4]; |
| h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4]; |
| h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4]; |
| |
| h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2]; |
| h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2]; |
| h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1]; |
| |
| h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5]; |
| h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5]; |
| h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4]; |
| } |
| |
| /** |
| * gets the predicted number of non-zero coefficients. |
| * @param n block index |
| */ |
| static inline int pred_non_zero_count(H264Context *h, int n){ |
| const int index8= scan8[n]; |
| const int left= h->non_zero_count_cache[index8 - 1]; |
| const int top = h->non_zero_count_cache[index8 - 8]; |
| int i= left + top; |
| |
| if(i<64) i= (i+1)>>1; |
| |
| tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31); |
| |
| return i&31; |
| } |
| |
/**
 * Fetch the motion vector and reference index of the diagonal (top-right)
 * neighbor for MV prediction, falling back to the top-left neighbor when
 * the top-right is unavailable. Handles the MBAFF cross-field cases by
 * rescaling the vector into a scratch cache slot.
 *
 * @param C          receives a pointer to the neighbor's MV
 * @param i          scan8 index of the current block
 * @param part_width partition width in 4x4 units (1, 2 or 4)
 * @return the neighbor's reference index (or LIST_NOT_USED /
 *         PART_NOT_AVAILABLE style sentinel)
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if(FRAME_MBAFF){
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
        const int16_t *mv;
        // scratch slot used to hold a rescaled cross-field MV
        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
        *C = h->mv_cache[list][scan8[0]-2];

        if(!MB_FIELD
           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
            if(IS_INTERLACED(mb_types[topright_xy])){
                // SET_DIAG_MV copies (and rescales) the MV of the MB at 4x4
                // position (X4,Y4) into the scratch slot and RETURNS its
                // (rescaled) reference index -- note the embedded returns.
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
            }
        }
        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
            // top-right missing: try the left neighbor across a field/frame
            // boundary instead
            if(!MB_FIELD
               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
            }
            if(MB_FIELD
               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
               && i >= scan8[0]+8){
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
            }
        }
#undef SET_DIAG_MV
    }

    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        return topright_ref;
    }else{
        tprintf(s->avctx, "topright MV not available\n");

        // fall back to the top-left neighbor
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
    }
}
| |
| /** |
| * gets the predicted MV. |
| * @param n the block index |
| * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4) |
| * @param mx the x component of the predicted motion vector |
| * @param my the y component of the predicted motion vector |
| */ |
| static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){ |
| const int index8= scan8[n]; |
| const int top_ref= h->ref_cache[list][ index8 - 8 ]; |
| const int left_ref= h->ref_cache[list][ index8 - 1 ]; |
| const int16_t * const A= h->mv_cache[list][ index8 - 1 ]; |
| const int16_t * const B= h->mv_cache[list][ index8 - 8 ]; |
| const int16_t * C; |
| int diagonal_ref, match_count; |
| |
| assert(part_width==1 || part_width==2 || part_width==4); |
| |
| /* mv_cache |
| B . . A T T T T |
| U . . L . . , . |
| U . . L . . . . |
| U . . L . . , . |
| . . . L . . . . |
| */ |
| |
| diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width); |
| match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref); |
| tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count); |
| if(match_count > 1){ //most common |
| *mx= mid_pred(A[0], B[0], C[0]); |
| *my= mid_pred(A[1], B[1], C[1]); |
| }else if(match_count==1){ |
| if(left_ref==ref){ |
| *mx= A[0]; |
| *my= A[1]; |
| }else if(top_ref==ref){ |
| *mx= B[0]; |
| *my= B[1]; |
| }else{ |
| *mx= C[0]; |
| *my= C[1]; |
| } |
| }else{ |
| if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){ |
| *mx= A[0]; |
| *my= A[1]; |
| }else{ |
| *mx= mid_pred(A[0], B[0], C[0]); |
| *my= mid_pred(A[1], B[1], C[1]); |
| } |
| } |
| |
| tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); |
| } |
| |
| /** |
| * gets the directionally predicted 16x8 MV. |
| * @param n the block index |
| * @param mx the x component of the predicted motion vector |
| * @param my the y component of the predicted motion vector |
| */ |
| static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ |
| if(n==0){ |
| const int top_ref= h->ref_cache[list][ scan8[0] - 8 ]; |
| const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; |
| |
| tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); |
| |
| if(top_ref == ref){ |
| *mx= B[0]; |
| *my= B[1]; |
| return; |
| } |
| }else{ |
| const int left_ref= h->ref_cache[list][ scan8[8] - 1 ]; |
| const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ]; |
| |
| tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); |
| |
| if(left_ref == ref){ |
| *mx= A[0]; |
| *my= A[1]; |
| return; |
| } |
| } |
| |
| //RARE |
| pred_motion(h, n, 4, list, ref, mx, my); |
| } |
| |
| /** |
| * gets the directionally predicted 8x16 MV. |
| * @param n the block index |
| * @param mx the x component of the predicted motion vector |
| * @param my the y component of the predicted motion vector |
| */ |
| static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ |
| if(n==0){ |
| const int left_ref= h->ref_cache[list][ scan8[0] - 1 ]; |
| const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; |
| |
| tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); |
| |
| if(left_ref == ref){ |
| *mx= A[0]; |
| *my= A[1]; |
| return; |
| } |
| }else{ |
| const int16_t * C; |
| int diagonal_ref; |
| |
| diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2); |
| |
| tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); |
| |
| if(diagonal_ref == ref){ |
| *mx= C[0]; |
| *my= C[1]; |
| return; |
| } |
| } |
| |
| //RARE |
| pred_motion(h, n, 2, list, ref, mx, my); |
| } |
| |
| static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){ |
| const int top_ref = h->ref_cache[0][ scan8[0] - 8 ]; |
| const int left_ref= h->ref_cache[0][ scan8[0] - 1 ]; |
| |
| tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y); |
| |
| if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE |
| || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ]) |
| || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){ |
| |
| *mx = *my = 0; |
| return; |
| } |
| |
| pred_motion(h, 0, 4, 0, 0, mx, my); |
| |
| return; |
| } |
| |
| static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){ |
| int poc0 = h->ref_list[0][i].poc; |
| int td = av_clip(poc1 - poc0, -128, 127); |
| if(td == 0 || h->ref_list[0][i].long_ref){ |
| return 256; |
| }else{ |
| int tb = av_clip(poc - poc0, -128, 127); |
| int tx = (16384 + (FFABS(td) >> 1)) / td; |
| return av_clip((tb*tx + 32) >> 6, -1024, 1023); |
| } |
| } |
| |
/**
 * Precomputes the temporal-direct distance scale factors for the whole
 * slice: the per-field tables (dist_scale_factor_field) and the
 * frame/field table (dist_scale_factor).
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    MpegEncContext * const s = &h->s;
    const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
    const int poc1 = h->ref_list[1][0].poc;
    int i, field;
    for(field=0; field<2; field++){
        /* NOTE: these deliberately shadow the outer poc/poc1 with the
         * per-field values; i+16 addresses the field ref-list entries */
        const int poc  = h->s.current_picture_ptr->field_poc[field];
        const int poc1 = h->ref_list[1][0].field_poc[field];
        for(i=0; i < 2*h->ref_count[0]; i++)
            h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
    }

    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
    }
}
| |
/**
 * Builds the map from the colocated (list1[0]) picture's reference
 * indices to the current slice's reference indices, for temporal direct
 * prediction. ref_poc entries encode 4*frame_num + (reference&3), which
 * is what the matching below compares against.
 * @param map      map[list][old_ref] receives the current ref index;
 *                 the +16 entries hold the per-field-parity variants
 * @param field    current field parity (used when mbafi is set)
 * @param colfield field parity of the colocated picture's ref lists
 * @param mbafi    nonzero when filling the MBAFF per-field maps
 */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    int j, old_ref, rfield;
    int start= mbafi ? 16 : 0;
    int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    int interl= mbafi || s->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            int poc = ref1->ref_poc[colfield][list][old_ref];

            if (!interl)
                poc |= 3;
            else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
                poc= (poc&~3) + rfield + 1;

            /* find the current-list entry with the same encoded poc */
            for(j=start; j<end; j++){
                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                    int cur_ref= mbafi ? (j-16)^field : j;
                    map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                    if(rfield == field)
                        map[list][old_ref] = cur_ref;
                    break;
                }
            }
        }
    }
}
| |
/**
 * Stores the current slice's reference lists (counts and encoded pocs)
 * into the current picture, and for temporal-direct B slices builds the
 * colocated-to-list0 reference maps.
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    int list, j, field;
    /* structure index: 0 for frame/top, 1 for bottom (see PICT_* values) */
    int sidx= (s->picture_structure&1)^1;
    int ref1sidx= (ref1->reference&1)^1;

    for(list=0; list<2; list++){
        cur->ref_count[sidx][list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            /* encode as 4*frame_num + (reference&3), matched in fill_colmap() */
            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
    }

    if(s->picture_structure == PICT_FRAME){
        /* a frame covers both field parities: duplicate into the other slot */
        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
        memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
    }

    cur->mbaff= FRAME_MBAFF;

    /* the maps below are only needed for temporal direct B prediction */
    if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
        return;

    for(list=0; list<2; list++){
        fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
        for(field=0; field<2; field++)
            fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
    }
}
| |
/**
 * Derives the motion vectors and reference indices for a B_Direct
 * macroblock (or the direct sub-blocks of a B_8x8 macroblock), either
 * by spatial prediction or by temporal scaling of the colocated list1
 * macroblock (H.264 spec 8.4.1.2). Results are written to h->mv_cache /
 * h->ref_cache; *mb_type and h->sub_mb_type are updated to the derived
 * partitioning.
 */
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    int b8_stride = h->b8_stride;
    int b4_stride = h->b_stride;
    int mb_xy = h->mb_xy;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type;
    int i8, i4;

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)

    /* Locate the colocated macroblock and pick the derived partition
     * sizes, remapping mb_xy/strides when current and colocated pictures
     * differ in frame/field coding. */
    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
        if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
            /* use the colocated field closest in poc to the current picture */
            int cur_poc = s->current_picture_ptr->poc;
            int *col_poc = h->ref_list[1]->field_poc;
            int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
            mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
            b8_stride = 0;
        }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
            mb_xy += s->mb_stride*fieldoff;
        }
        goto single_col;
    }else{ // AFL/AFR/FR/FL -> AFR/FR
        if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
            b8_stride *= 3;
            b4_stride *= 6;
            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
            if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
                && !is_b8x8){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }
        }else{                                           //     AFR/FR    -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
                /* FIXME save sub mb types from previous frames (or derive from MVs)
                * so we know exactly what block size to use */
                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }
        }
    }

    /* pointers into the colocated picture's MV and ref-index planes */
    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
    if(!b8_stride){
        /* frame MB over field colocated: bottom half reads the lower rows */
        if(s->mb_y&1){
            l1ref0 += h->b8_stride;
            l1ref1 += h->b8_stride;
            l1mv0  +=  2*b4_stride;
            l1mv1  +=  2*b4_stride;
        }
    }

    if(h->direct_spatial_mv_pred){
        /* ---- spatial direct prediction ---- */
        int ref[2];
        int mv[2][2];
        int list;

        /* FIXME interlacing + spatial direct uses wrong colocated block positions */

        /* ref = min(neighbors) */
        for(list=0; list<2; list++){
            int refa = h->ref_cache[list][scan8[0] - 1];
            int refb = h->ref_cache[list][scan8[0] - 8];
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
            if(refc == PART_NOT_AVAILABLE)
                refc = h->ref_cache[list][scan8[0] - 8 - 1];
            /* unsigned compare maps negative (unavailable) refs to huge values */
            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
            if(ref[list] < 0)
                ref[list] = -1;
        }

        if(ref[0] < 0 && ref[1] < 0){
            /* no usable neighbour in either list: zero refs and MVs */
            ref[0] = ref[1] = 0;
            mv[0][0] = mv[0][1] =
            mv[1][0] = mv[1][1] = 0;
        }else{
            for(list=0; list<2; list++){
                if(ref[list] >= 0)
                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
                else
                    mv[list][0] = mv[list][1] = 0;
            }
        }

        /* drop the prediction list that has no usable reference */
        if(ref[1] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L1;
            sub_mb_type &= ~MB_TYPE_L1;
        }else if(ref[0] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L0;
            sub_mb_type &= ~MB_TYPE_L0;
        }

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* mixed frame/field: per-8x8 colocated zero-MV test */
            for(i8=0; i8<4; i8++){
                int x8 = i8&1;
                int y8 = i8>>1;
                int xy8 = x8+y8*b8_stride;
                int xy4 = 3*x8+y8*b4_stride;
                int a=0, b=0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                if(!IS_INTRA(mb_type_col[y8])
                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
                    /* colocated block is (near) static: keep only refs > 0 moving */
                    if(ref[0] > 0)
                        a= pack16to32(mv[0][0],mv[0][1]);
                    if(ref[1] > 0)
                        b= pack16to32(mv[1][0],mv[1][1]);
                }else{
                    a= pack16to32(mv[0][0],mv[0][1]);
                    b= pack16to32(mv[1][0],mv[1][1]);
                }
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
            }
        }else if(IS_16X16(*mb_type)){
            int a=0, b=0;

            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
            if(!IS_INTRA(mb_type_col[0])
               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
                       && (h->x264_build>33 || !h->x264_build)))){
                if(ref[0] > 0)
                    a= pack16to32(mv[0][0],mv[0][1]);
                if(ref[1] > 0)
                    b= pack16to32(mv[1][0],mv[1][1]);
            }else{
                a= pack16to32(mv[0][0],mv[0][1]);
                b= pack16to32(mv[1][0],mv[1][1]);
            }
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);

                /* col_zero_flag */
                if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
                                                 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
                                                     && (h->x264_build>33 || !h->x264_build)))){
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
                    if(IS_SUB_8X8(sub_mb_type)){
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                            if(ref[1] == 0)
                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                        }
                    }else
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
                            if(ref[1] == 0)
                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
                        }
                    }
                }
            }
        }
    }else{ /* direct temporal mv pred */
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
        const int *dist_scale_factor = h->dist_scale_factor;
        int ref_offset= 0;

        if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
            /* field MB inside an MBAFF frame uses the per-field tables */
            map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
            map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
            dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
        }
        if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
            ref_offset += 16;

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* FIXME assumes direct_8x8_inference == 1 */
            int y_shift  = 2*!IS_INTERLACED(*mb_type);

            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[y8])){
                    /* intra colocated block: zero refs and MVs */
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8*b8_stride];
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                }
                scale = dist_scale_factor[ref0];
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);

                {
                    const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
                    /* y_shift compensates the frame/field vertical resolution change */
                    int my_col = (mv_col[1]<<y_shift)/2;
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * my_col + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
                }
            }
            return;
        }

        /* one-to-one mv scaling */

        if(IS_16X16(*mb_type)){
            int ref, mv0, mv1;

            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if(IS_INTRA(mb_type_col[0])){
                ref=mv0=mv1=0;
            }else{
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                int mv_l0[2];
                /* list0 MV = scaled colocated MV; list1 MV = list0 MV - colocated MV */
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                ref= ref0;
                mv0= pack16to32(mv_l0[0],mv_l0[1]);
                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
            }
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[0])){
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                }
                scale = dist_scale_factor[ref0];

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                if(IS_SUB_8X8(sub_mb_type)){
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
                }else
                for(i4=0; i4<4; i4++){
                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
                }
            }
        }
    }
}
| |
/**
 * Writes the per-macroblock MV/ref caches back into the frame-wide
 * tables (current_picture.motion_val / ref_index, and for CABAC the
 * mvd_table and direct_table).
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
    int list;

    /* NOTE(review): only list0 gets an explicit LIST_NOT_USED fill here;
     * list1 entries are presumably handled by the loop below or left
     * stale intentionally — confirm against callers before changing */
    if(!USES_LIST(mb_type, 0))
        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);

    for(list=0; list<h->list_count; list++){
        int y;
        if(!USES_LIST(mb_type, list))
            continue;

        /* copy the 4x4 MV grid two vectors (one uint64_t) at a time */
        for(y=0; y<4; y++){
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        }
        if( h->pps.cabac ) {
            /* CABAC needs the MV differences of this MB for context modelling */
            if(IS_SKIP(mb_type))
                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
            else
                for(y=0; y<4; y++){
                    *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
                    *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
                }
        }

        {
            /* one ref index per 8x8 block */
            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
        }
    }

    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
        if(IS_8X8(mb_type)){
            /* sub_mb_type[0] is not stored; position 0 is presumably
             * reconstructed from the mb type elsewhere */
            uint8_t *direct_table = &h->direct_table[b8_xy];
            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
        }
    }
}
| |
| const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){ |
| int i, si, di; |
| uint8_t *dst; |
| int bufidx; |
| |
| // src[0]&0x80; //forbidden bit |
| h->nal_ref_idc= src[0]>>5; |
| h->nal_unit_type= src[0]&0x1F; |
| |
| src++; length--; |
| #if 0 |
| for(i=0; i<length; i++) |
| printf("%2X ", src[i]); |
| #endif |
| |
| #if HAVE_FAST_UNALIGNED |
| # if HAVE_FAST_64BIT |
| # define RS 7 |
| for(i=0; i+1<length; i+=9){ |
| if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL)) |
| # else |
| # define RS 3 |
| for(i=0; i+1<length; i+=5){ |
| if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U)) |
| # endif |
| continue; |
| if(i>0 && !src[i]) i--; |
| while(src[i]) i++; |
| #else |
| # define RS 0 |
| for(i=0; i+1<length; i+=2){ |
| if(src[i]) continue; |
| if(i>0 && src[i-1]==0) i--; |
| #endif |
| if(i+2<length && src[i+1]==0 && src[i+2]<=3){ |
| if(src[i+2]!=3){ |
| /* startcode, so we must be past the end */ |
| length=i; |
| } |
| break; |
| } |
| i-= RS; |
| } |
| |
| if(i>=length-1){ //no escaped 0 |
| *dst_length= length; |
| *consumed= length+1; //+1 for the header |
| return src; |
| } |
| |
| bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data |
| h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE); |
| dst= h->rbsp_buffer[bufidx]; |
| |
| if (dst == NULL){ |
| return NULL; |
| } |
| |
| //printf("decoding esc\n"); |
| memcpy(dst, src, i); |
| si=di=i; |
| while(si+2<length){ |
| //remove escapes (very rare 1:2^22) |
| if(src[si+2]>3){ |
| dst[di++]= src[si++]; |
| dst[di++]= src[si++]; |
| }else if(src[si]==0 && src[si+1]==0){ |
| if(src[si+2]==3){ //escape |
| dst[di++]= 0; |
| dst[di++]= 0; |
| si+=3; |
| continue; |
| }else //next start code |
| goto nsc; |
| } |
| |
| dst[di++]= src[si++]; |
| } |
| while(si<length) |
| dst[di++]= src[si++]; |
| nsc: |
| |
| memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE); |
| |
| *dst_length= di; |
| *consumed= si + 1;//+1 for the header |
| //FIXME store exact number of bits in the getbitcontext (it is needed for decoding) |
| return dst; |
| } |
| |
| int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){ |
| int v= *src; |
| int r; |
| |
| tprintf(h->s.avctx, "rbsp trailing %X\n", v); |
| |
| for(r=1; r<9; r++){ |
| if(v&1) return r; |
| v>>=1; |
| } |
| return 0; |
| } |
| |
| /** |
| * IDCT transforms the 16 dc values and dequantizes them. |
| * @param qp quantization parameter |
| */ |
| static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ |
| #define stride 16 |
| int i; |
| int temp[16]; //FIXME check if this is a good idea |
| static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; |
| static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; |
| |
| //memset(block, 64, 2*256); |
| //return; |
| for(i=0; i<4; i++){ |
| const int offset= y_offset[i]; |
| const int z0= block[offset+stride*0] + block[offset+stride*4]; |
| const int z1= block[offset+stride*0] - block[offset+stride*4]; |
| const int z2= block[offset+stride*1] - block[offset+stride*5]; |
| const int z3= block[offset+stride*1] + block[offset+stride*5]; |
| |
| temp[4*i+0]= z0+z3; |
| temp[4*i+1]= z1+z2; |
| temp[4*i+2]= z1-z2; |
| temp[4*i+3]= z0-z3; |
| } |
| |
| for(i=0; i<4; i++){ |
| const int offset= x_offset[i]; |
| const int z0= temp[4*0+i] + temp[4*2+i]; |
| const int z1= temp[4*0+i] - temp[4*2+i]; |
| const int z2= temp[4*1+i] - temp[4*3+i]; |
| const int z3= temp[4*1+i] + temp[4*3+i]; |
| |
| block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual |
| block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8)); |
| block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8)); |
| block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8)); |
| } |
| } |
| |
#if 0
/* NOTE(review): dead encoder-side code, kept disabled; the forward DC
 * transform counterpart of h264_luma_dc_dequant_idct_c() above. */
/**
 * DCT transforms the 16 dc values.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif

/* NOTE(review): xStride is never #defined in this file (the chroma
 * functions below use a const int) — this #undef looks like a leftover */
#undef xStride
#undef stride
| |
| static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ |
| const int stride= 16*2; |
| const int xStride= 16; |
| int a,b,c,d,e; |
| |
| a= block[stride*0 + xStride*0]; |
| b= block[stride*0 + xStride*1]; |
| c= block[stride*1 + xStride*0]; |
| d= block[stride*1 + xStride*1]; |
| |
| e= a-b; |
| a= a+b; |
| b= c-d; |
| c= c+d; |
| |
| block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7; |
| block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7; |
| block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7; |
| block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7; |
| } |
| |
#if 0
/* NOTE(review): dead encoder-side code, kept disabled; the forward 2x2
 * chroma DC transform counterpart of chroma_dc_dequant_idct_c() above. */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
| |
| /** |
| * gets the chroma qp. |
| */ |
| static inline int get_chroma_qp(H264Context *h, int t, int qscale){ |
| return h->pps.chroma_qp_table[t][qscale]; |
| } |
| |
/**
 * Performs unidirectional motion compensation for one partition:
 * quarter-pel luma and eighth-pel chroma interpolation from pic into
 * dest_y/cb/cr, with edge emulation when the reference area crosses the
 * picture border.
 * @param n      block index (selects the MV from mv_cache)
 * @param square nonzero when a single qpix_op call covers the partition;
 *               otherwise a second call at +delta is made
 * @param list   reference list the MV belongs to
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    /* sub-pel filtering reads 3 extra pixels on each side */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}
| |
| static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, |
| uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, |
| int x_offset, int y_offset, |
| qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, |
| qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, |
| int list0, int list1){ |
| MpegEncContext * const s = &h->s; |
| qpel_mc_func *qpix_op= qpix_put; |
| h264_chroma_mc_func chroma_op= chroma_put; |
| |
| dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; |
| dest_cb += x_offset + y_offset*h->mb_uvlinesize; |
| dest_cr += x_offset + y_offset*h->mb_uvlinesize; |
| x_offset += 8*s->mb_x; |
| y_offset += 8*(s->mb_y >> MB_FIELD); |
| |
| if(list0){ |
| Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; |
| mc_dir_part(h, ref, n, square, chroma_height, delta, 0, |
| dest_y, dest_cb, dest_cr, x_offset, y_offset, |
| qpix_op, chroma_op); |
| |
| qpix_op= qpix_avg; |
| chroma_op= chroma_avg; |
| } |
| |
| if(list1){ |
| Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; |
| mc_dir_part(h, ref, n, square, chroma_height, delta, 1, |
| dest_y, dest_cb, dest_cr, x_offset, y_offset, |
| qpix_op, chroma_op); |
| } |
| } |
| |
/**
 * Weighted-prediction motion compensation for one partition.
 * Bidirectional: both references are predicted (second into a scratch
 * buffer) and blended with implicit (use_weight==2) or explicit weights.
 * Unidirectional: a single prediction followed by explicit weighting.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;

    dest_y += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb += x_offset + y_offset*h->mb_uvlinesize;
    dest_cr += x_offset + y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
        uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            /* implicit weighting: weights sum to 64, log2 denom 5, no offset */
            int weight0 = h->implicit_weight[refn0][refn1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y, tmp_y, h->  mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            /* explicit weighting: per-ref weights and offsets from the slice header */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
        }
    }else{
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
        }
    }
}
| |
| static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta, |
| uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, |
| int x_offset, int y_offset, |
| qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, |
| qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, |
| h264_weight_func *weight_op, h264_biweight_func *weight_avg, |
| int list0, int list1){ |
| if((h->use_weight==2 && list0 && list1 |
| && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32)) |
| || h->use_weight==1) |
| mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, |
| x_offset, y_offset, qpix_put, chroma_put, |
| weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); |
| else |
| mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, |
| x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); |
| } |
| |
| static inline void prefetch_motion(H264Context *h, int list){ |
| /* fetch pixels for estimated mv 4 macroblocks ahead |
| * optimized for 64byte cache lines */ |
| MpegEncContext * const s = &h->s; |
| const int refn = h->ref_cache[list][scan8[0]]; |
| if(refn >= 0){ |
| const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; |
| const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; |
| uint8_t **src= h->ref_list[list][refn].data; |
| int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64; |
| s->dsp.prefetch(src[0]+off, s->linesize, 4); |
| off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; |
| s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); |
| } |
| } |
| |
/**
 * Perform inter prediction for one macroblock: dispatch on the macroblock
 * partitioning (16x16, 16x8, 8x16 or 8x8 with sub-partitions) and run
 * mc_part() for every partition with the put/avg/weight function set
 * matching that partition size. Reference pixels for the next macroblocks
 * are prefetched for both lists around the actual work.
 */
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    prefetch_motion(h, 0);

    if(IS_16X16(mb_type)){
        /* single 16x16 partition */
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                &weight_op[0], &weight_avg[0],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    }else if(IS_16X8(mb_type)){
        /* two 16x8 partitions, stacked vertically (y_offset 0 and 4) */
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else if(IS_8X16(mb_type)){
        /* two 8x16 partitions, side by side (x_offset 0 and 4);
         * delta = 8*mb_linesize steps into the non-square second half */
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else{
        int i;

        assert(IS_8X8(mb_type));

        /* four 8x8 blocks, each possibly split further by sub_mb_type */
        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                }
            }
        }
    }

    prefetch_motion(h, 1);
}
| |
/**
 * Build the CAVLC level-decoding lookup table. For every suffix length and
 * every possible LEVEL_TAB_BITS-bit bitstream window, store the decoded
 * signed level in [0] and the number of bits consumed in [1]. Codes that do
 * not fit completely in the window are stored with values offset by +100,
 * presumably acting as escape markers for a slower fallback path in the
 * reader -- TODO confirm against the CAVLC level parser.
 */
static av_cold void init_cavlc_level_tab(void){
    int suffix_length, mask;
    unsigned int i;

    for(suffix_length=0; suffix_length<7; suffix_length++){
        for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
            /* number of leading zero bits of the window before the first set
             * bit (the unary prefix of the level code) */
            int prefix= LEVEL_TAB_BITS - av_log2(2*i);
            int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);

            /* fold the unsigned code into a signed level:
             * even level_code -> positive, odd -> negative */
            mask= -(level_code&1);
            level_code= (((2+level_code)>>1) ^ mask) - mask;
            if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
                /* whole code fits in the window: store final level + length */
                cavlc_level_tab[suffix_length][i][0]= level_code;
                cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
            }else if(prefix + 1 <= LEVEL_TAB_BITS){
                /* only the unary prefix fits: store it as prefix+100 */
                cavlc_level_tab[suffix_length][i][0]= prefix+100;
                cavlc_level_tab[suffix_length][i][1]= prefix + 1;
            }else{
                /* not even the prefix terminates inside the window */
                cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
                cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
            }
        }
    }
}
| |
| static av_cold void decode_init_vlc(void){ |
| static int done = 0; |
| |
| if (!done) { |
| int i; |
| int offset; |
| done = 1; |
| |
| chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table; |
| chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size; |
| init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5, |
| &chroma_dc_coeff_token_len [0], 1, 1, |
| &chroma_dc_coeff_token_bits[0], 1, 1, |
| INIT_VLC_USE_NEW_STATIC); |
| |
| offset = 0; |
| for(i=0; i<4; i++){ |
| coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; |
| coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i]; |
| init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17, |
| &coeff_token_len [i][0], 1, 1, |
| &coeff_token_bits[i][0], 1, 1, |
| INIT_VLC_USE_NEW_STATIC); |
| offset += coeff_token_vlc_tables_size[i]; |
| } |
| /* |
| * This is a one time safety check to make sure that |
| * the packed static coeff_token_vlc table sizes |
| * were initialized correctly. |
| */ |
| assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables)); |
| |
| for(i=0; i<3; i++){ |
| chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i]; |
| chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size; |
| init_vlc(&chroma_dc_total_zeros_vlc[i], |
| CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4, |
| &chroma_dc_total_zeros_len [i][0], 1, 1, |
| &chroma_dc_total_zeros_bits[i][0], 1, 1, |
| INIT_VLC_USE_NEW_STATIC); |
| } |
| for(i=0; i<15; i++){ |
| total_zeros_vlc[i].table = total_zeros_vlc_tables[i]; |
| total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size; |
| init_vlc(&total_zeros_vlc[i], |
| TOTAL_ZEROS_VLC_BITS, 16, |
| &total_zeros_len [i][0], 1, 1, |
| &total_zeros_bits[i][0], 1, 1, |
| INIT_VLC_USE_NEW_STATIC); |
| } |
| |
| for(i=0; i<6; i++){ |
| run_vlc[i].table = run_vlc_tables[i]; |
| run_vlc[i].table_allocated = run_vlc_tables_size; |
| init_vlc(&run_vlc[i], |
| RUN_VLC_BITS, 7, |
| &run_len [i][0], 1, 1, |
| &run_bits[i][0], 1, 1, |
| INIT_VLC_USE_NEW_STATIC); |
| } |
| run7_vlc.table = run7_vlc_table, |
| run7_vlc.table_allocated = run7_vlc_table_size; |
| init_vlc(&run7_vlc, RUN7_VLC_BITS, 16, |
| &run_len [6][0], 1, 1, |
| &run_bits[6][0], 1, 1, |
| INIT_VLC_USE_NEW_STATIC); |
| |
| init_cavlc_level_tab(); |
| } |
| } |
| |
| static void free_tables(H264Context *h){ |
| int i; |
| H264Context *hx; |
| av_freep(&h->intra4x4_pred_mode); |
| av_freep(&h->chroma_pred_mode_table); |
| av_freep(&h->cbp_table); |
| av_freep(&h->mvd_table[0]); |
| av_freep(&h->mvd_table[1]); |
| av_freep(&h->direct_table); |
| av_freep(&h->non_zero_count); |
| av_freep(&h->slice_table_base); |
| h->slice_table= NULL; |
| |
| av_freep(&h->mb2b_xy); |
| av_freep(&h->mb2b8_xy); |
| |
| for(i = 0; i < h->s.avctx->thread_count; i++) { |
| hx = h->thread_context[i]; |
| if(!hx) continue; |
| av_freep(&hx->top_borders[1]); |
| av_freep(&hx->top_borders[0]); |
| av_freep(&hx->s.obmc_scratchpad); |
| } |
| } |
| |
| static void init_dequant8_coeff_table(H264Context *h){ |
| int i,q,x; |
| const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly |
| h->dequant8_coeff[0] = h->dequant8_buffer[0]; |
| h->dequant8_coeff[1] = h->dequant8_buffer[1]; |
| |
| for(i=0; i<2; i++ ){ |
| if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ |
| h->dequant8_coeff[1] = h->dequant8_buffer[0]; |
| break; |
| } |
| |
| for(q=0; q<52; q++){ |
| int shift = div6[q]; |
| int idx = rem6[q]; |
| for(x=0; x<64; x++) |
| h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = |
| ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * |
| h->pps.scaling_matrix8[i][x]) << shift; |
| } |
| } |
| } |
| |
| static void init_dequant4_coeff_table(H264Context *h){ |
| int i,j,q,x; |
| const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly |
| for(i=0; i<6; i++ ){ |
| h->dequant4_coeff[i] = h->dequant4_buffer[i]; |
| for(j=0; j<i; j++){ |
| if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){ |
| h->dequant4_coeff[i] = h->dequant4_buffer[j]; |
| break; |
| } |
| } |
| if(j<i) |
| continue; |
| |
| for(q=0; q<52; q++){ |
| int shift = div6[q] + 2; |
| int idx = rem6[q]; |
| for(x=0; x<16; x++) |
| h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] = |
| ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * |
| h->pps.scaling_matrix4[i][x]) << shift; |
| } |
| } |
| } |
| |
| static void init_dequant_tables(H264Context *h){ |
| int i,x; |
| init_dequant4_coeff_table(h); |
| if(h->pps.transform_8x8_mode) |
| init_dequant8_coeff_table(h); |
| if(h->sps.transform_bypass){ |
| for(i=0; i<6; i++) |
| for(x=0; x<16; x++) |
| h->dequant4_coeff[i][0][x] = 1<<6; |
| if(h->pps.transform_8x8_mode) |
| for(i=0; i<2; i++) |
| for(x=0; x<64; x++) |
| h->dequant8_coeff[i][0][x] = 1<<6; |
| } |
| } |
| |
| |
| /** |
| * allocates tables. |
| * needs width/height |
| */ |
| static int alloc_tables(H264Context *h){ |
| MpegEncContext * const s = &h->s; |
| const int big_mb_num= s->mb_stride * (s->mb_height+1); |
| int x,y; |
| |
| CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t)) |
| |
| CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t)) |
| CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base)) |
| CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t)) |
| |
| CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t)) |
| CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t)); |
| CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t)); |
| CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t)); |
| |
| memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base)); |
| h->slice_table= h->slice_table_base + s->mb_stride*2 + 1; |
| |
| CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t)); |
| CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t)); |
| for(y=0; y<s->mb_height; y++){ |
| for(x=0; x<s->mb_width; x++){ |
| const int |