libavcodec/h264.h
Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #ifndef AVCODEC_H264_H
00029 #define AVCODEC_H264_H
00030 
00031 #include "libavutil/intreadwrite.h"
00032 #include "dsputil.h"
00033 #include "cabac.h"
00034 #include "mpegvideo.h"
00035 #include "h264dsp.h"
00036 #include "h264pred.h"
00037 #include "rectangle.h"
00038 
/*
 * Any use of these two identifiers after this point expands to a
 * deliberately descriptive replacement name (apparently so that
 * accidental uses in H.264 code stand out).
 */
#define interlaced_dct          interlaced_dct_is_a_bad_name
#define mb_intra                mb_intra_is_not_initialized_see_mb_type

/* Sizes of H264Context.sps_buffers[] and H264Context.pps_buffers[]. */
#define MAX_SPS_COUNT           32
#define MAX_PPS_COUNT           256

/* Size of H264Context.mmco[]. */
#define MAX_MMCO_COUNT          66

/* Sizes H264Context.last_pocs[] and (plus 2) H264Context.delayed_pic[]. */
#define MAX_DELAYED_PIC_COUNT   16

/*
 * Compiling in interlaced support reduces the speed of progressive
 * decoding by about 2%.
 */
#define ALLOW_INTERLACE

#define FMO                     0

/* First dimension of H264Context.ref2frm[]. */
#define MAX_SLICES              16
00060 
/*
 * Interlace-related accessors.
 *
 * With ALLOW_INTERLACE defined they read the decoder state through local
 * variables named `h` (H264Context *) and `s` (MpegEncContext *), which must
 * be in scope at the point of use.  Without it they all collapse to constants
 * so the compiler can drop the interlaced code paths.
 */
#if defined(ALLOW_INTERLACE)
#   define MB_MBAFF         h->mb_mbaff
#   define MB_FIELD         h->mb_field_decoding_flag
#   define FRAME_MBAFF      h->mb_aff_frame
#   define FIELD_PICTURE    (s->picture_structure != PICT_FRAME)
#   define LEFT_MBS         2
#   define LTOP             0
#   define LBOT             1
#   define LEFT(i)          (i)
#else
#   define MB_MBAFF         0
#   define MB_FIELD         0
#   define FRAME_MBAFF      0
#   define FIELD_PICTURE    0
#   undef  IS_INTERLACED
#   define IS_INTERLACED(mb_type) 0
#   define LEFT_MBS         1
#   define LTOP             0
#   define LBOT             0
#   define LEFT(i)          0
#endif
#define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE)
00083 
/* CABAC may be predefined by the including file; otherwise it reads the
 * flag from the active PPS through a local `h`. */
#if !defined(CABAC)
#   define CABAC h->pps.cabac
#endif

/* Chroma format tests on the active SPS (need a local `h`). */
#define CHROMA422           (h->sps.chroma_format_idc == 2)
#define CHROMA444           (h->sps.chroma_format_idc == 3)

#define EXTENDED_SAR        255

/* dirty, but it fits in 16 bits */
#define MB_TYPE_REF0        MB_TYPE_ACPRED
#define MB_TYPE_8x8DCT      0x01000000
#define IS_REF0(a)          ((a) & MB_TYPE_REF0)
#define IS_8x8DCT(a)        ((a) & MB_TYPE_8x8DCT)

#define DELAYED_PIC_REF     4

/* The maximum supported qp */
#define QP_MAX_NUM          (51 + 2*6)
00105 
00106 /* NAL unit types */
enum {
    NAL_SLICE           =  1,
    NAL_DPA             =  2,
    NAL_DPB             =  3,
    NAL_DPC             =  4,
    NAL_IDR_SLICE       =  5,
    NAL_SEI             =  6,
    NAL_SPS             =  7,
    NAL_PPS             =  8,
    NAL_AUD             =  9,
    NAL_END_SEQUENCE    = 10,
    NAL_END_STREAM      = 11,
    NAL_FILLER_DATA     = 12,
    NAL_SPS_EXT         = 13,
    /* values 14..18 are not named by this header */
    NAL_AUXILIARY_SLICE = 19
};
00123 
/**
 * SEI type identifiers named by this header.
 * The numbering is sparse; only these four values are defined here.
 */
typedef enum {
    SEI_BUFFERING_PERIOD            = 0,
    SEI_TYPE_PIC_TIMING             = 1,
    SEI_TYPE_USER_DATA_UNREGISTERED = 5,
    SEI_TYPE_RECOVERY_POINT         = 6
} SEI_Type;
00133 
/**
 * Picture structure values; stored in H264Context.sei_pic_struct.
 * The constants are numbered consecutively starting at 0.
 */
typedef enum {
    SEI_PIC_STRUCT_FRAME = 0,           /* 0 */
    SEI_PIC_STRUCT_TOP_FIELD,           /* 1 */
    SEI_PIC_STRUCT_BOTTOM_FIELD,        /* 2 */
    SEI_PIC_STRUCT_TOP_BOTTOM,          /* 3 */
    SEI_PIC_STRUCT_BOTTOM_TOP,          /* 4 */
    SEI_PIC_STRUCT_TOP_BOTTOM_TOP,      /* 5 */
    SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM,   /* 6 */
    SEI_PIC_STRUCT_FRAME_DOUBLING,      /* 7 */
    SEI_PIC_STRUCT_FRAME_TRIPLING       /* 8 */
} SEI_PicStructType;
00148 
00152 typedef struct SPS{
00153 
00154     int profile_idc;
00155     int level_idc;
00156     int chroma_format_idc;
00157     int transform_bypass;              
00158     int log2_max_frame_num;            
00159     int poc_type;                      
00160     int log2_max_poc_lsb;              
00161     int delta_pic_order_always_zero_flag;
00162     int offset_for_non_ref_pic;
00163     int offset_for_top_to_bottom_field;
00164     int poc_cycle_length;              
00165     int ref_frame_count;               
00166     int gaps_in_frame_num_allowed_flag;
00167     int mb_width;                      
00168     int mb_height;                     
00169     int frame_mbs_only_flag;
00170     int mb_aff;                        
00171     int direct_8x8_inference_flag;
00172     int crop;                   
00173     unsigned int crop_left;            
00174     unsigned int crop_right;           
00175     unsigned int crop_top;             
00176     unsigned int crop_bottom;          
00177     int vui_parameters_present_flag;
00178     AVRational sar;
00179     int video_signal_type_present_flag;
00180     int full_range;
00181     int colour_description_present_flag;
00182     enum AVColorPrimaries color_primaries;
00183     enum AVColorTransferCharacteristic color_trc;
00184     enum AVColorSpace colorspace;
00185     int timing_info_present_flag;
00186     uint32_t num_units_in_tick;
00187     uint32_t time_scale;
00188     int fixed_frame_rate_flag;
00189     short offset_for_ref_frame[256]; //FIXME dyn aloc?
00190     int bitstream_restriction_flag;
00191     int num_reorder_frames;
00192     int scaling_matrix_present;
00193     uint8_t scaling_matrix4[6][16];
00194     uint8_t scaling_matrix8[6][64];
00195     int nal_hrd_parameters_present_flag;
00196     int vcl_hrd_parameters_present_flag;
00197     int pic_struct_present_flag;
00198     int time_offset_length;
00199     int cpb_cnt;                       
00200     int initial_cpb_removal_delay_length; 
00201     int cpb_removal_delay_length;      
00202     int dpb_output_delay_length;       
00203     int bit_depth_luma;                
00204     int bit_depth_chroma;              
00205     int residual_color_transform_flag; 
00206     int constraint_set_flags;          
00207     int new;                           
00208 }SPS;
00209 
/**
 * Picture parameter set.
 *
 * sps_id names the sequence parameter set this PPS belongs to.
 * chroma_qp_table is what get_chroma_qp() looks values up in, indexed
 * [t][qscale].
 */
typedef struct PPS {
    unsigned int sps_id;
    int          cabac;
    int          pic_order_present;
    int          slice_group_count;
    int          mb_slice_group_map_type;
    unsigned int ref_count[2];
    int          weighted_pred;
    int          weighted_bipred_idc;
    int          init_qp;
    int          init_qs;
    int          chroma_qp_index_offset[2];
    int          deblocking_filter_parameters_present;
    int          constrained_intra_pred;
    int          redundant_pic_cnt_present;
    int          transform_8x8_mode;
    uint8_t      scaling_matrix4[6][16];
    uint8_t      scaling_matrix8[6][64];
    uint8_t      chroma_qp_table[2][64];
    int          chroma_qp_diff;
} PPS;
00234 
/**
 * Memory management control operation opcode.
 * Values run consecutively from MMCO_END (0) to MMCO_LONG (6).
 */
typedef enum MMCOOpcode {
    MMCO_END          = 0,
    MMCO_SHORT2UNUSED = 1,
    MMCO_LONG2UNUSED  = 2,
    MMCO_SHORT2LONG   = 3,
    MMCO_SET_MAX_LONG = 4,
    MMCO_RESET        = 5,
    MMCO_LONG         = 6,
} MMCOOpcode;
00247 
00251 typedef struct MMCO{
00252     MMCOOpcode opcode;
00253     int short_pic_num;  
00254     int long_arg;       
00255 } MMCO;
00256 
00260 typedef struct H264Context{
00261     MpegEncContext s;
00262     H264DSPContext h264dsp;
00263     int pixel_shift;    
00264     int chroma_qp[2]; //QPc
00265 
00266     int qp_thresh;      
00267 
00268     int prev_mb_skipped;
00269     int next_mb_skipped;
00270 
00271     //prediction stuff
00272     int chroma_pred_mode;
00273     int intra16x16_pred_mode;
00274 
00275     int topleft_mb_xy;
00276     int top_mb_xy;
00277     int topright_mb_xy;
00278     int left_mb_xy[LEFT_MBS];
00279 
00280     int topleft_type;
00281     int top_type;
00282     int topright_type;
00283     int left_type[LEFT_MBS];
00284 
00285     const uint8_t * left_block;
00286     int topleft_partition;
00287 
00288     int8_t intra4x4_pred_mode_cache[5*8];
00289     int8_t (*intra4x4_pred_mode);
00290     H264PredContext hpc;
00291     unsigned int topleft_samples_available;
00292     unsigned int top_samples_available;
00293     unsigned int topright_samples_available;
00294     unsigned int left_samples_available;
00295     uint8_t (*top_borders[2])[(16*3)*2];
00296 
00301     DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15*8];
00302 
00303     uint8_t (*non_zero_count)[48];
00304 
00308     DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5*8][2];
00309     DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5*8];
00310 #define LIST_NOT_USED -1 //FIXME rename?
00311 #define PART_NOT_AVAILABLE -2
00312 
00316     int neighbor_transform_size;
00317 
00322     int block_offset[2*(16*3)];
00323 
00324     uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
00325     uint32_t *mb2br_xy;
00326     int b_stride; //FIXME use s->b4_stride
00327 
00328     int mb_linesize;   
00329     int mb_uvlinesize;
00330 
00331     int emu_edge_width;
00332     int emu_edge_height;
00333 
00334     unsigned current_sps_id; 
00335     SPS sps; 
00336 
00340     PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
00341 
00342     uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down?
00343     uint32_t dequant8_buffer[6][QP_MAX_NUM+1][64];
00344     uint32_t (*dequant4_coeff[6])[16];
00345     uint32_t (*dequant8_coeff[6])[64];
00346 
00347     int slice_num;
00348     uint16_t *slice_table;     
00349     int slice_type;
00350     int slice_type_nos;        
00351     int slice_type_fixed;
00352 
00353     //interlacing specific flags
00354     int mb_aff_frame;
00355     int mb_field_decoding_flag;
00356     int mb_mbaff;              
00357 
00358     DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4];
00359 
00360     //Weighted pred stuff
00361     int use_weight;
00362     int use_weight_chroma;
00363     int luma_log2_weight_denom;
00364     int chroma_log2_weight_denom;
00365     //The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss
00366     int luma_weight[48][2][2];
00367     int chroma_weight[48][2][2][2];
00368     int implicit_weight[48][48][2];
00369 
00370     int direct_spatial_mv_pred;
00371     int col_parity;
00372     int col_fieldoff;
00373     int dist_scale_factor[16];
00374     int dist_scale_factor_field[2][32];
00375     int map_col_to_list0[2][16+32];
00376     int map_col_to_list0_field[2][2][16+32];
00377 
00381     unsigned int ref_count[2];   
00382     unsigned int list_count;
00383     uint8_t *list_counts;            
00384     Picture ref_list[2][48];         
00387     int ref2frm[MAX_SLICES][2][64];  
00388 
00389     //data partitioning
00390     GetBitContext intra_gb;
00391     GetBitContext inter_gb;
00392     GetBitContext *intra_gb_ptr;
00393     GetBitContext *inter_gb_ptr;
00394 
00395     DECLARE_ALIGNED(16, DCTELEM, mb)[16*48*2]; 
00396     DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16*2];
00397     DCTELEM mb_padding[256*2];        
00398 
00402     CABACContext cabac;
00403     uint8_t      cabac_state[1024];
00404 
00405     /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
00406     uint16_t     *cbp_table;
00407     int cbp;
00408     int top_cbp;
00409     int left_cbp;
00410     /* chroma_pred_mode for i4x4 or i16x16, else 0 */
00411     uint8_t     *chroma_pred_mode_table;
00412     int         last_qscale_diff;
00413     uint8_t     (*mvd_table[2])[2];
00414     DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2];
00415     uint8_t     *direct_table;
00416     uint8_t     direct_cache[5*8];
00417 
00418     uint8_t zigzag_scan[16];
00419     uint8_t zigzag_scan8x8[64];
00420     uint8_t zigzag_scan8x8_cavlc[64];
00421     uint8_t field_scan[16];
00422     uint8_t field_scan8x8[64];
00423     uint8_t field_scan8x8_cavlc[64];
00424     const uint8_t *zigzag_scan_q0;
00425     const uint8_t *zigzag_scan8x8_q0;
00426     const uint8_t *zigzag_scan8x8_cavlc_q0;
00427     const uint8_t *field_scan_q0;
00428     const uint8_t *field_scan8x8_q0;
00429     const uint8_t *field_scan8x8_cavlc_q0;
00430 
00431     int x264_build;
00432 
00433     int mb_xy;
00434 
00435     int is_complex;
00436 
00437     //deblock
00438     int deblocking_filter;         
00439     int slice_alpha_c0_offset;
00440     int slice_beta_offset;
00441 
00442 //=============================================================
00443     //Things below are not used in the MB or more inner code
00444 
00445     int nal_ref_idc;
00446     int nal_unit_type;
00447     uint8_t *rbsp_buffer[2];
00448     unsigned int rbsp_buffer_size[2];
00449 
00453     int is_avc; 
00454     int nal_length_size; 
00455     int got_first; 
00456 
00457     SPS *sps_buffers[MAX_SPS_COUNT];
00458     PPS *pps_buffers[MAX_PPS_COUNT];
00459 
00460     int dequant_coeff_pps;     
00461 
00462     uint16_t *slice_table_base;
00463 
00464 
00465     //POC stuff
00466     int poc_lsb;
00467     int poc_msb;
00468     int delta_poc_bottom;
00469     int delta_poc[2];
00470     int frame_num;
00471     int prev_poc_msb;             
00472     int prev_poc_lsb;             
00473     int frame_num_offset;         
00474     int prev_frame_num_offset;    
00475     int prev_frame_num;           
00476 
00480     int curr_pic_num;
00481 
00485     int max_pic_num;
00486 
00487     int redundant_pic_count;
00488 
00489     Picture *short_ref[32];
00490     Picture *long_ref[32];
00491     Picture default_ref_list[2][32]; 
00492     Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size?
00493     int last_pocs[MAX_DELAYED_PIC_COUNT];
00494     Picture *next_output_pic;
00495     int outputed_poc;
00496     int next_outputed_poc;
00497 
00501     MMCO mmco[MAX_MMCO_COUNT];
00502     int mmco_index;
00503     int mmco_reset;
00504 
00505     int long_ref_count;  
00506     int short_ref_count; 
00507 
00508     int          cabac_init_idc;
00509 
00514     struct H264Context *thread_context[MAX_THREADS];
00515 
00519     int current_slice;
00520 
00527     int max_contexts;
00528 
00533     int single_decode_warning;
00534 
00535     int last_slice_type;
00541     SEI_PicStructType sei_pic_struct;
00542 
00549     int prev_interlaced_frame;
00550 
00556     int sei_ct_type;
00557 
00561     int sei_dpb_output_delay;
00562 
00566     int sei_cpb_removal_delay;
00567 
00575     int sei_recovery_frame_cnt;
00576 
00577     int luma_weight_flag[2];   
00578     int chroma_weight_flag[2]; 
00579 
00580     // Timestamp stuff
00581     int sei_buffering_period_present;  
00582     int initial_cpb_removal_delay[32]; 
00583 
00584     int cur_chroma_format_idc;
00585 }H264Context;
00586 
00587 
00588 extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM+1]; 
00589 
00593 int ff_h264_decode_sei(H264Context *h);
00594 
00598 int ff_h264_decode_seq_parameter_set(H264Context *h);
00599 
00603 int ff_h264_get_profile(SPS *sps);
00604 
00608 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length);
00609 
00617 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length);
00618 
00622 av_cold void ff_h264_free_context(H264Context *h);
00623 
00627 int ff_h264_get_slice_type(const H264Context *h);
00628 
00633 int ff_h264_alloc_tables(H264Context *h);
00634 
00638 int ff_h264_fill_default_ref_list(H264Context *h);
00639 
00640 int ff_h264_decode_ref_pic_list_reordering(H264Context *h);
00641 void ff_h264_fill_mbaff_ref_list(H264Context *h);
00642 void ff_h264_remove_all_refs(H264Context *h);
00643 
00647 int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count);
00648 
00649 int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb);
00650 
00651 void ff_generate_sliding_window_mmcos(H264Context *h);
00652 
00653 
00657 int ff_h264_check_intra4x4_pred_mode(H264Context *h);
00658 
00662 int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma);
00663 
00664 void ff_h264_hl_decode_mb(H264Context *h);
00665 int ff_h264_frame_start(H264Context *h);
00666 int ff_h264_decode_extradata(H264Context *h);
00667 av_cold int ff_h264_decode_init(AVCodecContext *avctx);
00668 av_cold int ff_h264_decode_end(AVCodecContext *avctx);
00669 av_cold void ff_h264_decode_init_vlc(void);
00670 
00675 int ff_h264_decode_mb_cavlc(H264Context *h);
00676 
00681 int ff_h264_decode_mb_cabac(H264Context *h);
00682 
00683 void ff_h264_init_cabac_states(H264Context *h);
00684 
00685 void ff_h264_direct_dist_scale_factor(H264Context * const h);
00686 void ff_h264_direct_ref_list_init(H264Context * const h);
00687 void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type);
00688 
00689 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
00690 void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
00691 
00697 void ff_h264_reset_sei(H264Context *h);
00698 
00699 
00700 /*
00701 o-o o-o
00702  / / /
00703 o-o o-o
00704  ,---'
00705 o-o o-o
00706  / / /
00707 o-o o-o
00708 */
00709 
00710 /* Scan8 organization:
00711  *    0 1 2 3 4 5 6 7
00712  * 0  DY    y y y y y
00713  * 1        y Y Y Y Y
00714  * 2        y Y Y Y Y
00715  * 3        y Y Y Y Y
00716  * 4        y Y Y Y Y
00717  * 5  DU    u u u u u
00718  * 6        u U U U U
00719  * 7        u U U U U
00720  * 8        u U U U U
00721  * 9        u U U U U
00722  * 10 DV    v v v v v
00723  * 11       v V V V V
00724  * 12       v V V V V
00725  * 13       v V V V V
00726  * 14       v V V V V
00727  * DY/DU/DV are for luma/chroma DC.
00728  */
00729 
/* Indices into scan8[] of the DC entries that follow the 3*16 block entries;
 * the chroma DC entries start at CHROMA_DC_BLOCK_INDEX. */
#define LUMA_DC_BLOCK_INDEX   48
#define CHROMA_DC_BLOCK_INDEX 49

/*
 * Maps a block index to its position in the 8-column cache arrays.
 * This table must be here because scan8[constant] must be known at
 * compile time.
 */
#define SCAN8_POS(col, row) ((col) + (row) * 8)
static const uint8_t scan8[16 * 3 + 3] = {
    /* first plane: rows 1..4 */
    SCAN8_POS(4,  1), SCAN8_POS(5,  1), SCAN8_POS(4,  2), SCAN8_POS(5,  2),
    SCAN8_POS(6,  1), SCAN8_POS(7,  1), SCAN8_POS(6,  2), SCAN8_POS(7,  2),
    SCAN8_POS(4,  3), SCAN8_POS(5,  3), SCAN8_POS(4,  4), SCAN8_POS(5,  4),
    SCAN8_POS(6,  3), SCAN8_POS(7,  3), SCAN8_POS(6,  4), SCAN8_POS(7,  4),
    /* second plane: rows 6..9 */
    SCAN8_POS(4,  6), SCAN8_POS(5,  6), SCAN8_POS(4,  7), SCAN8_POS(5,  7),
    SCAN8_POS(6,  6), SCAN8_POS(7,  6), SCAN8_POS(6,  7), SCAN8_POS(7,  7),
    SCAN8_POS(4,  8), SCAN8_POS(5,  8), SCAN8_POS(4,  9), SCAN8_POS(5,  9),
    SCAN8_POS(6,  8), SCAN8_POS(7,  8), SCAN8_POS(6,  9), SCAN8_POS(7,  9),
    /* third plane: rows 11..14 */
    SCAN8_POS(4, 11), SCAN8_POS(5, 11), SCAN8_POS(4, 12), SCAN8_POS(5, 12),
    SCAN8_POS(6, 11), SCAN8_POS(7, 11), SCAN8_POS(6, 12), SCAN8_POS(7, 12),
    SCAN8_POS(4, 13), SCAN8_POS(5, 13), SCAN8_POS(4, 14), SCAN8_POS(5, 14),
    SCAN8_POS(6, 13), SCAN8_POS(7, 13), SCAN8_POS(6, 14), SCAN8_POS(7, 14),
    /* DC entries: column 0 of rows 0, 5 and 10 */
    SCAN8_POS(0,  0), SCAN8_POS(0,  5), SCAN8_POS(0, 10)
};
#undef SCAN8_POS
00749 
00750 static av_always_inline uint32_t pack16to32(int a, int b){
00751 #if HAVE_BIGENDIAN
00752    return (b&0xFFFF) + (a<<16);
00753 #else
00754    return (a&0xFFFF) + (b<<16);
00755 #endif
00756 }
00757 
00758 static av_always_inline uint16_t pack8to16(int a, int b){
00759 #if HAVE_BIGENDIAN
00760    return (b&0xFF) + (a<<8);
00761 #else
00762    return (a&0xFF) + (b<<8);
00763 #endif
00764 }
00765 
00769 static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale){
00770     return h->pps.chroma_qp_table[t][qscale];
00771 }
00772 
00776 static av_always_inline int pred_intra_mode(H264Context *h, int n){
00777     const int index8= scan8[n];
00778     const int left= h->intra4x4_pred_mode_cache[index8 - 1];
00779     const int top = h->intra4x4_pred_mode_cache[index8 - 8];
00780     const int min= FFMIN(left, top);
00781 
00782     tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
00783 
00784     if(min<0) return DC_PRED;
00785     else      return min;
00786 }
00787 
00788 static av_always_inline void write_back_intra_pred_mode(H264Context *h){
00789     int8_t *i4x4= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
00790     int8_t *i4x4_cache= h->intra4x4_pred_mode_cache;
00791 
00792     AV_COPY32(i4x4, i4x4_cache + 4 + 8*4);
00793     i4x4[4]= i4x4_cache[7+8*3];
00794     i4x4[5]= i4x4_cache[7+8*2];
00795     i4x4[6]= i4x4_cache[7+8*1];
00796 }
00797 
00798 static av_always_inline void write_back_non_zero_count(H264Context *h){
00799     const int mb_xy= h->mb_xy;
00800     uint8_t *nnz = h->non_zero_count[mb_xy];
00801     uint8_t *nnz_cache = h->non_zero_count_cache;
00802 
00803     AV_COPY32(&nnz[ 0], &nnz_cache[4+8* 1]);
00804     AV_COPY32(&nnz[ 4], &nnz_cache[4+8* 2]);
00805     AV_COPY32(&nnz[ 8], &nnz_cache[4+8* 3]);
00806     AV_COPY32(&nnz[12], &nnz_cache[4+8* 4]);
00807     AV_COPY32(&nnz[16], &nnz_cache[4+8* 6]);
00808     AV_COPY32(&nnz[20], &nnz_cache[4+8* 7]);
00809     AV_COPY32(&nnz[32], &nnz_cache[4+8*11]);
00810     AV_COPY32(&nnz[36], &nnz_cache[4+8*12]);
00811 
00812     if(!h->s.chroma_y_shift){
00813         AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]);
00814         AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]);
00815         AV_COPY32(&nnz[40], &nnz_cache[4+8*13]);
00816         AV_COPY32(&nnz[44], &nnz_cache[4+8*14]);
00817     }
00818 }
00819 
00820 static av_always_inline void write_back_motion_list(H264Context *h, MpegEncContext * const s, int b_stride,
00821                                                     int b_xy, int b8_xy, int mb_type, int list )
00822 {
00823     int16_t (*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy];
00824     int16_t (*mv_src)[2] = &h->mv_cache[list][scan8[0]];
00825     AV_COPY128(mv_dst + 0*b_stride, mv_src + 8*0);
00826     AV_COPY128(mv_dst + 1*b_stride, mv_src + 8*1);
00827     AV_COPY128(mv_dst + 2*b_stride, mv_src + 8*2);
00828     AV_COPY128(mv_dst + 3*b_stride, mv_src + 8*3);
00829     if( CABAC ) {
00830         uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]];
00831         uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
00832         if(IS_SKIP(mb_type))
00833             AV_ZERO128(mvd_dst);
00834         else{
00835             AV_COPY64(mvd_dst, mvd_src + 8*3);
00836             AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0);
00837             AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1);
00838             AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2);
00839         }
00840     }
00841 
00842     {
00843         int8_t *ref_index = &s->current_picture.f.ref_index[list][b8_xy];
00844         int8_t *ref_cache = h->ref_cache[list];
00845         ref_index[0+0*2]= ref_cache[scan8[0]];
00846         ref_index[1+0*2]= ref_cache[scan8[4]];
00847         ref_index[0+1*2]= ref_cache[scan8[8]];
00848         ref_index[1+1*2]= ref_cache[scan8[12]];
00849     }
00850 }
00851 
00852 static av_always_inline void write_back_motion(H264Context *h, int mb_type){
00853     MpegEncContext * const s = &h->s;
00854     const int b_stride = h->b_stride;
00855     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy
00856     const int b8_xy= 4*h->mb_xy;
00857 
00858     if(USES_LIST(mb_type, 0)){
00859         write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 0);
00860     }else{
00861         fill_rectangle(&s->current_picture.f.ref_index[0][b8_xy],
00862                        2, 2, 2, (uint8_t)LIST_NOT_USED, 1);
00863     }
00864     if(USES_LIST(mb_type, 1)){
00865         write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 1);
00866     }
00867 
00868     if(h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC){
00869         if(IS_8X8(mb_type)){
00870             uint8_t *direct_table = &h->direct_table[4*h->mb_xy];
00871             direct_table[1] = h->sub_mb_type[1]>>1;
00872             direct_table[2] = h->sub_mb_type[2]>>1;
00873             direct_table[3] = h->sub_mb_type[3]>>1;
00874         }
00875     }
00876 }
00877 
00878 static av_always_inline int get_dct8x8_allowed(H264Context *h){
00879     if(h->sps.direct_8x8_inference_flag)
00880         return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
00881     else
00882         return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
00883 }
00884 
00885 #endif /* AVCODEC_H264_H */