libavcodec/vp8.c
Go to the documentation of this file.
00001 
00025 #include "libavutil/imgutils.h"
00026 #include "avcodec.h"
00027 #include "internal.h"
00028 #include "vp8.h"
00029 #include "vp8data.h"
00030 #include "rectangle.h"
00031 #include "thread.h"
00032 
00033 #if ARCH_ARM
00034 #   include "arm/vp8.h"
00035 #endif
00036 
00037 static void free_buffers(VP8Context *s)
00038 {
00039     av_freep(&s->macroblocks_base);
00040     av_freep(&s->filter_strength);
00041     av_freep(&s->intra4x4_pred_mode_top);
00042     av_freep(&s->top_nnz);
00043     av_freep(&s->edge_emu_buffer);
00044     av_freep(&s->top_border);
00045 
00046     s->macroblocks = NULL;
00047 }
00048 
00049 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
00050 {
00051     int ret;
00052     if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
00053         return ret;
00054     if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
00055         f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
00056     } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
00057         ff_thread_release_buffer(s->avctx, f);
00058         return AVERROR(ENOMEM);
00059     }
00060     return 0;
00061 }
00062 
00063 static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
00064 {
00065     if (f->ref_index[0]) {
00066         if (prefer_delayed_free) {
00067             /* Upon a size change, we want to free the maps but other threads may still
00068              * be using them, so queue them. Upon a seek, all threads are inactive so
00069              * we want to cache one to prevent re-allocation in the next decoding
00070              * iteration, but the rest we can free directly. */
00071             int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
00072             if (s->num_maps_to_be_freed < max_queued_maps) {
00073                 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
00074             } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
00075                 av_free(f->ref_index[0]);
00076             } /* else: MEMLEAK (should never happen, but better that than crash) */
00077             f->ref_index[0] = NULL;
00078         } else /* vp8_decode_free() */ {
00079             av_free(f->ref_index[0]);
00080         }
00081     }
00082     ff_thread_release_buffer(s->avctx, f);
00083 }
00084 
00085 static void vp8_decode_flush_impl(AVCodecContext *avctx,
00086                                   int prefer_delayed_free, int can_direct_free, int free_mem)
00087 {
00088     VP8Context *s = avctx->priv_data;
00089     int i;
00090 
00091     if (!avctx->internal->is_copy) {
00092         for (i = 0; i < 5; i++)
00093             if (s->frames[i].data[0])
00094                 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
00095     }
00096     memset(s->framep, 0, sizeof(s->framep));
00097 
00098     if (free_mem) {
00099         free_buffers(s);
00100         s->maps_are_invalid = 1;
00101     }
00102 }
00103 
00104 static void vp8_decode_flush(AVCodecContext *avctx)
00105 {
00106     vp8_decode_flush_impl(avctx, 1, 1, 0);
00107 }
00108 
00109 static int update_dimensions(VP8Context *s, int width, int height)
00110 {
00111     if (width  != s->avctx->width ||
00112         height != s->avctx->height) {
00113         if (av_image_check_size(width, height, 0, s->avctx))
00114             return AVERROR_INVALIDDATA;
00115 
00116         vp8_decode_flush_impl(s->avctx, 1, 0, 1);
00117 
00118         avcodec_set_dimensions(s->avctx, width, height);
00119     }
00120 
00121     s->mb_width  = (s->avctx->coded_width +15) / 16;
00122     s->mb_height = (s->avctx->coded_height+15) / 16;
00123 
00124     s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
00125     s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
00126     s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
00127     s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
00128     s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
00129 
00130     if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
00131         !s->top_nnz || !s->top_border)
00132         return AVERROR(ENOMEM);
00133 
00134     s->macroblocks        = s->macroblocks_base + 1;
00135 
00136     return 0;
00137 }
00138 
00139 static void parse_segment_info(VP8Context *s)
00140 {
00141     VP56RangeCoder *c = &s->c;
00142     int i;
00143 
00144     s->segmentation.update_map = vp8_rac_get(c);
00145 
00146     if (vp8_rac_get(c)) { // update segment feature data
00147         s->segmentation.absolute_vals = vp8_rac_get(c);
00148 
00149         for (i = 0; i < 4; i++)
00150             s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
00151 
00152         for (i = 0; i < 4; i++)
00153             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
00154     }
00155     if (s->segmentation.update_map)
00156         for (i = 0; i < 3; i++)
00157             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
00158 }
00159 
00160 static void update_lf_deltas(VP8Context *s)
00161 {
00162     VP56RangeCoder *c = &s->c;
00163     int i;
00164 
00165     for (i = 0; i < 4; i++)
00166         s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);
00167 
00168     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
00169         s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
00170 }
00171 
00172 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
00173 {
00174     const uint8_t *sizes = buf;
00175     int i;
00176 
00177     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
00178 
00179     buf      += 3*(s->num_coeff_partitions-1);
00180     buf_size -= 3*(s->num_coeff_partitions-1);
00181     if (buf_size < 0)
00182         return -1;
00183 
00184     for (i = 0; i < s->num_coeff_partitions-1; i++) {
00185         int size = AV_RL24(sizes + 3*i);
00186         if (buf_size - size < 0)
00187             return -1;
00188 
00189         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
00190         buf      += size;
00191         buf_size -= size;
00192     }
00193     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
00194 
00195     return 0;
00196 }
00197 
00198 static void get_quants(VP8Context *s)
00199 {
00200     VP56RangeCoder *c = &s->c;
00201     int i, base_qi;
00202 
00203     int yac_qi     = vp8_rac_get_uint(c, 7);
00204     int ydc_delta  = vp8_rac_get_sint(c, 4);
00205     int y2dc_delta = vp8_rac_get_sint(c, 4);
00206     int y2ac_delta = vp8_rac_get_sint(c, 4);
00207     int uvdc_delta = vp8_rac_get_sint(c, 4);
00208     int uvac_delta = vp8_rac_get_sint(c, 4);
00209 
00210     for (i = 0; i < 4; i++) {
00211         if (s->segmentation.enabled) {
00212             base_qi = s->segmentation.base_quant[i];
00213             if (!s->segmentation.absolute_vals)
00214                 base_qi += yac_qi;
00215         } else
00216             base_qi = yac_qi;
00217 
00218         s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
00219         s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
00220         s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
00221         s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
00222         s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
00223         s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
00224 
00225         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
00226         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
00227     }
00228 }
00229 
00243 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
00244 {
00245     VP56RangeCoder *c = &s->c;
00246 
00247     if (update)
00248         return VP56_FRAME_CURRENT;
00249 
00250     switch (vp8_rac_get_uint(c, 2)) {
00251     case 1:
00252         return VP56_FRAME_PREVIOUS;
00253     case 2:
00254         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
00255     }
00256     return VP56_FRAME_NONE;
00257 }
00258 
00259 static void update_refs(VP8Context *s)
00260 {
00261     VP56RangeCoder *c = &s->c;
00262 
00263     int update_golden = vp8_rac_get(c);
00264     int update_altref = vp8_rac_get(c);
00265 
00266     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
00267     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
00268 }
00269 
00270 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
00271 {
00272     VP56RangeCoder *c = &s->c;
00273     int header_size, hscale, vscale, i, j, k, l, m, ret;
00274     int width  = s->avctx->width;
00275     int height = s->avctx->height;
00276 
00277     s->keyframe  = !(buf[0] & 1);
00278     s->profile   =  (buf[0]>>1) & 7;
00279     s->invisible = !(buf[0] & 0x10);
00280     header_size  = AV_RL24(buf) >> 5;
00281     buf      += 3;
00282     buf_size -= 3;
00283 
00284     if (s->profile > 3)
00285         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
00286 
00287     if (!s->profile)
00288         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
00289     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
00290         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
00291 
00292     if (header_size > buf_size - 7*s->keyframe) {
00293         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
00294         return AVERROR_INVALIDDATA;
00295     }
00296 
00297     if (s->keyframe) {
00298         if (AV_RL24(buf) != 0x2a019d) {
00299             av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
00300             return AVERROR_INVALIDDATA;
00301         }
00302         width  = AV_RL16(buf+3) & 0x3fff;
00303         height = AV_RL16(buf+5) & 0x3fff;
00304         hscale = buf[4] >> 6;
00305         vscale = buf[6] >> 6;
00306         buf      += 7;
00307         buf_size -= 7;
00308 
00309         if (hscale || vscale)
00310             av_log_missing_feature(s->avctx, "Upscaling", 1);
00311 
00312         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
00313         for (i = 0; i < 4; i++)
00314             for (j = 0; j < 16; j++)
00315                 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
00316                        sizeof(s->prob->token[i][j]));
00317         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
00318         memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
00319         memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
00320         memset(&s->segmentation, 0, sizeof(s->segmentation));
00321         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
00322     }
00323 
00324     if (!s->macroblocks_base || /* first frame */
00325         width != s->avctx->width || height != s->avctx->height) {
00326         if ((ret = update_dimensions(s, width, height)) < 0)
00327             return ret;
00328     }
00329 
00330     ff_vp56_init_range_decoder(c, buf, header_size);
00331     buf      += header_size;
00332     buf_size -= header_size;
00333 
00334     if (s->keyframe) {
00335         if (vp8_rac_get(c))
00336             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
00337         vp8_rac_get(c); // whether we can skip clamping in dsp functions
00338     }
00339 
00340     if ((s->segmentation.enabled = vp8_rac_get(c)))
00341         parse_segment_info(s);
00342     else
00343         s->segmentation.update_map = 0; // FIXME: move this to some init function?
00344 
00345     s->filter.simple    = vp8_rac_get(c);
00346     s->filter.level     = vp8_rac_get_uint(c, 6);
00347     s->filter.sharpness = vp8_rac_get_uint(c, 3);
00348 
00349     if ((s->lf_delta.enabled = vp8_rac_get(c)))
00350         if (vp8_rac_get(c))
00351             update_lf_deltas(s);
00352 
00353     if (setup_partitions(s, buf, buf_size)) {
00354         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
00355         return AVERROR_INVALIDDATA;
00356     }
00357 
00358     get_quants(s);
00359 
00360     if (!s->keyframe) {
00361         update_refs(s);
00362         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
00363         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
00364     }
00365 
00366     // if we aren't saving this frame's probabilities for future frames,
00367     // make a copy of the current probabilities
00368     if (!(s->update_probabilities = vp8_rac_get(c)))
00369         s->prob[1] = s->prob[0];
00370 
00371     s->update_last = s->keyframe || vp8_rac_get(c);
00372 
00373     for (i = 0; i < 4; i++)
00374         for (j = 0; j < 8; j++)
00375             for (k = 0; k < 3; k++)
00376                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
00377                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
00378                         int prob = vp8_rac_get_uint(c, 8);
00379                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
00380                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
00381                     }
00382 
00383     if ((s->mbskip_enabled = vp8_rac_get(c)))
00384         s->prob->mbskip = vp8_rac_get_uint(c, 8);
00385 
00386     if (!s->keyframe) {
00387         s->prob->intra  = vp8_rac_get_uint(c, 8);
00388         s->prob->last   = vp8_rac_get_uint(c, 8);
00389         s->prob->golden = vp8_rac_get_uint(c, 8);
00390 
00391         if (vp8_rac_get(c))
00392             for (i = 0; i < 4; i++)
00393                 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
00394         if (vp8_rac_get(c))
00395             for (i = 0; i < 3; i++)
00396                 s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
00397 
00398         // 17.2 MV probability update
00399         for (i = 0; i < 2; i++)
00400             for (j = 0; j < 19; j++)
00401                 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
00402                     s->prob->mvc[i][j] = vp8_rac_get_nn(c);
00403     }
00404 
00405     return 0;
00406 }
00407 
00408 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
00409 {
00410     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
00411     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
00412 }
00413 
00417 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
00418 {
00419     int bit, x = 0;
00420 
00421     if (vp56_rac_get_prob_branchy(c, p[0])) {
00422         int i;
00423 
00424         for (i = 0; i < 3; i++)
00425             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00426         for (i = 9; i > 3; i--)
00427             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00428         if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
00429             x += 8;
00430     } else {
00431         // small_mvtree
00432         const uint8_t *ps = p+2;
00433         bit = vp56_rac_get_prob(c, *ps);
00434         ps += 1 + 3*bit;
00435         x  += 4*bit;
00436         bit = vp56_rac_get_prob(c, *ps);
00437         ps += 1 + bit;
00438         x  += 2*bit;
00439         x  += vp56_rac_get_prob(c, *ps);
00440     }
00441 
00442     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
00443 }
00444 
00445 static av_always_inline
00446 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
00447 {
00448     if (left == top)
00449         return vp8_submv_prob[4-!!left];
00450     if (!top)
00451         return vp8_submv_prob[2];
00452     return vp8_submv_prob[1-!!left];
00453 }
00454 
00459 static av_always_inline
00460 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
00461 {
00462     int part_idx;
00463     int n, num;
00464     VP8Macroblock *top_mb  = &mb[2];
00465     VP8Macroblock *left_mb = &mb[-1];
00466     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
00467                   *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
00468                   *mbsplits_cur, *firstidx;
00469     VP56mv *top_mv  = top_mb->bmv;
00470     VP56mv *left_mv = left_mb->bmv;
00471     VP56mv *cur_mv  = mb->bmv;
00472 
00473     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
00474         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
00475             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
00476         } else {
00477             part_idx = VP8_SPLITMVMODE_8x8;
00478         }
00479     } else {
00480         part_idx = VP8_SPLITMVMODE_4x4;
00481     }
00482 
00483     num = vp8_mbsplit_count[part_idx];
00484     mbsplits_cur = vp8_mbsplits[part_idx],
00485     firstidx = vp8_mbfirstidx[part_idx];
00486     mb->partitioning = part_idx;
00487 
00488     for (n = 0; n < num; n++) {
00489         int k = firstidx[n];
00490         uint32_t left, above;
00491         const uint8_t *submv_prob;
00492 
00493         if (!(k & 3))
00494             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
00495         else
00496             left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
00497         if (k <= 3)
00498             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
00499         else
00500             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
00501 
00502         submv_prob = get_submv_prob(left, above);
00503 
00504         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
00505             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
00506                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
00507                     mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
00508                     mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
00509                 } else {
00510                     AV_ZERO32(&mb->bmv[n]);
00511                 }
00512             } else {
00513                 AV_WN32A(&mb->bmv[n], above);
00514             }
00515         } else {
00516             AV_WN32A(&mb->bmv[n], left);
00517         }
00518     }
00519 
00520     return num;
00521 }
00522 
/**
 * Decode the inter prediction mode and motion vector(s) of one macroblock.
 *
 * Candidate vectors are gathered from the top, left and top-left neighbours
 * (mb + 2, mb - 1 and mb + 1), weighted, and the resulting counts select the
 * probabilities used to read the mode from the range coder. The outcome is
 * written to mb->mode, mb->partitioning, mb->mv and mb->bmv.
 *
 * @param s    decoder context; supplies the range coder, sign biases, MV
 *             probabilities and the limits applied by clamp_mv()
 * @param mb   macroblock to fill in; mb->ref_frame is read here to select
 *             the sign bias, so it has to be set before the call
 * @param mb_x not referenced in this function
 * @param mb_y not referenced in this function
 */
00523 static av_always_inline
00524 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
00525 {
00526     VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
00527                                   mb - 1 /* left */,
00528                                   mb + 1 /* top-left */ };
00529     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
00530     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
      /* idx is the slot of the most recently stored candidate in near_mv[]/cnt[];
       * slot 0 (CNT_ZERO) is the zero vector. */
00531     int idx = CNT_ZERO;
00532     int cur_sign_bias = s->sign_bias[mb->ref_frame];
00533     int8_t *sign_bias = s->sign_bias;
00534     VP56mv near_mv[4];
00535     uint8_t cnt[4] = { 0 };
00536     VP56RangeCoder *c = &s->c;
00537 
      /* Only slots 0..2 are cleared; near_mv[3] is written by the macro below
       * before it can be read (cnt[CNT_SPLITMV] is non-zero only in that case). */
00538     AV_ZERO32(&near_mv[0]);
00539     AV_ZERO32(&near_mv[1]);
00540     AV_ZERO32(&near_mv[2]);
00541 
      /* MV_EDGE_CHECK(n): neighbours that are not inter coded are ignored. A
       * non-zero vector is negated when the neighbour's sign bias differs, then
       * either merged into the current slot (same vector) or stored in a new
       * one. Top and left add a weight of 2, top-left a weight of 1. */
00542     /* Process MB on top, left and top-left */
00543     #define MV_EDGE_CHECK(n)\
00544     {\
00545         VP8Macroblock *edge = mb_edge[n];\
00546         int edge_ref = edge->ref_frame;\
00547         if (edge_ref != VP56_FRAME_CURRENT) {\
00548             uint32_t mv = AV_RN32A(&edge->mv);\
00549             if (mv) {\
00550                 if (cur_sign_bias != sign_bias[edge_ref]) {\
00551                     /* SWAR negate of the values in mv. */\
00552                     mv = ~mv;\
00553                     mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
00554                 }\
00555                 if (!n || mv != AV_RN32A(&near_mv[idx]))\
00556                     AV_WN32A(&near_mv[++idx], mv);\
00557                 cnt[idx]      += 1 + (n != 2);\
00558             } else\
00559                 cnt[CNT_ZERO] += 1 + (n != 2);\
00560         }\
00561     }
00562 
00563     MV_EDGE_CHECK(0)
00564     MV_EDGE_CHECK(1)
00565     MV_EDGE_CHECK(2)
00566 
00567     mb->partitioning = VP8_SPLITMVMODE_NONE;
00568     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
00569         mb->mode = VP8_MVMODE_MV;
00570 
00571         /* If we have three distinct MVs, merge first and last if they're the same */
00572         if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
00573             cnt[CNT_NEAREST] += 1;
00574 
00575         /* Swap near and nearest if necessary */
00576         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
00577             FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
00578             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
00579         }
00580 
00581         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
00582             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
00583 
00584                 /* Choose the best mv out of 0,0 and the nearest mv */
00585                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                  /* From here on cnt[CNT_SPLITMV] is reused as the split-mode
                   * context: split neighbours weighted 2 (left, top) and 1 (top-left). */
00586                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
00587                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
00588                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
00589 
00590                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
00591                     mb->mode = VP8_MVMODE_SPLIT;
                      /* The macroblock vector becomes the last partition's vector. */
00592                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
00593                 } else {
                      /* New vector: a coded delta added to the clamped best vector. */
00594                     mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
00595                     mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
00596                     mb->bmv[0] = mb->mv;
00597                 }
00598             } else {
00599                 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
00600                 mb->bmv[0] = mb->mv;
00601             }
00602         } else {
00603             clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
00604             mb->bmv[0] = mb->mv;
00605         }
00606     } else {
00607         mb->mode = VP8_MVMODE_ZERO;
00608         AV_ZERO32(&mb->mv);
00609         mb->bmv[0] = mb->mv;
00610     }
00611 }
00612 
00613 static av_always_inline
00614 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
00615                            int mb_x, int keyframe)
00616 {
00617     uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
00618     if (keyframe) {
00619         int x, y;
00620         uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
00621         uint8_t* const left = s->intra4x4_pred_mode_left;
00622         for (y = 0; y < 4; y++) {
00623             for (x = 0; x < 4; x++) {
00624                 const uint8_t *ctx;
00625                 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
00626                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
00627                 left[y] = top[x] = *intra4x4;
00628                 intra4x4++;
00629             }
00630         }
00631     } else {
00632         int i;
00633         for (i = 0; i < 16; i++)
00634             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
00635     }
00636 }
00637 
00638 static av_always_inline
00639 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
00640 {
00641     VP56RangeCoder *c = &s->c;
00642 
00643     if (s->segmentation.update_map)
00644         *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
00645     else if (s->segmentation.enabled)
00646         *segment = ref ? *ref : *segment;
00647     s->segment = *segment;
00648 
00649     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
00650 
00651     if (s->keyframe) {
00652         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
00653 
00654         if (mb->mode == MODE_I4x4) {
00655             decode_intra4x4_modes(s, c, mb_x, 1);
00656         } else {
00657             const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
00658             AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
00659             AV_WN32A(s->intra4x4_pred_mode_left, modes);
00660         }
00661 
00662         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
00663         mb->ref_frame = VP56_FRAME_CURRENT;
00664     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
00665         // inter MB, 16.2
00666         if (vp56_rac_get_prob_branchy(c, s->prob->last))
00667             mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
00668                 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
00669         else
00670             mb->ref_frame = VP56_FRAME_PREVIOUS;
00671         s->ref_count[mb->ref_frame-1]++;
00672 
00673         // motion vectors, 16.3
00674         decode_mvs(s, mb, mb_x, mb_y);
00675     } else {
00676         // intra MB, 16.1
00677         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
00678 
00679         if (mb->mode == MODE_I4x4)
00680             decode_intra4x4_modes(s, c, mb_x, 0);
00681 
00682         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
00683         mb->ref_frame = VP56_FRAME_CURRENT;
00684         mb->partitioning = VP8_SPLITMVMODE_NONE;
00685         AV_ZERO32(&mb->bmv[0]);
00686     }
00687 }
00688 
/* The generic implementation below is only compiled when no replacement has
 * been provided as a macro of the same name. */
00689 #ifndef decode_block_coeffs_internal
00690 
/**
 * Decode the DCT coefficient tokens of one block, starting after the initial
 * end-of-block check (the caller, decode_block_coeffs(), has already read
 * token_prob[0]).
 *
 * @param c          range coder to read from
 * @param block      destination; coefficients are stored dequantized, at the
 *                   positions given by zigzag_scan
 * @param probs      token probabilities indexed by [position][context]
 * @param i          index of the first coefficient to decode
 * @param token_prob probability row for the first coefficient
 * @param qmul       dequantization factors: [0] is applied at index 0,
 *                   [1] at every other index
 * @return the index following the last decoded coefficient (at most 16)
 */
00699 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
00700                                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00701                                         int i, uint8_t *token_prob, int16_t qmul[2])
00702 {
      /* Enter the loop past the EOB test: it was done by the caller for the
       * first token, and is also skipped after a zero token below. */
00703     goto skip_eob;
00704     do {
00705         int coeff;
00706         if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00707             return i;
00708 
00709 skip_eob:
00710         if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
00711             if (++i == 16)
00712                 return i; // invalid input; blocks should end with EOB
              /* A zero coefficient selects context 0 for the next position. */
00713             token_prob = probs[i][0];
00714             goto skip_eob;
00715         }
00716 
00717         if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
00718             coeff = 1;
              /* A coefficient of magnitude one selects context 1 for the next position. */
00719             token_prob = probs[i+1][1];
00720         } else {
00721             if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
00722                 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
00723                 if (coeff)
00724                     coeff += vp56_rac_get_prob(c, token_prob[5]);
00725                 coeff += 2;
00726             } else {
00727                 // DCT_CAT*
00728                 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
00729                     if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
00730                         coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
00731                     } else {                                    // DCT_CAT2
00732                         coeff  = 7;
00733                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
00734                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
00735                     }
00736                 } else {    // DCT_CAT3 and up
                      /* Two bits select the category (0..3); its base value is
                       * 3 + (8 << cat) and the extra bits are read with the
                       * category's probability table. */
00737                     int a = vp56_rac_get_prob(c, token_prob[8]);
00738                     int b = vp56_rac_get_prob(c, token_prob[9+a]);
00739                     int cat = (a<<1) + b;
00740                     coeff  = 3 + (8<<cat);
00741                     coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
00742                 }
00743             }
              /* A magnitude above one selects context 2 for the next position. */
00744             token_prob = probs[i+1][2];
00745         }
          /* One more bit gives the sign; the value is dequantized on store. */
00746         block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
00747     } while (++i < 16);
00748 
00749     return i;
00750 }
00751 #endif
00752 
00764 static av_always_inline
00765 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
00766                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00767                         int i, int zero_nhood, int16_t qmul[2])
00768 {
00769     uint8_t *token_prob = probs[i][zero_nhood];
00770     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00771         return 0;
00772     return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
00773 }
00774 
00775 static av_always_inline
00776 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
00777                       uint8_t t_nnz[9], uint8_t l_nnz[9])
00778 {
00779     int i, x, y, luma_start = 0, luma_ctx = 3;
00780     int nnz_pred, nnz, nnz_total = 0;
00781     int segment = s->segment;
00782     int block_dc = 0;
00783 
00784     if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
00785         nnz_pred = t_nnz[8] + l_nnz[8];
00786 
00787         // decode DC values and do hadamard
00788         nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
00789                                   s->qmat[segment].luma_dc_qmul);
00790         l_nnz[8] = t_nnz[8] = !!nnz;
00791         if (nnz) {
00792             nnz_total += nnz;
00793             block_dc = 1;
00794             if (nnz == 1)
00795                 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
00796             else
00797                 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
00798         }
00799         luma_start = 1;
00800         luma_ctx = 0;
00801     }
00802 
00803     // luma blocks
00804     for (y = 0; y < 4; y++)
00805         for (x = 0; x < 4; x++) {
00806             nnz_pred = l_nnz[y] + t_nnz[x];
00807             nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
00808                                       nnz_pred, s->qmat[segment].luma_qmul);
00809             // nnz+block_dc may be one more than the actual last index, but we don't care
00810             s->non_zero_count_cache[y][x] = nnz + block_dc;
00811             t_nnz[x] = l_nnz[y] = !!nnz;
00812             nnz_total += nnz;
00813         }
00814 
00815     // chroma blocks
00816     // TODO: what to do about dimensions? 2nd dim for luma is x,
00817     // but for chroma it's (y<<1)|x
00818     for (i = 4; i < 6; i++)
00819         for (y = 0; y < 2; y++)
00820             for (x = 0; x < 2; x++) {
00821                 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
00822                 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
00823                                           nnz_pred, s->qmat[segment].chroma_qmul);
00824                 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
00825                 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
00826                 nnz_total += nnz;
00827             }
00828 
00829     // if there were no coded coeffs despite the macroblock not being marked skip,
00830     // we MUST not do the inner loop filter and should not do IDCT
00831     // Since skip isn't used for bitstream prediction, just manually set it.
00832     if (!nnz_total)
00833         mb->skip = 1;
00834 }
00835 
00836 static av_always_inline
00837 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00838                       int linesize, int uvlinesize, int simple)
00839 {
00840     AV_COPY128(top_border, src_y + 15*linesize);
00841     if (!simple) {
00842         AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
00843         AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
00844     }
00845 }
00846 
00847 static av_always_inline
00848 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00849                     int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
00850                     int simple, int xchg)
00851 {
00852     uint8_t *top_border_m1 = top_border-32;     // for TL prediction
00853     src_y  -=   linesize;
00854     src_cb -= uvlinesize;
00855     src_cr -= uvlinesize;
00856 
00857 #define XCHG(a,b,xchg) do {                     \
00858         if (xchg) AV_SWAP64(b,a);               \
00859         else      AV_COPY64(b,a);               \
00860     } while (0)
00861 
00862     XCHG(top_border_m1+8, src_y-8, xchg);
00863     XCHG(top_border,      src_y,   xchg);
00864     XCHG(top_border+8,    src_y+8, 1);
00865     if (mb_x < mb_width-1)
00866         XCHG(top_border+32, src_y+16, 1);
00867 
00868     // only copy chroma for normal loop filter
00869     // or to initialize the top row to 127
00870     if (!simple || !mb_y) {
00871         XCHG(top_border_m1+16, src_cb-8, xchg);
00872         XCHG(top_border_m1+24, src_cr-8, xchg);
00873         XCHG(top_border+16,    src_cb, 1);
00874         XCHG(top_border+24,    src_cr, 1);
00875     }
00876 }
00877 
00878 static av_always_inline
00879 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
00880 {
00881     if (!mb_x) {
00882         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
00883     } else {
00884         return mb_y ? mode : LEFT_DC_PRED8x8;
00885     }
00886 }
00887 
00888 static av_always_inline
00889 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
00890 {
00891     if (!mb_x) {
00892         return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
00893     } else {
00894         return mb_y ? mode : HOR_PRED8x8;
00895     }
00896 }
00897 
00898 static av_always_inline
00899 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
00900 {
00901     if (mode == DC_PRED8x8) {
00902         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00903     } else {
00904         return mode;
00905     }
00906 }
00907 
00908 static av_always_inline
00909 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
00910 {
00911     switch (mode) {
00912     case DC_PRED8x8:
00913         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00914     case VERT_PRED8x8:
00915         return !mb_y ? DC_127_PRED8x8 : mode;
00916     case HOR_PRED8x8:
00917         return !mb_x ? DC_129_PRED8x8 : mode;
00918     case PLANE_PRED8x8 /*TM*/:
00919         return check_tm_pred8x8_mode(mode, mb_x, mb_y);
00920     }
00921     return mode;
00922 }
00923 
00924 static av_always_inline
00925 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
00926 {
00927     if (!mb_x) {
00928         return mb_y ? VERT_VP8_PRED : DC_129_PRED;
00929     } else {
00930         return mb_y ? mode : HOR_VP8_PRED;
00931     }
00932 }
00933 
00934 static av_always_inline
00935 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
00936 {
00937     switch (mode) {
00938     case VERT_PRED:
00939         if (!mb_x && mb_y) {
00940             *copy_buf = 1;
00941             return mode;
00942         }
00943         /* fall-through */
00944     case DIAG_DOWN_LEFT_PRED:
00945     case VERT_LEFT_PRED:
00946         return !mb_y ? DC_127_PRED : mode;
00947     case HOR_PRED:
00948         if (!mb_y) {
00949             *copy_buf = 1;
00950             return mode;
00951         }
00952         /* fall-through */
00953     case HOR_UP_PRED:
00954         return !mb_x ? DC_129_PRED : mode;
00955     case TM_VP8_PRED:
00956         return check_tm_pred4x4_mode(mode, mb_x, mb_y);
00957     case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
00958     case DIAG_DOWN_RIGHT_PRED:
00959     case VERT_RIGHT_PRED:
00960     case HOR_DOWN_PRED:
00961         if (!mb_y || !mb_x)
00962             *copy_buf = 1;
00963         return mode;
00964     }
00965     return mode;
00966 }
00967 
00968 static av_always_inline
00969 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
00970                    int mb_x, int mb_y)
00971 {
00972     AVCodecContext *avctx = s->avctx;
00973     int x, y, mode, nnz;
00974     uint32_t tr;
00975 
00976     // for the first row, we need to run xchg_mb_border to init the top edge to 127
00977     // otherwise, skip it if we aren't going to deblock
00978     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
00979         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
00980                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
00981                        s->filter.simple, 1);
00982 
00983     if (mb->mode < MODE_I4x4) {
00984         if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
00985             mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
00986         } else {
00987             mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
00988         }
00989         s->hpc.pred16x16[mode](dst[0], s->linesize);
00990     } else {
00991         uint8_t *ptr = dst[0];
00992         uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
00993         uint8_t tr_top[4] = { 127, 127, 127, 127 };
00994 
00995         // all blocks on the right edge of the macroblock use bottom edge
00996         // the top macroblock for their topright edge
00997         uint8_t *tr_right = ptr - s->linesize + 16;
00998 
00999         // if we're on the right edge of the frame, said edge is extended
01000         // from the top macroblock
01001         if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
01002             mb_x == s->mb_width-1) {
01003             tr = tr_right[-1]*0x01010101u;
01004             tr_right = (uint8_t *)&tr;
01005         }
01006 
01007         if (mb->skip)
01008             AV_ZERO128(s->non_zero_count_cache);
01009 
01010         for (y = 0; y < 4; y++) {
01011             uint8_t *topright = ptr + 4 - s->linesize;
01012             for (x = 0; x < 4; x++) {
01013                 int copy = 0, linesize = s->linesize;
01014                 uint8_t *dst = ptr+4*x;
01015                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
01016 
01017                 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
01018                     topright = tr_top;
01019                 } else if (x == 3)
01020                     topright = tr_right;
01021 
01022                 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
01023                     mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
01024                     if (copy) {
01025                         dst = copy_dst + 12;
01026                         linesize = 8;
01027                         if (!(mb_y + y)) {
01028                             copy_dst[3] = 127U;
01029                             AV_WN32A(copy_dst+4, 127U * 0x01010101U);
01030                         } else {
01031                             AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
01032                             if (!(mb_x + x)) {
01033                                 copy_dst[3] = 129U;
01034                             } else {
01035                                 copy_dst[3] = ptr[4*x-s->linesize-1];
01036                             }
01037                         }
01038                         if (!(mb_x + x)) {
01039                             copy_dst[11] =
01040                             copy_dst[19] =
01041                             copy_dst[27] =
01042                             copy_dst[35] = 129U;
01043                         } else {
01044                             copy_dst[11] = ptr[4*x              -1];
01045                             copy_dst[19] = ptr[4*x+s->linesize  -1];
01046                             copy_dst[27] = ptr[4*x+s->linesize*2-1];
01047                             copy_dst[35] = ptr[4*x+s->linesize*3-1];
01048                         }
01049                     }
01050                 } else {
01051                     mode = intra4x4[x];
01052                 }
01053                 s->hpc.pred4x4[mode](dst, topright, linesize);
01054                 if (copy) {
01055                     AV_COPY32(ptr+4*x              , copy_dst+12);
01056                     AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
01057                     AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
01058                     AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
01059                 }
01060 
01061                 nnz = s->non_zero_count_cache[y][x];
01062                 if (nnz) {
01063                     if (nnz == 1)
01064                         s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
01065                     else
01066                         s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
01067                 }
01068                 topright += 4;
01069             }
01070 
01071             ptr   += 4*s->linesize;
01072             intra4x4 += 4;
01073         }
01074     }
01075 
01076     if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01077         mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
01078     } else {
01079         mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
01080     }
01081     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
01082     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
01083 
01084     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
01085         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
01086                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
01087                        s->filter.simple, 0);
01088 }
01089 
/* Per-subpel-position lookup, indexed by the 3-bit fractional part (0..7)
 * of a motion vector component. */
static const uint8_t subpel_idx[3][8] = {
    /* [0]: nr. of extra pixels needed on the left/top side;
     *      also used as the function pointer index */
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    /* [1]: total nr. of extra pixels required */
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    /* [2]: nr. of extra pixels needed on the right/bottom side */
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};
01096 
01113 static av_always_inline
01114 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
01115                  int x_off, int y_off, int block_w, int block_h,
01116                  int width, int height, int linesize,
01117                  vp8_mc_func mc_func[3][3])
01118 {
01119     uint8_t *src = ref->data[0];
01120 
01121     if (AV_RN32A(mv)) {
01122 
01123         int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
01124         int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
01125 
01126         x_off += mv->x >> 2;
01127         y_off += mv->y >> 2;
01128 
01129         // edge emulation
01130         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
01131         src += y_off * linesize + x_off;
01132         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01133             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01134             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
01135                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01136                                     x_off - mx_idx, y_off - my_idx, width, height);
01137             src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01138         }
01139         mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
01140     } else {
01141         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
01142         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
01143     }
01144 }
01145 
01163 static av_always_inline
01164 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
01165                    const VP56mv *mv, int x_off, int y_off,
01166                    int block_w, int block_h, int width, int height, int linesize,
01167                    vp8_mc_func mc_func[3][3])
01168 {
01169     uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
01170 
01171     if (AV_RN32A(mv)) {
01172         int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
01173         int my = mv->y&7, my_idx = subpel_idx[0][my];
01174 
01175         x_off += mv->x >> 3;
01176         y_off += mv->y >> 3;
01177 
01178         // edge emulation
01179         src1 += y_off * linesize + x_off;
01180         src2 += y_off * linesize + x_off;
01181         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
01182         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01183             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01184             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
01185                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01186                                     x_off - mx_idx, y_off - my_idx, width, height);
01187             src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01188             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01189 
01190             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
01191                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01192                                     x_off - mx_idx, y_off - my_idx, width, height);
01193             src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01194             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01195         } else {
01196             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01197             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01198         }
01199     } else {
01200         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
01201         mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01202         mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01203     }
01204 }
01205 
01206 static av_always_inline
01207 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
01208                  AVFrame *ref_frame, int x_off, int y_off,
01209                  int bx_off, int by_off,
01210                  int block_w, int block_h,
01211                  int width, int height, VP56mv *mv)
01212 {
01213     VP56mv uvmv = *mv;
01214 
01215     /* Y */
01216     vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
01217                 ref_frame, mv, x_off + bx_off, y_off + by_off,
01218                 block_w, block_h, width, height, s->linesize,
01219                 s->put_pixels_tab[block_w == 8]);
01220 
01221     /* U/V */
01222     if (s->profile == 3) {
01223         uvmv.x &= ~7;
01224         uvmv.y &= ~7;
01225     }
01226     x_off   >>= 1; y_off   >>= 1;
01227     bx_off  >>= 1; by_off  >>= 1;
01228     width   >>= 1; height  >>= 1;
01229     block_w >>= 1; block_h >>= 1;
01230     vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
01231                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
01232                   &uvmv, x_off + bx_off, y_off + by_off,
01233                   block_w, block_h, width, height, s->uvlinesize,
01234                   s->put_pixels_tab[1 + (block_w == 4)]);
01235 }
01236 
01237 /* Fetch pixels for estimated mv 4 macroblocks ahead.
01238  * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
01239 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
01240 {
01241     /* Don't prefetch refs that haven't been used very often this frame. */
01242     if (s->ref_count[ref-1] > (mb_xy >> 5)) {
01243         int x_off = mb_x << 4, y_off = mb_y << 4;
01244         int mx = (mb->mv.x>>2) + x_off + 8;
01245         int my = (mb->mv.y>>2) + y_off;
01246         uint8_t **src= s->framep[ref]->data;
01247         int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
01248         /* For threading, a ff_thread_await_progress here might be useful, but
01249          * it actually slows down the decoder. Since a bad prefetch doesn't
01250          * generate bad decoder output, we don't run it here. */
01251         s->dsp.prefetch(src[0]+off, s->linesize, 4);
01252         off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
01253         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01254     }
01255 }
01256 
01260 static av_always_inline
01261 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
01262                    int mb_x, int mb_y)
01263 {
01264     int x_off = mb_x << 4, y_off = mb_y << 4;
01265     int width = 16*s->mb_width, height = 16*s->mb_height;
01266     AVFrame *ref = s->framep[mb->ref_frame];
01267     VP56mv *bmv = mb->bmv;
01268 
01269     switch (mb->partitioning) {
01270     case VP8_SPLITMVMODE_NONE:
01271         vp8_mc_part(s, dst, ref, x_off, y_off,
01272                     0, 0, 16, 16, width, height, &mb->mv);
01273         break;
01274     case VP8_SPLITMVMODE_4x4: {
01275         int x, y;
01276         VP56mv uvmv;
01277 
01278         /* Y */
01279         for (y = 0; y < 4; y++) {
01280             for (x = 0; x < 4; x++) {
01281                 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
01282                             ref, &bmv[4*y + x],
01283                             4*x + x_off, 4*y + y_off, 4, 4,
01284                             width, height, s->linesize,
01285                             s->put_pixels_tab[2]);
01286             }
01287         }
01288 
01289         /* U/V */
01290         x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
01291         for (y = 0; y < 2; y++) {
01292             for (x = 0; x < 2; x++) {
01293                 uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
01294                          mb->bmv[ 2*y    * 4 + 2*x+1].x +
01295                          mb->bmv[(2*y+1) * 4 + 2*x  ].x +
01296                          mb->bmv[(2*y+1) * 4 + 2*x+1].x;
01297                 uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
01298                          mb->bmv[ 2*y    * 4 + 2*x+1].y +
01299                          mb->bmv[(2*y+1) * 4 + 2*x  ].y +
01300                          mb->bmv[(2*y+1) * 4 + 2*x+1].y;
01301                 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
01302                 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
01303                 if (s->profile == 3) {
01304                     uvmv.x &= ~7;
01305                     uvmv.y &= ~7;
01306                 }
01307                 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
01308                               dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
01309                               4*x + x_off, 4*y + y_off, 4, 4,
01310                               width, height, s->uvlinesize,
01311                               s->put_pixels_tab[2]);
01312             }
01313         }
01314         break;
01315     }
01316     case VP8_SPLITMVMODE_16x8:
01317         vp8_mc_part(s, dst, ref, x_off, y_off,
01318                     0, 0, 16, 8, width, height, &bmv[0]);
01319         vp8_mc_part(s, dst, ref, x_off, y_off,
01320                     0, 8, 16, 8, width, height, &bmv[1]);
01321         break;
01322     case VP8_SPLITMVMODE_8x16:
01323         vp8_mc_part(s, dst, ref, x_off, y_off,
01324                     0, 0, 8, 16, width, height, &bmv[0]);
01325         vp8_mc_part(s, dst, ref, x_off, y_off,
01326                     8, 0, 8, 16, width, height, &bmv[1]);
01327         break;
01328     case VP8_SPLITMVMODE_8x8:
01329         vp8_mc_part(s, dst, ref, x_off, y_off,
01330                     0, 0, 8, 8, width, height, &bmv[0]);
01331         vp8_mc_part(s, dst, ref, x_off, y_off,
01332                     8, 0, 8, 8, width, height, &bmv[1]);
01333         vp8_mc_part(s, dst, ref, x_off, y_off,
01334                     0, 8, 8, 8, width, height, &bmv[2]);
01335         vp8_mc_part(s, dst, ref, x_off, y_off,
01336                     8, 8, 8, 8, width, height, &bmv[3]);
01337         break;
01338     }
01339 }
01340 
01341 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
01342 {
01343     int x, y, ch;
01344 
01345     if (mb->mode != MODE_I4x4) {
01346         uint8_t *y_dst = dst[0];
01347         for (y = 0; y < 4; y++) {
01348             uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
01349             if (nnz4) {
01350                 if (nnz4&~0x01010101) {
01351                     for (x = 0; x < 4; x++) {
01352                         if ((uint8_t)nnz4 == 1)
01353                             s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
01354                         else if((uint8_t)nnz4 > 1)
01355                             s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
01356                         nnz4 >>= 8;
01357                         if (!nnz4)
01358                             break;
01359                     }
01360                 } else {
01361                     s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
01362                 }
01363             }
01364             y_dst += 4*s->linesize;
01365         }
01366     }
01367 
01368     for (ch = 0; ch < 2; ch++) {
01369         uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
01370         if (nnz4) {
01371             uint8_t *ch_dst = dst[1+ch];
01372             if (nnz4&~0x01010101) {
01373                 for (y = 0; y < 2; y++) {
01374                     for (x = 0; x < 2; x++) {
01375                         if ((uint8_t)nnz4 == 1)
01376                             s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01377                         else if((uint8_t)nnz4 > 1)
01378                             s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01379                         nnz4 >>= 8;
01380                         if (!nnz4)
01381                             goto chroma_idct_end;
01382                     }
01383                     ch_dst += 4*s->uvlinesize;
01384                 }
01385             } else {
01386                 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
01387             }
01388         }
01389 chroma_idct_end: ;
01390     }
01391 }
01392 
01393 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
01394 {
01395     int interior_limit, filter_level;
01396 
01397     if (s->segmentation.enabled) {
01398         filter_level = s->segmentation.filter_level[s->segment];
01399         if (!s->segmentation.absolute_vals)
01400             filter_level += s->filter.level;
01401     } else
01402         filter_level = s->filter.level;
01403 
01404     if (s->lf_delta.enabled) {
01405         filter_level += s->lf_delta.ref[mb->ref_frame];
01406         filter_level += s->lf_delta.mode[mb->mode];
01407     }
01408 
01409     filter_level = av_clip_uintp2(filter_level, 6);
01410 
01411     interior_limit = filter_level;
01412     if (s->filter.sharpness) {
01413         interior_limit >>= (s->filter.sharpness + 3) >> 2;
01414         interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
01415     }
01416     interior_limit = FFMAX(interior_limit, 1);
01417 
01418     f->filter_level = filter_level;
01419     f->inner_limit = interior_limit;
01420     f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
01421 }
01422 
01423 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
01424 {
01425     int mbedge_lim, bedge_lim, hev_thresh;
01426     int filter_level = f->filter_level;
01427     int inner_limit = f->inner_limit;
01428     int inner_filter = f->inner_filter;
01429     int linesize = s->linesize;
01430     int uvlinesize = s->uvlinesize;
01431     static const uint8_t hev_thresh_lut[2][64] = {
01432         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01433           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01434           3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
01435           3, 3, 3, 3 },
01436         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01437           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01438           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01439           2, 2, 2, 2 }
01440     };
01441 
01442     if (!filter_level)
01443         return;
01444 
01445      bedge_lim = 2*filter_level + inner_limit;
01446     mbedge_lim = bedge_lim + 4;
01447 
01448     hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
01449 
01450     if (mb_x) {
01451         s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
01452                                        mbedge_lim, inner_limit, hev_thresh);
01453         s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01454                                        mbedge_lim, inner_limit, hev_thresh);
01455     }
01456 
01457     if (inner_filter) {
01458         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
01459                                              inner_limit, hev_thresh);
01460         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
01461                                              inner_limit, hev_thresh);
01462         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
01463                                              inner_limit, hev_thresh);
01464         s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
01465                                              uvlinesize,  bedge_lim,
01466                                              inner_limit, hev_thresh);
01467     }
01468 
01469     if (mb_y) {
01470         s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
01471                                        mbedge_lim, inner_limit, hev_thresh);
01472         s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01473                                        mbedge_lim, inner_limit, hev_thresh);
01474     }
01475 
01476     if (inner_filter) {
01477         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
01478                                              linesize,    bedge_lim,
01479                                              inner_limit, hev_thresh);
01480         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
01481                                              linesize,    bedge_lim,
01482                                              inner_limit, hev_thresh);
01483         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
01484                                              linesize,    bedge_lim,
01485                                              inner_limit, hev_thresh);
01486         s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
01487                                              dst[2] + 4 * uvlinesize,
01488                                              uvlinesize,  bedge_lim,
01489                                              inner_limit, hev_thresh);
01490     }
01491 }
01492 
01493 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
01494 {
01495     int mbedge_lim, bedge_lim;
01496     int filter_level = f->filter_level;
01497     int inner_limit = f->inner_limit;
01498     int inner_filter = f->inner_filter;
01499     int linesize = s->linesize;
01500 
01501     if (!filter_level)
01502         return;
01503 
01504      bedge_lim = 2*filter_level + inner_limit;
01505     mbedge_lim = bedge_lim + 4;
01506 
01507     if (mb_x)
01508         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
01509     if (inner_filter) {
01510         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
01511         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
01512         s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
01513     }
01514 
01515     if (mb_y)
01516         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
01517     if (inner_filter) {
01518         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
01519         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
01520         s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
01521     }
01522 }
01523 
01524 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
01525 {
01526     VP8FilterStrength *f = s->filter_strength;
01527     uint8_t *dst[3] = {
01528         curframe->data[0] + 16*mb_y*s->linesize,
01529         curframe->data[1] +  8*mb_y*s->uvlinesize,
01530         curframe->data[2] +  8*mb_y*s->uvlinesize
01531     };
01532     int mb_x;
01533 
01534     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01535         backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
01536         filter_mb(s, dst, f++, mb_x, mb_y);
01537         dst[0] += 16;
01538         dst[1] += 8;
01539         dst[2] += 8;
01540     }
01541 }
01542 
01543 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
01544 {
01545     VP8FilterStrength *f = s->filter_strength;
01546     uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
01547     int mb_x;
01548 
01549     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01550         backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
01551         filter_mb_simple(s, dst, f++, mb_x, mb_y);
01552         dst += 16;
01553     }
01554 }
01555 
01556 static void release_queued_segmaps(VP8Context *s, int is_close)
01557 {
01558     int leave_behind = is_close ? 0 : !s->maps_are_invalid;
01559     while (s->num_maps_to_be_freed > leave_behind)
01560         av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
01561     s->maps_are_invalid = 0;
01562 }
01563 
01564 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
01565                             AVPacket *avpkt)
01566 {
01567     VP8Context *s = avctx->priv_data;
01568     int ret, mb_x, mb_y, i, y, referenced;
01569     enum AVDiscard skip_thresh;
01570     AVFrame *av_uninit(curframe), *prev_frame;
01571 
01572     release_queued_segmaps(s, 0);
01573 
01574     if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
01575         return ret;
01576 
01577     prev_frame = s->framep[VP56_FRAME_CURRENT];
01578 
01579     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
01580                                 || s->update_altref == VP56_FRAME_CURRENT;
01581 
01582     skip_thresh = !referenced ? AVDISCARD_NONREF :
01583                     !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
01584 
01585     if (avctx->skip_frame >= skip_thresh) {
01586         s->invisible = 1;
01587         goto skip_decode;
01588     }
01589     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
01590 
01591     // release no longer referenced frames
01592     for (i = 0; i < 5; i++)
01593         if (s->frames[i].data[0] &&
01594             &s->frames[i] != prev_frame &&
01595             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01596             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01597             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
01598             vp8_release_frame(s, &s->frames[i], 1, 0);
01599 
01600     // find a free buffer
01601     for (i = 0; i < 5; i++)
01602         if (&s->frames[i] != prev_frame &&
01603             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01604             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01605             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
01606             curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
01607             break;
01608         }
01609     if (i == 5) {
01610         av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
01611         abort();
01612     }
01613     if (curframe->data[0])
01614         vp8_release_frame(s, curframe, 1, 0);
01615 
01616     curframe->key_frame = s->keyframe;
01617     curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
01618     curframe->reference = referenced ? 3 : 0;
01619     if ((ret = vp8_alloc_frame(s, curframe))) {
01620         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
01621         return ret;
01622     }
01623 
01624     // check if golden and altref are swapped
01625     if (s->update_altref != VP56_FRAME_NONE) {
01626         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
01627     } else {
01628         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
01629     }
01630     if (s->update_golden != VP56_FRAME_NONE) {
01631         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
01632     } else {
01633         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
01634     }
01635     if (s->update_last) {
01636         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
01637     } else {
01638         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
01639     }
01640     s->next_framep[VP56_FRAME_CURRENT]      = curframe;
01641 
01642     ff_thread_finish_setup(avctx);
01643 
01644     // Given that arithmetic probabilities are updated every frame, it's quite likely
01645     // that the values we have on a random interframe are complete junk if we didn't
01646     // start decode on a keyframe. So just don't display anything rather than junk.
01647     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
01648                          !s->framep[VP56_FRAME_GOLDEN] ||
01649                          !s->framep[VP56_FRAME_GOLDEN2])) {
01650         av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
01651         return AVERROR_INVALIDDATA;
01652     }
01653 
01654     s->linesize   = curframe->linesize[0];
01655     s->uvlinesize = curframe->linesize[1];
01656 
01657     if (!s->edge_emu_buffer)
01658         s->edge_emu_buffer = av_malloc(21*s->linesize);
01659 
01660     memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
01661 
01662     /* Zero macroblock structures for top/top-left prediction from outside the frame. */
01663     memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
01664 
01665     // top edge of 127 for intra prediction
01666     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01667         s->top_border[0][15] = s->top_border[0][23] = 127;
01668         memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
01669     }
01670     memset(s->ref_count, 0, sizeof(s->ref_count));
01671     if (s->keyframe)
01672         memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
01673 
01674 #define MARGIN (16 << 2)
01675     s->mv_min.y = -MARGIN;
01676     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
01677 
01678     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
01679         VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
01680         VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
01681         int mb_xy = mb_y*s->mb_width;
01682         uint8_t *dst[3] = {
01683             curframe->data[0] + 16*mb_y*s->linesize,
01684             curframe->data[1] +  8*mb_y*s->uvlinesize,
01685             curframe->data[2] +  8*mb_y*s->uvlinesize
01686         };
01687 
01688         memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
01689         memset(s->left_nnz, 0, sizeof(s->left_nnz));
01690         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
01691 
01692         // left edge of 129 for intra prediction
01693         if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01694             for (i = 0; i < 3; i++)
01695                 for (y = 0; y < 16>>!!i; y++)
01696                     dst[i][y*curframe->linesize[i]-1] = 129;
01697             if (mb_y == 1) // top left edge is also 129
01698                 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
01699         }
01700 
01701         s->mv_min.x = -MARGIN;
01702         s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
01703         if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
01704             ff_thread_await_progress(prev_frame, mb_y, 0);
01705 
01706         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
01707             /* Prefetch the current frame, 4 MBs ahead */
01708             s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
01709             s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
01710 
01711             decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
01712                            prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
01713 
01714             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
01715 
01716             if (!mb->skip)
01717                 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
01718 
01719             if (mb->mode <= MODE_I4x4)
01720                 intra_predict(s, dst, mb, mb_x, mb_y);
01721             else
01722                 inter_predict(s, dst, mb, mb_x, mb_y);
01723 
01724             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
01725 
01726             if (!mb->skip) {
01727                 idct_mb(s, dst, mb);
01728             } else {
01729                 AV_ZERO64(s->left_nnz);
01730                 AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
01731 
01732                 // Reset DC block predictors if they would exist if the mb had coefficients
01733                 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
01734                     s->left_nnz[8]      = 0;
01735                     s->top_nnz[mb_x][8] = 0;
01736                 }
01737             }
01738 
01739             if (s->deblock_filter)
01740                 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
01741 
01742             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
01743 
01744             dst[0] += 16;
01745             dst[1] += 8;
01746             dst[2] += 8;
01747             s->mv_min.x -= 64;
01748             s->mv_max.x -= 64;
01749         }
01750         if (s->deblock_filter) {
01751             if (s->filter.simple)
01752                 filter_mb_row_simple(s, curframe, mb_y);
01753             else
01754                 filter_mb_row(s, curframe, mb_y);
01755         }
01756         s->mv_min.y -= 64;
01757         s->mv_max.y -= 64;
01758 
01759         ff_thread_report_progress(curframe, mb_y, 0);
01760     }
01761 
01762     ff_thread_report_progress(curframe, INT_MAX, 0);
01763 skip_decode:
01764     // if future frames don't use the updated probabilities,
01765     // reset them to the values we saved
01766     if (!s->update_probabilities)
01767         s->prob[0] = s->prob[1];
01768 
01769     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
01770 
01771     if (!s->invisible) {
01772         *(AVFrame*)data = *curframe;
01773         *data_size = sizeof(AVFrame);
01774     }
01775 
01776     return avpkt->size;
01777 }
01778 
01779 static av_cold int vp8_decode_init(AVCodecContext *avctx)
01780 {
01781     VP8Context *s = avctx->priv_data;
01782 
01783     s->avctx = avctx;
01784     avctx->pix_fmt = PIX_FMT_YUV420P;
01785 
01786     dsputil_init(&s->dsp, avctx);
01787     ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
01788     ff_vp8dsp_init(&s->vp8dsp);
01789 
01790     return 0;
01791 }
01792 
01793 static av_cold int vp8_decode_free(AVCodecContext *avctx)
01794 {
01795     vp8_decode_flush_impl(avctx, 0, 1, 1);
01796     release_queued_segmaps(avctx->priv_data, 1);
01797     return 0;
01798 }
01799 
01800 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
01801 {
01802     VP8Context *s = avctx->priv_data;
01803 
01804     s->avctx = avctx;
01805 
01806     return 0;
01807 }
01808 
/* Map a pointer into s_src->frames[] onto the same slot of s->frames[];
 * a NULL pointer stays NULL. Expects pointers named `s` and `s_src` in the
 * expanding scope. The argument and the whole expansion are parenthesized
 * so the macro can be used safely inside larger expressions. */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
01811 
01812 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
01813 {
01814     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
01815 
01816     if (s->macroblocks_base &&
01817         (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
01818         free_buffers(s);
01819         s->maps_are_invalid = 1;
01820     }
01821 
01822     s->prob[0] = s_src->prob[!s_src->update_probabilities];
01823     s->segmentation = s_src->segmentation;
01824     s->lf_delta = s_src->lf_delta;
01825     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
01826 
01827     memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
01828     s->framep[0] = REBASE(s_src->next_framep[0]);
01829     s->framep[1] = REBASE(s_src->next_framep[1]);
01830     s->framep[2] = REBASE(s_src->next_framep[2]);
01831     s->framep[3] = REBASE(s_src->next_framep[3]);
01832 
01833     return 0;
01834 }
01835 
01836 AVCodec ff_vp8_decoder = {
01837     .name           = "vp8",
01838     .type           = AVMEDIA_TYPE_VIDEO,
01839     .id             = CODEC_ID_VP8,
01840     .priv_data_size = sizeof(VP8Context),
01841     .init           = vp8_decode_init,
01842     .close          = vp8_decode_free,
01843     .decode         = vp8_decode_frame,
01844     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
01845     .flush = vp8_decode_flush,
01846     .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
01847     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
01848     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
01849 };