| // SPDX-License-Identifier: GPL-2.0+ |
| /* |
| * Copyright (C) 2018 Maxime Jourdan <mjourdan@baylibre.com> |
| * Copyright (C) 2015 Amlogic, Inc. All rights reserved. |
| */ |
| |
| #include <media/v4l2-mem2mem.h> |
| #include <media/videobuf2-dma-contig.h> |
| |
| #include "dos_regs.h" |
| #include "hevc_regs.h" |
| #include "codec_vp9.h" |
| #include "vdec_helpers.h" |
| #include "codec_hevc_common.h" |
| |
| /* HEVC reg mapping */ |
| #define VP9_DEC_STATUS_REG HEVC_ASSIST_SCRATCH_0 |
| #define VP9_10B_DECODE_SLICE 5 |
| #define VP9_HEAD_PARSER_DONE 0xf0 |
| #define VP9_RPM_BUFFER HEVC_ASSIST_SCRATCH_1 |
| #define VP9_SHORT_TERM_RPS HEVC_ASSIST_SCRATCH_2 |
| #define VP9_ADAPT_PROB_REG HEVC_ASSIST_SCRATCH_3 |
| #define VP9_MMU_MAP_BUFFER HEVC_ASSIST_SCRATCH_4 |
| #define VP9_PPS_BUFFER HEVC_ASSIST_SCRATCH_5 |
| #define VP9_SAO_UP HEVC_ASSIST_SCRATCH_6 |
| #define VP9_STREAM_SWAP_BUFFER HEVC_ASSIST_SCRATCH_7 |
| #define VP9_STREAM_SWAP_BUFFER2 HEVC_ASSIST_SCRATCH_8 |
| #define VP9_PROB_SWAP_BUFFER HEVC_ASSIST_SCRATCH_9 |
| #define VP9_COUNT_SWAP_BUFFER HEVC_ASSIST_SCRATCH_A |
| #define VP9_SEG_MAP_BUFFER HEVC_ASSIST_SCRATCH_B |
| #define VP9_SCALELUT HEVC_ASSIST_SCRATCH_D |
| #define VP9_WAIT_FLAG HEVC_ASSIST_SCRATCH_E |
| #define LMEM_DUMP_ADR HEVC_ASSIST_SCRATCH_F |
| #define NAL_SEARCH_CTL HEVC_ASSIST_SCRATCH_I |
| #define VP9_DECODE_MODE HEVC_ASSIST_SCRATCH_J |
| #define DECODE_MODE_SINGLE 0 |
| #define DECODE_STOP_POS HEVC_ASSIST_SCRATCH_K |
| #define HEVC_DECODE_COUNT HEVC_ASSIST_SCRATCH_M |
| #define HEVC_DECODE_SIZE HEVC_ASSIST_SCRATCH_N |
| |
| /* VP9 Constants */ |
| #define LCU_SIZE 64 |
| #define MAX_REF_PIC_NUM 24 |
| #define REFS_PER_FRAME 3 |
| #define REF_FRAMES 8 |
| #define MV_MEM_UNIT 0x240 |
| #define ADAPT_PROB_SIZE 0xf80 |
| |
/* VP9 frame types as carried in the uncompressed frame header */
enum FRAME_TYPE {
	KEY_FRAME = 0,
	INTER_FRAME = 1,
	FRAME_TYPES,
};
| |
| /* VP9 Workspace layout */ |
| #define MPRED_MV_BUF_SIZE 0x120000 |
| |
| #define IPP_SIZE 0x4000 |
| #define SAO_ABV_SIZE 0x30000 |
| #define SAO_VB_SIZE 0x30000 |
| #define SH_TM_RPS_SIZE 0x800 |
| #define VPS_SIZE 0x800 |
| #define SPS_SIZE 0x800 |
| #define PPS_SIZE 0x2000 |
| #define SAO_UP_SIZE 0x2800 |
| #define SWAP_BUF_SIZE 0x800 |
| #define SWAP_BUF2_SIZE 0x800 |
| #define SCALELUT_SIZE 0x8000 |
| #define DBLK_PARA_SIZE 0x80000 |
| #define DBLK_DATA_SIZE 0x80000 |
| #define SEG_MAP_SIZE 0xd800 |
| #define PROB_SIZE 0x5000 |
| #define COUNT_SIZE 0x3000 |
| #define MMU_VBH_SIZE 0x5000 |
| #define MPRED_ABV_SIZE 0x10000 |
| #define MPRED_MV_SIZE (MPRED_MV_BUF_SIZE * MAX_REF_PIC_NUM) |
| #define RPM_BUF_SIZE 0x100 |
| #define LMEM_SIZE 0x800 |
| |
| #define IPP_OFFSET 0x00 |
| #define SAO_ABV_OFFSET (IPP_OFFSET + IPP_SIZE) |
| #define SAO_VB_OFFSET (SAO_ABV_OFFSET + SAO_ABV_SIZE) |
| #define SH_TM_RPS_OFFSET (SAO_VB_OFFSET + SAO_VB_SIZE) |
| #define VPS_OFFSET (SH_TM_RPS_OFFSET + SH_TM_RPS_SIZE) |
| #define SPS_OFFSET (VPS_OFFSET + VPS_SIZE) |
| #define PPS_OFFSET (SPS_OFFSET + SPS_SIZE) |
| #define SAO_UP_OFFSET (PPS_OFFSET + PPS_SIZE) |
| #define SWAP_BUF_OFFSET (SAO_UP_OFFSET + SAO_UP_SIZE) |
| #define SWAP_BUF2_OFFSET (SWAP_BUF_OFFSET + SWAP_BUF_SIZE) |
| #define SCALELUT_OFFSET (SWAP_BUF2_OFFSET + SWAP_BUF2_SIZE) |
| #define DBLK_PARA_OFFSET (SCALELUT_OFFSET + SCALELUT_SIZE) |
| #define DBLK_DATA_OFFSET (DBLK_PARA_OFFSET + DBLK_PARA_SIZE) |
| #define SEG_MAP_OFFSET (DBLK_DATA_OFFSET + DBLK_DATA_SIZE) |
| #define PROB_OFFSET (SEG_MAP_OFFSET + SEG_MAP_SIZE) |
| #define COUNT_OFFSET (PROB_OFFSET + PROB_SIZE) |
| #define MMU_VBH_OFFSET (COUNT_OFFSET + COUNT_SIZE) |
| #define MPRED_ABV_OFFSET (MMU_VBH_OFFSET + MMU_VBH_SIZE) |
| #define MPRED_MV_OFFSET (MPRED_ABV_OFFSET + MPRED_ABV_SIZE) |
| #define RPM_OFFSET (MPRED_MV_OFFSET + MPRED_MV_SIZE) |
| #define LMEM_OFFSET (RPM_OFFSET + RPM_BUF_SIZE) |
| |
| #define SIZE_WORKSPACE ALIGN(LMEM_OFFSET + LMEM_SIZE, 64 * SZ_1K) |
| |
| #define NONE -1 |
| #define INTRA_FRAME 0 |
| #define LAST_FRAME 1 |
| #define GOLDEN_FRAME 2 |
| #define ALTREF_FRAME 3 |
| #define MAX_REF_FRAMES 4 |
| |
/*
 * Defines, declarations and sub-functions for the VP9 de-block loop
 * filter threshold/level table update.
 * - struct segmentation is used for loop filtering only (trimmed down)
 * - vp9_loop_filter_init and vp9_loop_filter_frame_init are called from
 *   the decoder entry code
 * - vp9_loop_filter_init runs once before decoding starts
 * - vp9_loop_filter_frame_init runs before each frame is decoded
 * - setting the video format to VP9 is done in vp9_loop_filter_init
 */
| #define MAX_LOOP_FILTER 63 |
| #define MAX_REF_LF_DELTAS 4 |
| #define MAX_MODE_LF_DELTAS 2 |
| #define SEGMENT_DELTADATA 0 |
| #define SEGMENT_ABSDATA 1 |
| #define MAX_SEGMENTS 8 |
| |
| /* VP9 PROB processing defines */ |
| #define VP9_PARTITION_START 0 |
| #define VP9_PARTITION_SIZE_STEP (3 * 4) |
| #define VP9_PARTITION_ONE_SIZE (4 * VP9_PARTITION_SIZE_STEP) |
| #define VP9_PARTITION_KEY_START 0 |
| #define VP9_PARTITION_P_START VP9_PARTITION_ONE_SIZE |
| #define VP9_PARTITION_SIZE (2 * VP9_PARTITION_ONE_SIZE) |
| #define VP9_SKIP_START (VP9_PARTITION_START + VP9_PARTITION_SIZE) |
| #define VP9_SKIP_SIZE 4 /* only use 3*/ |
| #define VP9_TX_MODE_START (VP9_SKIP_START + VP9_SKIP_SIZE) |
| #define VP9_TX_MODE_8_0_OFFSET 0 |
| #define VP9_TX_MODE_8_1_OFFSET 1 |
| #define VP9_TX_MODE_16_0_OFFSET 2 |
| #define VP9_TX_MODE_16_1_OFFSET 4 |
| #define VP9_TX_MODE_32_0_OFFSET 6 |
| #define VP9_TX_MODE_32_1_OFFSET 9 |
| #define VP9_TX_MODE_SIZE 12 |
| #define VP9_COEF_START (VP9_TX_MODE_START + VP9_TX_MODE_SIZE) |
| #define VP9_COEF_BAND_0_OFFSET 0 |
| #define VP9_COEF_BAND_1_OFFSET (VP9_COEF_BAND_0_OFFSET + 3 * 3 + 1) |
| #define VP9_COEF_BAND_2_OFFSET (VP9_COEF_BAND_1_OFFSET + 6 * 3) |
| #define VP9_COEF_BAND_3_OFFSET (VP9_COEF_BAND_2_OFFSET + 6 * 3) |
| #define VP9_COEF_BAND_4_OFFSET (VP9_COEF_BAND_3_OFFSET + 6 * 3) |
| #define VP9_COEF_BAND_5_OFFSET (VP9_COEF_BAND_4_OFFSET + 6 * 3) |
| #define VP9_COEF_SIZE_ONE_SET 100 /* ((3 + 5 * 6) * 3 + 1 padding)*/ |
| #define VP9_COEF_4X4_START (VP9_COEF_START + 0 * VP9_COEF_SIZE_ONE_SET) |
| #define VP9_COEF_8X8_START (VP9_COEF_START + 4 * VP9_COEF_SIZE_ONE_SET) |
| #define VP9_COEF_16X16_START (VP9_COEF_START + 8 * VP9_COEF_SIZE_ONE_SET) |
| #define VP9_COEF_32X32_START (VP9_COEF_START + 12 * VP9_COEF_SIZE_ONE_SET) |
| #define VP9_COEF_SIZE_PLANE (2 * VP9_COEF_SIZE_ONE_SET) |
| #define VP9_COEF_SIZE (4 * 2 * 2 * VP9_COEF_SIZE_ONE_SET) |
| #define VP9_INTER_MODE_START (VP9_COEF_START + VP9_COEF_SIZE) |
| #define VP9_INTER_MODE_SIZE 24 /* only use 21 (# * 7)*/ |
| #define VP9_INTERP_START (VP9_INTER_MODE_START + VP9_INTER_MODE_SIZE) |
| #define VP9_INTERP_SIZE 8 |
| #define VP9_INTRA_INTER_START (VP9_INTERP_START + VP9_INTERP_SIZE) |
| #define VP9_INTRA_INTER_SIZE 4 |
| #define VP9_INTERP_INTRA_INTER_START VP9_INTERP_START |
| #define VP9_INTERP_INTRA_INTER_SIZE (VP9_INTERP_SIZE + VP9_INTRA_INTER_SIZE) |
| #define VP9_COMP_INTER_START \ |
| (VP9_INTERP_INTRA_INTER_START + VP9_INTERP_INTRA_INTER_SIZE) |
| #define VP9_COMP_INTER_SIZE 5 |
| #define VP9_COMP_REF_START (VP9_COMP_INTER_START + VP9_COMP_INTER_SIZE) |
| #define VP9_COMP_REF_SIZE 5 |
| #define VP9_SINGLE_REF_START (VP9_COMP_REF_START + VP9_COMP_REF_SIZE) |
| #define VP9_SINGLE_REF_SIZE 10 |
| #define VP9_REF_MODE_START VP9_COMP_INTER_START |
| #define VP9_REF_MODE_SIZE \ |
| (VP9_COMP_INTER_SIZE + VP9_COMP_REF_SIZE + VP9_SINGLE_REF_SIZE) |
| #define VP9_IF_Y_MODE_START (VP9_REF_MODE_START + VP9_REF_MODE_SIZE) |
| #define VP9_IF_Y_MODE_SIZE 36 |
| #define VP9_IF_UV_MODE_START (VP9_IF_Y_MODE_START + VP9_IF_Y_MODE_SIZE) |
| #define VP9_IF_UV_MODE_SIZE 92 /* only use 90*/ |
| #define VP9_MV_JOINTS_START (VP9_IF_UV_MODE_START + VP9_IF_UV_MODE_SIZE) |
| #define VP9_MV_JOINTS_SIZE 3 |
| #define VP9_MV_SIGN_0_START (VP9_MV_JOINTS_START + VP9_MV_JOINTS_SIZE) |
| #define VP9_MV_SIGN_0_SIZE 1 |
| #define VP9_MV_CLASSES_0_START (VP9_MV_SIGN_0_START + VP9_MV_SIGN_0_SIZE) |
| #define VP9_MV_CLASSES_0_SIZE 10 |
| #define VP9_MV_CLASS0_0_START \ |
| (VP9_MV_CLASSES_0_START + VP9_MV_CLASSES_0_SIZE) |
| #define VP9_MV_CLASS0_0_SIZE 1 |
| #define VP9_MV_BITS_0_START (VP9_MV_CLASS0_0_START + VP9_MV_CLASS0_0_SIZE) |
| #define VP9_MV_BITS_0_SIZE 10 |
| #define VP9_MV_SIGN_1_START (VP9_MV_BITS_0_START + VP9_MV_BITS_0_SIZE) |
| #define VP9_MV_SIGN_1_SIZE 1 |
| #define VP9_MV_CLASSES_1_START \ |
| (VP9_MV_SIGN_1_START + VP9_MV_SIGN_1_SIZE) |
| #define VP9_MV_CLASSES_1_SIZE 10 |
| #define VP9_MV_CLASS0_1_START \ |
| (VP9_MV_CLASSES_1_START + VP9_MV_CLASSES_1_SIZE) |
| #define VP9_MV_CLASS0_1_SIZE 1 |
| #define VP9_MV_BITS_1_START \ |
| (VP9_MV_CLASS0_1_START + VP9_MV_CLASS0_1_SIZE) |
| #define VP9_MV_BITS_1_SIZE 10 |
| #define VP9_MV_CLASS0_FP_0_START \ |
| (VP9_MV_BITS_1_START + VP9_MV_BITS_1_SIZE) |
| #define VP9_MV_CLASS0_FP_0_SIZE 9 |
| #define VP9_MV_CLASS0_FP_1_START \ |
| (VP9_MV_CLASS0_FP_0_START + VP9_MV_CLASS0_FP_0_SIZE) |
| #define VP9_MV_CLASS0_FP_1_SIZE 9 |
| #define VP9_MV_CLASS0_HP_0_START \ |
| (VP9_MV_CLASS0_FP_1_START + VP9_MV_CLASS0_FP_1_SIZE) |
| #define VP9_MV_CLASS0_HP_0_SIZE 2 |
| #define VP9_MV_CLASS0_HP_1_START \ |
| (VP9_MV_CLASS0_HP_0_START + VP9_MV_CLASS0_HP_0_SIZE) |
| #define VP9_MV_CLASS0_HP_1_SIZE 2 |
| #define VP9_MV_START VP9_MV_JOINTS_START |
| #define VP9_MV_SIZE 72 /*only use 69*/ |
| |
| #define VP9_TOTAL_SIZE (VP9_MV_START + VP9_MV_SIZE) |
| |
| /* VP9 COUNT mem processing defines */ |
| #define VP9_COEF_COUNT_START 0 |
| #define VP9_COEF_COUNT_BAND_0_OFFSET 0 |
| #define VP9_COEF_COUNT_BAND_1_OFFSET \ |
| (VP9_COEF_COUNT_BAND_0_OFFSET + 3 * 5) |
| #define VP9_COEF_COUNT_BAND_2_OFFSET \ |
| (VP9_COEF_COUNT_BAND_1_OFFSET + 6 * 5) |
| #define VP9_COEF_COUNT_BAND_3_OFFSET \ |
| (VP9_COEF_COUNT_BAND_2_OFFSET + 6 * 5) |
| #define VP9_COEF_COUNT_BAND_4_OFFSET \ |
| (VP9_COEF_COUNT_BAND_3_OFFSET + 6 * 5) |
| #define VP9_COEF_COUNT_BAND_5_OFFSET \ |
| (VP9_COEF_COUNT_BAND_4_OFFSET + 6 * 5) |
| #define VP9_COEF_COUNT_SIZE_ONE_SET 165 /* ((3 + 5 * 6) * 5 */ |
| #define VP9_COEF_COUNT_4X4_START \ |
| (VP9_COEF_COUNT_START + 0 * VP9_COEF_COUNT_SIZE_ONE_SET) |
| #define VP9_COEF_COUNT_8X8_START \ |
| (VP9_COEF_COUNT_START + 4 * VP9_COEF_COUNT_SIZE_ONE_SET) |
| #define VP9_COEF_COUNT_16X16_START \ |
| (VP9_COEF_COUNT_START + 8 * VP9_COEF_COUNT_SIZE_ONE_SET) |
| #define VP9_COEF_COUNT_32X32_START \ |
| (VP9_COEF_COUNT_START + 12 * VP9_COEF_COUNT_SIZE_ONE_SET) |
| #define VP9_COEF_COUNT_SIZE_PLANE (2 * VP9_COEF_COUNT_SIZE_ONE_SET) |
| #define VP9_COEF_COUNT_SIZE (4 * 2 * 2 * VP9_COEF_COUNT_SIZE_ONE_SET) |
| |
| #define VP9_INTRA_INTER_COUNT_START \ |
| (VP9_COEF_COUNT_START + VP9_COEF_COUNT_SIZE) |
| #define VP9_INTRA_INTER_COUNT_SIZE (4 * 2) |
| #define VP9_COMP_INTER_COUNT_START \ |
| (VP9_INTRA_INTER_COUNT_START + VP9_INTRA_INTER_COUNT_SIZE) |
| #define VP9_COMP_INTER_COUNT_SIZE (5 * 2) |
| #define VP9_COMP_REF_COUNT_START \ |
| (VP9_COMP_INTER_COUNT_START + VP9_COMP_INTER_COUNT_SIZE) |
| #define VP9_COMP_REF_COUNT_SIZE (5 * 2) |
| #define VP9_SINGLE_REF_COUNT_START \ |
| (VP9_COMP_REF_COUNT_START + VP9_COMP_REF_COUNT_SIZE) |
| #define VP9_SINGLE_REF_COUNT_SIZE (10 * 2) |
| #define VP9_TX_MODE_COUNT_START \ |
| (VP9_SINGLE_REF_COUNT_START + VP9_SINGLE_REF_COUNT_SIZE) |
| #define VP9_TX_MODE_COUNT_SIZE (12 * 2) |
| #define VP9_SKIP_COUNT_START \ |
| (VP9_TX_MODE_COUNT_START + VP9_TX_MODE_COUNT_SIZE) |
| #define VP9_SKIP_COUNT_SIZE (3 * 2) |
| #define VP9_MV_SIGN_0_COUNT_START \ |
| (VP9_SKIP_COUNT_START + VP9_SKIP_COUNT_SIZE) |
| #define VP9_MV_SIGN_0_COUNT_SIZE (1 * 2) |
| #define VP9_MV_SIGN_1_COUNT_START \ |
| (VP9_MV_SIGN_0_COUNT_START + VP9_MV_SIGN_0_COUNT_SIZE) |
| #define VP9_MV_SIGN_1_COUNT_SIZE (1 * 2) |
| #define VP9_MV_BITS_0_COUNT_START \ |
| (VP9_MV_SIGN_1_COUNT_START + VP9_MV_SIGN_1_COUNT_SIZE) |
| #define VP9_MV_BITS_0_COUNT_SIZE (10 * 2) |
| #define VP9_MV_BITS_1_COUNT_START \ |
| (VP9_MV_BITS_0_COUNT_START + VP9_MV_BITS_0_COUNT_SIZE) |
| #define VP9_MV_BITS_1_COUNT_SIZE (10 * 2) |
| #define VP9_MV_CLASS0_HP_0_COUNT_START \ |
| (VP9_MV_BITS_1_COUNT_START + VP9_MV_BITS_1_COUNT_SIZE) |
| #define VP9_MV_CLASS0_HP_0_COUNT_SIZE (2 * 2) |
| #define VP9_MV_CLASS0_HP_1_COUNT_START \ |
| (VP9_MV_CLASS0_HP_0_COUNT_START + VP9_MV_CLASS0_HP_0_COUNT_SIZE) |
| #define VP9_MV_CLASS0_HP_1_COUNT_SIZE (2 * 2) |
| |
| /* Start merge_tree */ |
| #define VP9_INTER_MODE_COUNT_START \ |
| (VP9_MV_CLASS0_HP_1_COUNT_START + VP9_MV_CLASS0_HP_1_COUNT_SIZE) |
| #define VP9_INTER_MODE_COUNT_SIZE (7 * 4) |
| #define VP9_IF_Y_MODE_COUNT_START \ |
| (VP9_INTER_MODE_COUNT_START + VP9_INTER_MODE_COUNT_SIZE) |
| #define VP9_IF_Y_MODE_COUNT_SIZE (10 * 4) |
| #define VP9_IF_UV_MODE_COUNT_START \ |
| (VP9_IF_Y_MODE_COUNT_START + VP9_IF_Y_MODE_COUNT_SIZE) |
| #define VP9_IF_UV_MODE_COUNT_SIZE (10 * 10) |
| #define VP9_PARTITION_P_COUNT_START \ |
| (VP9_IF_UV_MODE_COUNT_START + VP9_IF_UV_MODE_COUNT_SIZE) |
| #define VP9_PARTITION_P_COUNT_SIZE (4 * 4 * 4) |
| #define VP9_INTERP_COUNT_START \ |
| (VP9_PARTITION_P_COUNT_START + VP9_PARTITION_P_COUNT_SIZE) |
| #define VP9_INTERP_COUNT_SIZE (4 * 3) |
| #define VP9_MV_JOINTS_COUNT_START \ |
| (VP9_INTERP_COUNT_START + VP9_INTERP_COUNT_SIZE) |
| #define VP9_MV_JOINTS_COUNT_SIZE (1 * 4) |
| #define VP9_MV_CLASSES_0_COUNT_START \ |
| (VP9_MV_JOINTS_COUNT_START + VP9_MV_JOINTS_COUNT_SIZE) |
| #define VP9_MV_CLASSES_0_COUNT_SIZE (1 * 11) |
| #define VP9_MV_CLASS0_0_COUNT_START \ |
| (VP9_MV_CLASSES_0_COUNT_START + VP9_MV_CLASSES_0_COUNT_SIZE) |
| #define VP9_MV_CLASS0_0_COUNT_SIZE (1 * 2) |
| #define VP9_MV_CLASSES_1_COUNT_START \ |
| (VP9_MV_CLASS0_0_COUNT_START + VP9_MV_CLASS0_0_COUNT_SIZE) |
| #define VP9_MV_CLASSES_1_COUNT_SIZE (1 * 11) |
| #define VP9_MV_CLASS0_1_COUNT_START \ |
| (VP9_MV_CLASSES_1_COUNT_START + VP9_MV_CLASSES_1_COUNT_SIZE) |
| #define VP9_MV_CLASS0_1_COUNT_SIZE (1 * 2) |
| #define VP9_MV_CLASS0_FP_0_COUNT_START \ |
| (VP9_MV_CLASS0_1_COUNT_START + VP9_MV_CLASS0_1_COUNT_SIZE) |
| #define VP9_MV_CLASS0_FP_0_COUNT_SIZE (3 * 4) |
| #define VP9_MV_CLASS0_FP_1_COUNT_START \ |
| (VP9_MV_CLASS0_FP_0_COUNT_START + VP9_MV_CLASS0_FP_0_COUNT_SIZE) |
| #define VP9_MV_CLASS0_FP_1_COUNT_SIZE (3 * 4) |
| |
| #define DC_PRED 0 /* Average of above and left pixels */ |
| #define V_PRED 1 /* Vertical */ |
| #define H_PRED 2 /* Horizontal */ |
| #define D45_PRED 3 /* Directional 45 deg = round(arctan(1/1) * 180/pi) */ |
| #define D135_PRED 4 /* Directional 135 deg = 180 - 45 */ |
| #define D117_PRED 5 /* Directional 117 deg = 180 - 63 */ |
| #define D153_PRED 6 /* Directional 153 deg = 180 - 27 */ |
| #define D207_PRED 7 /* Directional 207 deg = 180 + 27 */ |
| #define D63_PRED 8 /* Directional 63 deg = round(arctan(2/1) * 180/pi) */ |
| #define TM_PRED 9 /* True-motion */ |
| |
| /* Use a static inline to avoid possible side effect from num being reused */ |
/*
 * Divide @value by 2^num, rounding to nearest (ties round up).
 * Kept as a static inline so the argument is evaluated only once.
 */
static inline int round_power_of_two(int value, int num)
{
	int half = 1 << (num - 1);

	return (value + half) >> num;
}
| |
#define MODE_MV_COUNT_SAT 20
/*
 * Update factors (out of 128) indexed by an event count saturated at
 * MODE_MV_COUNT_SAT; presumably weights for probability adaptation —
 * the consumer is not visible in this part of the file.
 */
static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = {
	0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64,
	70, 76, 83, 89, 96, 102, 108, 115, 121, 128
};
| |
/*
 * Frame header parameters exchanged through the RPM buffer (see
 * VP9_RPM_BUFFER / RPM_OFFSET). The 'l' view exposes the raw 16-bit
 * words; the 'p' view names the individual fields.
 */
union rpm_param {
	struct {
		u16 data[RPM_BUF_SIZE];
	} l;
	struct {
		u16 profile;
		u16 show_existing_frame;
		u16 frame_to_show_idx;
		u16 frame_type; /*1 bit*/
		u16 show_frame; /*1 bit*/
		u16 error_resilient_mode; /*1 bit*/
		u16 intra_only; /*1 bit*/
		u16 display_size_present; /*1 bit*/
		u16 reset_frame_context;
		u16 refresh_frame_flags;
		u16 width;
		u16 height;
		u16 display_width;
		u16 display_height;
		u16 ref_info;
		u16 same_frame_size;
		u16 mode_ref_delta_enabled;
		u16 ref_deltas[4];
		u16 mode_deltas[2];
		u16 filter_level;
		u16 sharpness_level;
		u16 bit_depth;
		u16 seg_quant_info[8];
		u16 seg_enabled;
		u16 seg_abs_delta;
		/* bit 15: feature enabled; bit 8, sign; bit[5:0], data */
		u16 seg_lf_info[8];
	} p;
};
| |
/* Per-segment features that may override frame-level settings */
enum SEG_LVL_FEATURES {
	SEG_LVL_ALT_Q = 0, /* Use alternate Quantizer */
	SEG_LVL_ALT_LF = 1, /* Use alternate loop filter value */
	SEG_LVL_REF_FRAME = 2, /* Optional Segment reference frame */
	SEG_LVL_SKIP = 3, /* Optional Segment (0,0) + skip mode */
	SEG_LVL_MAX = 4 /* Number of features supported */
};
| |
/* Segmentation state, trimmed down to what the loop filter needs */
struct segmentation {
	u8 enabled;
	u8 update_map;
	u8 update_data;
	u8 abs_delta;		/* SEGMENT_DELTADATA or SEGMENT_ABSDATA */
	u8 temporal_update;
	/* Per-segment feature values, indexed by enum SEG_LVL_FEATURES */
	s16 feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
	/* Bitmask of enabled features per segment (bit N == feature N) */
	unsigned int feature_mask[MAX_SEGMENTS];
};
| |
/* Loop filter limits derived for one filter level */
struct loop_filter_thresh {
	u8 mblim;	/* edge limit: 2 * (lvl + 2) + lim (vp9_update_sharpness) */
	u8 lim;		/* block-inside limit */
	u8 hev_thr;	/* high-edge-variance threshold; not written in this file's visible code */
};
| |
/*
 * Derived loop filter tables: one threshold entry per possible filter
 * level, plus the final level per segment / reference frame / mode.
 */
struct loop_filter_info_n {
	struct loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
	u8 lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
};
| |
/* Loop filter configuration mirrored from the frame header */
struct loopfilter {
	int filter_level;

	int sharpness_level;
	/* Value the lfthr tables were last built with (change detection) */
	int last_sharpness_level;

	u8 mode_ref_delta_enabled;
	u8 mode_ref_delta_update;

	/*0 = Intra, Last, GF, ARF*/
	signed char ref_deltas[MAX_REF_LF_DELTAS];
	signed char last_ref_deltas[MAX_REF_LF_DELTAS];

	/*0 = ZERO_MV, MV*/
	signed char mode_deltas[MAX_MODE_LF_DELTAS];
	signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
};
| |
/* Bookkeeping for one decoded frame and its capture buffer */
struct vp9_frame {
	/* Membership in codec_vp9.ref_frames_list */
	struct list_head list;
	/* Destination V4L2 capture buffer */
	struct vb2_v4l2_buffer *vbuf;
	/* Capture buffer index */
	int index;
	/* Non-zero if the frame is coded intra-only */
	int intra_only;
	/* Non-zero if the frame is meant for display (show_frame) */
	int show;
	/* KEY_FRAME or INTER_FRAME */
	int type;
	/* Non-zero once the buffer no longer needs returning to userspace */
	int done;
	/* Coded dimensions of this frame */
	unsigned int width;
	unsigned int height;
};
| |
struct codec_vp9 {
	/* VP9 context lock */
	struct mutex lock;

	/* Common part with the HEVC decoder */
	struct codec_hevc_common common;

	/* Buffer for the VP9 Workspace */
	void *workspace_vaddr;
	dma_addr_t workspace_paddr;

	/* Contains many information parsed from the bitstream */
	union rpm_param rpm_param;

	/* Whether we detected the bitstream as 10-bit */
	int is_10bit;

	/* Coded resolution reported by the hardware */
	u32 width, height;

	/* All ref frames used by the HW at a given time */
	struct list_head ref_frames_list;
	u32 frames_num;

	/* In case of downsampling (decoding with FBC but outputting in NV12M),
	 * we need to allocate additional buffers for FBC.
	 */
	void *fbc_buffer_vaddr[MAX_REF_PIC_NUM];
	dma_addr_t fbc_buffer_paddr[MAX_REF_PIC_NUM];

	/* Current and next mapping of ref slots to frames (-1 = unused) */
	int ref_frame_map[REF_FRAMES];
	int next_ref_frame_map[REF_FRAMES];
	/* The REFS_PER_FRAME frames referenced by the current frame */
	struct vp9_frame *frame_refs[REFS_PER_FRAME];

	/* Number of LCUs (LCU_SIZE x LCU_SIZE) covering the coded frame */
	u32 lcu_total;

	/* loop filter */
	int default_filt_lvl;
	struct loop_filter_info_n lfi;
	struct loopfilter lf;
	struct segmentation seg_4lf;

	/* Frame currently being decoded, and the one decoded before it */
	struct vp9_frame *cur_frame;
	struct vp9_frame *prev_frame;
};
| |
| static int div_r32(s64 m, int n) |
| { |
| s64 qu = div_s64(m, n); |
| |
| return (int)qu; |
| } |
| |
/* Probabilities must stay in [1, 255]; 0 and 256 are not representable */
static int clip_prob(int p)
{
	if (p < 1)
		return 1;
	if (p > 255)
		return 255;

	return p;
}
| |
| static int segfeature_active(struct segmentation *seg, int segment_id, |
| enum SEG_LVL_FEATURES feature_id) |
| { |
| return seg->enabled && |
| (seg->feature_mask[segment_id] & (1 << feature_id)); |
| } |
| |
| static int get_segdata(struct segmentation *seg, int segment_id, |
| enum SEG_LVL_FEATURES feature_id) |
| { |
| return seg->feature_data[segment_id][feature_id]; |
| } |
| |
| static void vp9_update_sharpness(struct loop_filter_info_n *lfi, |
| int sharpness_lvl) |
| { |
| int lvl; |
| |
| /* For each possible value for the loop filter fill out limits*/ |
| for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { |
| /* Set loop filter parameters that control sharpness.*/ |
| int block_inside_limit = lvl >> ((sharpness_lvl > 0) + |
| (sharpness_lvl > 4)); |
| |
| if (sharpness_lvl > 0) { |
| if (block_inside_limit > (9 - sharpness_lvl)) |
| block_inside_limit = (9 - sharpness_lvl); |
| } |
| |
| if (block_inside_limit < 1) |
| block_inside_limit = 1; |
| |
| lfi->lfthr[lvl].lim = (u8)block_inside_limit; |
| lfi->lfthr[lvl].mblim = (u8)(2 * (lvl + 2) + |
| block_inside_limit); |
| } |
| } |
| |
/*
 * Instantiate this function once when decode is started:
 * resets the loop filter state, builds the default threshold tables and
 * programs them into the deblocking hardware, then selects the VP9
 * video format in HEVC_DBLK_CFGB according to the SoC revision.
 */
static void
vp9_loop_filter_init(struct amvdec_core *core, struct codec_vp9 *vp9)
{
	struct loop_filter_info_n *lfi = &vp9->lfi;
	struct loopfilter *lf = &vp9->lf;
	struct segmentation *seg_4lf = &vp9->seg_4lf;
	int i;

	memset(lfi, 0, sizeof(struct loop_filter_info_n));
	memset(lf, 0, sizeof(struct loopfilter));
	memset(seg_4lf, 0, sizeof(struct segmentation));
	lf->sharpness_level = 0;
	vp9_update_sharpness(lfi, lf->sharpness_level);
	lf->last_sharpness_level = lf->sharpness_level;

	/* Each CFG9 write packs two consecutive lim/mblim threshold pairs */
	for (i = 0; i < 32; i++) {
		unsigned int thr;

		thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) |
		      (lfi->lfthr[i * 2 + 1].mblim & 0xff);
		thr = (thr << 16) | ((lfi->lfthr[i * 2].lim & 0x3f) << 8) |
		      (lfi->lfthr[i * 2].mblim & 0xff);

		amvdec_write_dos(core, HEVC_DBLK_CFG9, thr);
	}

	if (core->platform->revision >= VDEC_REVISION_SM1)
		amvdec_write_dos(core, HEVC_DBLK_CFGB,
				 (0x3 << 14) | /* dw fifo thres r and b */
				 (0x3 << 12) | /* dw fifo thres r or b */
				 (0x3 << 10) | /* dw fifo thres not r/b */
				 BIT(0));      /* VP9 video format */
	else if (core->platform->revision >= VDEC_REVISION_G12A)
		/* VP9 video format */
		amvdec_write_dos(core, HEVC_DBLK_CFGB, (0x54 << 8) | BIT(0));
	else
		amvdec_write_dos(core, HEVC_DBLK_CFGB, 0x40400001);
}
| |
/*
 * Per-frame loop filter setup: refresh the threshold tables if the
 * sharpness changed, compute the final filter level for every
 * segment / reference frame / mode combination, and program the level
 * table into HEVC_DBLK_CFGA.
 */
static void
vp9_loop_filter_frame_init(struct amvdec_core *core, struct segmentation *seg,
			   struct loop_filter_info_n *lfi,
			   struct loopfilter *lf, int default_filt_lvl)
{
	int i;
	int seg_id;

	/*
	 * n_shift is the multiplier for lf_deltas
	 * the multiplier is:
	 * - 1 for when filter_lvl is between 0 and 31
	 * - 2 when filter_lvl is between 32 and 63
	 */
	const int scale = 1 << (default_filt_lvl >> 5);

	/* update limits if sharpness has changed */
	if (lf->last_sharpness_level != lf->sharpness_level) {
		vp9_update_sharpness(lfi, lf->sharpness_level);
		lf->last_sharpness_level = lf->sharpness_level;

		/* Write to register (two threshold pairs per CFG9 write) */
		for (i = 0; i < 32; i++) {
			unsigned int thr;

			thr = ((lfi->lfthr[i * 2 + 1].lim & 0x3f) << 8) |
			      (lfi->lfthr[i * 2 + 1].mblim & 0xff);
			thr = (thr << 16) |
			      ((lfi->lfthr[i * 2].lim & 0x3f) << 8) |
			      (lfi->lfthr[i * 2].mblim & 0xff);

			amvdec_write_dos(core, HEVC_DBLK_CFG9, thr);
		}
	}

	for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
		int lvl_seg = default_filt_lvl;

		/* Apply the per-segment alternate filter level if active */
		if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
			const int data = get_segdata(seg, seg_id,
						     SEG_LVL_ALT_LF);
			lvl_seg = clamp_t(int,
					  seg->abs_delta == SEGMENT_ABSDATA ?
						data : default_filt_lvl + data,
					  0, MAX_LOOP_FILTER);
		}

		if (!lf->mode_ref_delta_enabled) {
			/*
			 * We could get rid of this if we assume that deltas
			 * are set to zero when not in use.
			 * encoder always uses deltas
			 */
			memset(lfi->lvl[seg_id], lvl_seg,
			       sizeof(lfi->lvl[seg_id]));
		} else {
			int ref, mode;
			/* Intra frames only use the reference delta */
			const int intra_lvl =
				lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
			lfi->lvl[seg_id][INTRA_FRAME][0] =
				clamp_val(intra_lvl, 0, MAX_LOOP_FILTER);

			/* Inter frames combine reference and mode deltas */
			for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
				for (mode = 0; mode < MAX_MODE_LF_DELTAS;
				     ++mode) {
					const int inter_lvl =
						lvl_seg +
						lf->ref_deltas[ref] * scale +
						lf->mode_deltas[mode] * scale;
					lfi->lvl[seg_id][ref][mode] =
						clamp_val(inter_lvl, 0,
							  MAX_LOOP_FILTER);
				}
			}
		}
	}

	/* Pack four 6-bit levels (one per reference) into each CFGA write */
	for (i = 0; i < 16; i++) {
		unsigned int level;

		level = ((lfi->lvl[i >> 1][3][i & 1] & 0x3f) << 24) |
			((lfi->lvl[i >> 1][2][i & 1] & 0x3f) << 16) |
			((lfi->lvl[i >> 1][1][i & 1] & 0x3f) << 8) |
			(lfi->lvl[i >> 1][0][i & 1] & 0x3f);
		if (!default_filt_lvl)
			level = 0;

		amvdec_write_dos(core, HEVC_DBLK_CFGA, level);
	}
}
| |
/*
 * Return every frame still tracked in ref_frames_list to its owner:
 * displayable frames are completed towards userspace, non-displayable
 * ones are requeued for reuse, and the list entries are freed.
 */
static void codec_vp9_flush_output(struct amvdec_session *sess)
{
	struct codec_vp9 *vp9 = sess->priv;
	struct vp9_frame *tmp, *n;

	mutex_lock(&vp9->lock);
	list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) {
		if (!tmp->done) {
			if (tmp->show)
				/* Meant for display: mark the buffer done */
				amvdec_dst_buf_done(sess, tmp->vbuf,
						    V4L2_FIELD_NONE);
			else
				/* Never shown: recycle the capture buffer */
				v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf);

			vp9->frames_num--;
		}

		list_del(&tmp->list);
		kfree(tmp);
	}
	mutex_unlock(&vp9->lock);
}
| |
| static u32 codec_vp9_num_pending_bufs(struct amvdec_session *sess) |
| { |
| struct codec_vp9 *vp9 = sess->priv; |
| |
| if (!vp9) |
| return 0; |
| |
| return vp9->frames_num; |
| } |
| |
| static int codec_vp9_alloc_workspace(struct amvdec_core *core, |
| struct codec_vp9 *vp9) |
| { |
| /* Allocate some memory for the VP9 decoder's state */ |
| vp9->workspace_vaddr = dma_alloc_coherent(core->dev, SIZE_WORKSPACE, |
| &vp9->workspace_paddr, |
| GFP_KERNEL); |
| if (!vp9->workspace_vaddr) { |
| dev_err(core->dev, "Failed to allocate VP9 Workspace\n"); |
| return -ENOMEM; |
| } |
| |
| return 0; |
| } |
| |
/*
 * Program the hardware with the physical addresses of every sub-buffer
 * inside the workspace (see the *_OFFSET layout above), plus the MMU
 * buffers when the frame-buffer-compression MMU is in use.
 */
static void codec_vp9_setup_workspace(struct amvdec_session *sess,
				      struct codec_vp9 *vp9)
{
	struct amvdec_core *core = sess->core;
	u32 revision = core->platform->revision;
	dma_addr_t wkaddr = vp9->workspace_paddr;

	amvdec_write_dos(core, HEVCD_IPP_LINEBUFF_BASE, wkaddr + IPP_OFFSET);
	amvdec_write_dos(core, VP9_RPM_BUFFER, wkaddr + RPM_OFFSET);
	amvdec_write_dos(core, VP9_SHORT_TERM_RPS, wkaddr + SH_TM_RPS_OFFSET);
	amvdec_write_dos(core, VP9_PPS_BUFFER, wkaddr + PPS_OFFSET);
	amvdec_write_dos(core, VP9_SAO_UP, wkaddr + SAO_UP_OFFSET);

	amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER,
			 wkaddr + SWAP_BUF_OFFSET);
	amvdec_write_dos(core, VP9_STREAM_SWAP_BUFFER2,
			 wkaddr + SWAP_BUF2_OFFSET);
	amvdec_write_dos(core, VP9_SCALELUT, wkaddr + SCALELUT_OFFSET);

	/* G12A and newer expose an extra deblocking config register */
	if (core->platform->revision >= VDEC_REVISION_G12A)
		amvdec_write_dos(core, HEVC_DBLK_CFGE,
				 wkaddr + DBLK_PARA_OFFSET);

	amvdec_write_dos(core, HEVC_DBLK_CFG4, wkaddr + DBLK_PARA_OFFSET);
	amvdec_write_dos(core, HEVC_DBLK_CFG5, wkaddr + DBLK_DATA_OFFSET);
	amvdec_write_dos(core, VP9_SEG_MAP_BUFFER, wkaddr + SEG_MAP_OFFSET);
	amvdec_write_dos(core, VP9_PROB_SWAP_BUFFER, wkaddr + PROB_OFFSET);
	amvdec_write_dos(core, VP9_COUNT_SWAP_BUFFER, wkaddr + COUNT_OFFSET);
	amvdec_write_dos(core, LMEM_DUMP_ADR, wkaddr + LMEM_OFFSET);

	if (codec_hevc_use_mmu(revision, sess->pixfmt_cap, vp9->is_10bit)) {
		/* The VBH area is split in two halves, one per register */
		amvdec_write_dos(core, HEVC_SAO_MMU_VH0_ADDR,
				 wkaddr + MMU_VBH_OFFSET);
		amvdec_write_dos(core, HEVC_SAO_MMU_VH1_ADDR,
				 wkaddr + MMU_VBH_OFFSET + (MMU_VBH_SIZE / 2));

		/* The MMU map register moved on G12A and newer */
		if (revision >= VDEC_REVISION_G12A)
			amvdec_write_dos(core, HEVC_ASSIST_MMU_MAP_ADDR,
					 vp9->common.mmu_map_paddr);
		else
			amvdec_write_dos(core, VP9_MMU_MAP_BUFFER,
					 vp9->common.mmu_map_paddr);
	}
}
| |
/*
 * Session start: allocate the private context and workspace, bring up
 * the HEVC/VP9 parser hardware, initialize the loop filter state and
 * reset the reference frame tracking.
 * Returns 0 on success or a negative errno.
 */
static int codec_vp9_start(struct amvdec_session *sess)
{
	struct amvdec_core *core = sess->core;
	struct codec_vp9 *vp9;
	u32 val;
	int i;
	int ret;

	vp9 = kzalloc(sizeof(*vp9), GFP_KERNEL);
	if (!vp9)
		return -ENOMEM;

	ret = codec_vp9_alloc_workspace(core, vp9);
	if (ret)
		goto free_vp9;

	codec_vp9_setup_workspace(sess, vp9);
	amvdec_write_dos_bits(core, HEVC_STREAM_CONTROL, BIT(0));
	/* stream_fifo_hole */
	if (core->platform->revision >= VDEC_REVISION_G12A)
		amvdec_write_dos_bits(core, HEVC_STREAM_FIFO_CTL, BIT(29));

	/* Configure the parser interrupt sources */
	val = amvdec_read_dos(core, HEVC_PARSER_INT_CONTROL) & 0x7fffffff;
	val |= (3 << 29) | BIT(24) | BIT(22) | BIT(7) | BIT(4) | BIT(0);
	amvdec_write_dos(core, HEVC_PARSER_INT_CONTROL, val);
	amvdec_write_dos_bits(core, HEVC_SHIFT_STATUS, BIT(0));
	amvdec_write_dos(core, HEVC_SHIFT_CONTROL, BIT(10) | BIT(9) |
			 (3 << 6) | BIT(5) | BIT(2) | BIT(1) | BIT(0));
	amvdec_write_dos(core, HEVC_CABAC_CONTROL, BIT(0));
	amvdec_write_dos(core, HEVC_PARSER_CORE_CONTROL, BIT(0));
	amvdec_write_dos(core, HEVC_SHIFT_STARTCODE, 0x00000001);

	amvdec_write_dos(core, VP9_DEC_STATUS_REG, 0);

	/* Load the parser command table shared with the HEVC decoder */
	amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE, BIT(16));
	for (i = 0; i < ARRAY_SIZE(vdec_hevc_parser_cmd); ++i)
		amvdec_write_dos(core, HEVC_PARSER_CMD_WRITE,
				 vdec_hevc_parser_cmd[i]);

	amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_0, PARSER_CMD_SKIP_CFG_0);
	amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_1, PARSER_CMD_SKIP_CFG_1);
	amvdec_write_dos(core, HEVC_PARSER_CMD_SKIP_2, PARSER_CMD_SKIP_CFG_2);
	amvdec_write_dos(core, HEVC_PARSER_IF_CONTROL,
			 BIT(5) | BIT(2) | BIT(0));

	amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(0));
	amvdec_write_dos(core, HEVCD_IPP_TOP_CNTL, BIT(1));

	amvdec_write_dos(core, VP9_WAIT_FLAG, 1);

	/* clear mailbox interrupt */
	amvdec_write_dos(core, HEVC_ASSIST_MBOX1_CLR_REG, 1);
	/* enable mailbox interrupt */
	amvdec_write_dos(core, HEVC_ASSIST_MBOX1_MASK, 1);
	/* disable PSCALE for hardware sharing */
	amvdec_write_dos(core, HEVC_PSCALE_CTRL, 0);
	/* Let the uCode do all the parsing */
	amvdec_write_dos(core, NAL_SEARCH_CTL, 0x8);

	amvdec_write_dos(core, DECODE_STOP_POS, 0);
	amvdec_write_dos(core, VP9_DECODE_MODE, DECODE_MODE_SINGLE);

	pr_debug("decode_count: %u; decode_size: %u\n",
		 amvdec_read_dos(core, HEVC_DECODE_COUNT),
		 amvdec_read_dos(core, HEVC_DECODE_SIZE));

	vp9_loop_filter_init(core, vp9);

	/* No references exist yet: all map slots start at -1 (unused) */
	INIT_LIST_HEAD(&vp9->ref_frames_list);
	mutex_init(&vp9->lock);
	memset(&vp9->ref_frame_map, -1, sizeof(vp9->ref_frame_map));
	memset(&vp9->next_ref_frame_map, -1, sizeof(vp9->next_ref_frame_map));
	for (i = 0; i < REFS_PER_FRAME; ++i)
		vp9->frame_refs[i] = NULL;
	sess->priv = vp9;

	return 0;

free_vp9:
	kfree(vp9);
	return ret;
}
| |
/*
 * Session stop: free the workspace and FBC buffers under the context
 * lock. NOTE(review): the vp9 context itself (sess->priv) is not freed
 * here — presumably released elsewhere; confirm against the callers.
 */
static int codec_vp9_stop(struct amvdec_session *sess)
{
	struct amvdec_core *core = sess->core;
	struct codec_vp9 *vp9 = sess->priv;

	mutex_lock(&vp9->lock);
	if (vp9->workspace_vaddr)
		dma_free_coherent(core->dev, SIZE_WORKSPACE,
				  vp9->workspace_vaddr,
				  vp9->workspace_paddr);

	codec_hevc_free_fbc_buffers(sess, &vp9->common);
	mutex_unlock(&vp9->lock);

	return 0;
}
| |
/*
 * Program LAST & GOLDEN frames into the motion compensation reference cache
 * controller
 */
static void codec_vp9_set_mcrcc(struct amvdec_session *sess)
{
	struct amvdec_core *core = sess->core;
	struct codec_vp9 *vp9 = sess->priv;
	u32 val;

	/* Reset mcrcc */
	amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0x2);
	/* Disable on I-frame */
	if (vp9->cur_frame->type == KEY_FRAME || vp9->cur_frame->intra_only) {
		amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0x0);
		return;
	}

	/*
	 * Read two consecutive canvas entries; each read auto-advances,
	 * so the first goes to CTL2 and the second to CTL3.
	 */
	amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, BIT(1));
	val = amvdec_read_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR) & 0xffff;
	val |= (val << 16);	/* duplicate the canvas id in both halves */
	amvdec_write_dos(core, HEVCD_MCRCC_CTL2, val);
	val = amvdec_read_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR) & 0xffff;
	val |= (val << 16);
	amvdec_write_dos(core, HEVCD_MCRCC_CTL3, val);

	/* Enable mcrcc progressive-mode */
	amvdec_write_dos(core, HEVCD_MCRCC_CTL1, 0xff0);
}
| |
/*
 * Configure the SAO output stage for the current destination buffer:
 * compressed (FBC) and/or uncompressed (NV12M) output addresses, plane
 * lengths, deblock write-path enables and output endianness.
 */
static void codec_vp9_set_sao(struct amvdec_session *sess,
			      struct vb2_buffer *vb)
{
	struct amvdec_core *core = sess->core;
	struct codec_vp9 *vp9 = sess->priv;

	dma_addr_t buf_y_paddr;
	dma_addr_t buf_u_v_paddr;
	u32 val;

	/* Luma target: internal FBC buffer when downsampling, else plane 0 */
	if (codec_hevc_use_downsample(sess->pixfmt_cap, vp9->is_10bit))
		buf_y_paddr =
			vp9->common.fbc_buffer_paddr[vb->index];
	else
		buf_y_paddr =
			vb2_dma_contig_plane_dma_addr(vb, 0);

	if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) {
		val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0200;
		amvdec_write_dos(core, HEVC_SAO_CTRL5, val);
		amvdec_write_dos(core, HEVC_CM_BODY_START_ADDR, buf_y_paddr);
	}

	/* Uncompressed NV12M output: program both plane start/write pointers */
	if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) {
		buf_y_paddr =
		       vb2_dma_contig_plane_dma_addr(vb, 0);
		buf_u_v_paddr =
		       vb2_dma_contig_plane_dma_addr(vb, 1);
		amvdec_write_dos(core, HEVC_SAO_Y_START_ADDR, buf_y_paddr);
		amvdec_write_dos(core, HEVC_SAO_C_START_ADDR, buf_u_v_paddr);
		amvdec_write_dos(core, HEVC_SAO_Y_WPTR, buf_y_paddr);
		amvdec_write_dos(core, HEVC_SAO_C_WPTR, buf_u_v_paddr);
	}

	if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap,
			       vp9->is_10bit)) {
		amvdec_write_dos(core, HEVC_CM_HEADER_START_ADDR,
				 vp9->common.mmu_header_paddr[vb->index]);
		/* use HEVC_CM_HEADER_START_ADDR */
		amvdec_write_dos_bits(core, HEVC_SAO_CTRL5, BIT(10));
	}

	/* Plane lengths: chroma is half the luma size (NV12 4:2:0) */
	amvdec_write_dos(core, HEVC_SAO_Y_LENGTH,
			 amvdec_get_output_size(sess));
	amvdec_write_dos(core, HEVC_SAO_C_LENGTH,
			 (amvdec_get_output_size(sess) / 2));

	if (core->platform->revision >= VDEC_REVISION_G12A) {
		amvdec_clear_dos_bits(core, HEVC_DBLK_CFGB,
				      BIT(4) | BIT(5) | BIT(8) | BIT(9));
		/* enable first, compressed write */
		if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit))
			amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(8));

		/* enable second, uncompressed write */
		if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M)
			amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(9));

		/* dblk pipeline mode=1 for performance */
		if (sess->width >= 1280)
			amvdec_write_dos_bits(core, HEVC_DBLK_CFGB, BIT(4));

		pr_debug("HEVC_DBLK_CFGB: %08X\n",
			 amvdec_read_dos(core, HEVC_DBLK_CFGB));
	}

	val = amvdec_read_dos(core, HEVC_SAO_CTRL1) & ~0x3ff0;
	val |= 0xff0; /* Set endianness for 2-bytes swaps (nv12) */
	if (core->platform->revision < VDEC_REVISION_G12A) {
		val &= ~0x3;
		if (!codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit))
			val |= BIT(0); /* disable cm compression */
		/* TOFIX: Handle Amlogic Framebuffer compression */
	}

	amvdec_write_dos(core, HEVC_SAO_CTRL1, val);
	pr_debug("HEVC_SAO_CTRL1: %08X\n", val);

	/* no downscale for NV12 */
	val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0000;
	amvdec_write_dos(core, HEVC_SAO_CTRL5, val);

	val = amvdec_read_dos(core, HEVCD_IPP_AXIIF_CONFIG) & ~0x30;
	val |= 0xf;
	val &= ~BIT(12); /* NV12 */
	amvdec_write_dos(core, HEVCD_IPP_AXIIF_CONFIG, val);
}
| |
| static dma_addr_t codec_vp9_get_frame_mv_paddr(struct codec_vp9 *vp9, |
| struct vp9_frame *frame) |
| { |
| return vp9->workspace_paddr + MPRED_MV_OFFSET + |
| (frame->index * MPRED_MV_BUF_SIZE); |
| } |
| |
| static void codec_vp9_set_mpred_mv(struct amvdec_core *core, |
| struct codec_vp9 *vp9) |
| { |
| int mpred_mv_rd_end_addr; |
| int use_prev_frame_mvs = vp9->prev_frame->width == |
| vp9->cur_frame->width && |
| vp9->prev_frame->height == |
| vp9->cur_frame->height && |
| !vp9->prev_frame->intra_only && |
| vp9->prev_frame->show && |
| vp9->prev_frame->type != KEY_FRAME; |
| |
| amvdec_write_dos(core, HEVC_MPRED_CTRL3, 0x24122412); |
| amvdec_write_dos(core, HEVC_MPRED_ABV_START_ADDR, |
| vp9->workspace_paddr + MPRED_ABV_OFFSET); |
| |
| amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); |
| if (use_prev_frame_mvs) |
| amvdec_write_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6)); |
| |
| amvdec_write_dos(core, HEVC_MPRED_MV_WR_START_ADDR, |
| codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame)); |
| amvdec_write_dos(core, HEVC_MPRED_MV_WPTR, |
| codec_vp9_get_frame_mv_paddr(vp9, vp9->cur_frame)); |
| |
| amvdec_write_dos(core, HEVC_MPRED_MV_RD_START_ADDR, |
| codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame)); |
| amvdec_write_dos(core, HEVC_MPRED_MV_RPTR, |
| codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame)); |
| |
| mpred_mv_rd_end_addr = |
| codec_vp9_get_frame_mv_paddr(vp9, vp9->prev_frame) + |
| (vp9->lcu_total * MV_MEM_UNIT); |
| amvdec_write_dos(core, HEVC_MPRED_MV_RD_END_ADDR, mpred_mv_rd_end_addr); |
| } |
| |
| static void codec_vp9_update_next_ref(struct codec_vp9 *vp9) |
| { |
| union rpm_param *param = &vp9->rpm_param; |
| u32 buf_idx = vp9->cur_frame->index; |
| int ref_index = 0; |
| int refresh_frame_flags; |
| int mask; |
| |
| refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ? |
| 0xff : param->p.refresh_frame_flags; |
| |
| for (mask = refresh_frame_flags; mask; mask >>= 1) { |
| pr_debug("mask=%08X; ref_index=%d\n", mask, ref_index); |
| if (mask & 1) |
| vp9->next_ref_frame_map[ref_index] = buf_idx; |
| else |
| vp9->next_ref_frame_map[ref_index] = |
| vp9->ref_frame_map[ref_index]; |
| |
| ++ref_index; |
| } |
| |
| for (; ref_index < REF_FRAMES; ++ref_index) |
| vp9->next_ref_frame_map[ref_index] = |
| vp9->ref_frame_map[ref_index]; |
| } |
| |
| static void codec_vp9_save_refs(struct codec_vp9 *vp9) |
| { |
| union rpm_param *param = &vp9->rpm_param; |
| int i; |
| |
| for (i = 0; i < REFS_PER_FRAME; ++i) { |
| const int ref = (param->p.ref_info >> |
| (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7; |
| |
| if (vp9->ref_frame_map[ref] < 0) |
| continue; |
| |
| pr_warn("%s: FIXME, would need to save ref %d\n", |
| __func__, vp9->ref_frame_map[ref]); |
| } |
| } |
| |
| static void codec_vp9_update_ref(struct codec_vp9 *vp9) |
| { |
| union rpm_param *param = &vp9->rpm_param; |
| int ref_index = 0; |
| int mask; |
| int refresh_frame_flags; |
| |
| if (!vp9->cur_frame) |
| return; |
| |
| refresh_frame_flags = vp9->cur_frame->type == KEY_FRAME ? |
| 0xff : param->p.refresh_frame_flags; |
| |
| for (mask = refresh_frame_flags; mask; mask >>= 1) { |
| vp9->ref_frame_map[ref_index] = |
| vp9->next_ref_frame_map[ref_index]; |
| ++ref_index; |
| } |
| |
| if (param->p.show_existing_frame) |
| return; |
| |
| for (; ref_index < REF_FRAMES; ++ref_index) |
| vp9->ref_frame_map[ref_index] = |
| vp9->next_ref_frame_map[ref_index]; |
| } |
| |
| static struct vp9_frame *codec_vp9_get_frame_by_idx(struct codec_vp9 *vp9, |
| int idx) |
| { |
| struct vp9_frame *frame; |
| |
| list_for_each_entry(frame, &vp9->ref_frames_list, list) { |
| if (frame->index == idx) |
| return frame; |
| } |
| |
| return NULL; |
| } |
| |
| static void codec_vp9_sync_ref(struct codec_vp9 *vp9) |
| { |
| union rpm_param *param = &vp9->rpm_param; |
| int i; |
| |
| for (i = 0; i < REFS_PER_FRAME; ++i) { |
| const int ref = (param->p.ref_info >> |
| (((REFS_PER_FRAME - i - 1) * 4) + 1)) & 0x7; |
| const int idx = vp9->ref_frame_map[ref]; |
| |
| vp9->frame_refs[i] = codec_vp9_get_frame_by_idx(vp9, idx); |
| if (!vp9->frame_refs[i]) |
| pr_warn("%s: couldn't find VP9 ref %d\n", __func__, |
| idx); |
| } |
| } |
| |
| static void codec_vp9_set_refs(struct amvdec_session *sess, |
| struct codec_vp9 *vp9) |
| { |
| struct amvdec_core *core = sess->core; |
| int i; |
| |
| for (i = 0; i < REFS_PER_FRAME; ++i) { |
| struct vp9_frame *frame = vp9->frame_refs[i]; |
| int id_y; |
| int id_u_v; |
| |
| if (!frame) |
| continue; |
| |
| if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) { |
| id_y = frame->index; |
| id_u_v = id_y; |
| } else { |
| id_y = frame->index * 2; |
| id_u_v = id_y + 1; |
| } |
| |
| amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_DATA_ADDR, |
| (id_u_v << 16) | (id_u_v << 8) | id_y); |
| } |
| } |
| |
/*
 * Program the reference frames' geometry into the motion compensation
 * unit and enable reference scaling when any reference dimension
 * differs from the current frame's.
 */
static void codec_vp9_set_mc(struct amvdec_session *sess,
			     struct codec_vp9 *vp9)
{
	struct amvdec_core *core = sess->core;
	u32 scale = 0;
	u32 sz;
	int i;

	/*
	 * Write the reference canvas ids twice: once starting at canvas
	 * index 0 and once at index 16 (NOTE(review): presumably a second
	 * canvas bank — confirm against datasheet).
	 */
	amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR, 1);
	codec_vp9_set_refs(sess, vp9);
	amvdec_write_dos(core, HEVCD_MPP_ANC_CANVAS_ACCCONFIG_ADDR,
			 (16 << 8) | 1);
	codec_vp9_set_refs(sess, vp9);

	amvdec_write_dos(core, VP9D_MPP_REFINFO_TBL_ACCCONFIG, BIT(2));
	for (i = 0; i < REFS_PER_FRAME; ++i) {
		if (!vp9->frame_refs[i])
			continue;

		/* Any size mismatch requires the reference scaler */
		if (vp9->frame_refs[i]->width != vp9->width ||
		    vp9->frame_refs[i]->height != vp9->height)
			scale = 1;

		sz = amvdec_am21c_body_size(vp9->frame_refs[i]->width,
					    vp9->frame_refs[i]->height);

		/*
		 * Per reference: width, height, then the horizontal and
		 * vertical scaling ratios in Q14 fixed point, then the
		 * compressed body size in 32-byte units.
		 */
		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
				 vp9->frame_refs[i]->width);
		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
				 vp9->frame_refs[i]->height);
		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
				 (vp9->frame_refs[i]->width << 14) /
				 vp9->width);
		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA,
				 (vp9->frame_refs[i]->height << 14) /
				 vp9->height);
		amvdec_write_dos(core, VP9D_MPP_REFINFO_DATA, sz >> 5);
	}

	amvdec_write_dos(core, VP9D_MPP_REF_SCALE_ENBL, scale);
}
| |
/*
 * Pop a destination buffer for the frame about to be decoded and
 * register it in the reference frames list.
 *
 * Returns NULL on allocation failure or when no free destination
 * buffer is available.
 */
static struct vp9_frame *codec_vp9_get_new_frame(struct amvdec_session *sess)
{
	struct codec_vp9 *vp9 = sess->priv;
	union rpm_param *param = &vp9->rpm_param;
	struct vb2_v4l2_buffer *vbuf;
	struct vp9_frame *new_frame;

	new_frame = kzalloc(sizeof(*new_frame), GFP_KERNEL);
	if (!new_frame)
		return NULL;

	vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx);
	if (!vbuf) {
		dev_err(sess->core->dev, "No dst buffer available\n");
		kfree(new_frame);
		return NULL;
	}

	/*
	 * Skip buffers whose index is still attached to a frame in the
	 * reference list: requeue each rejected buffer and try the next
	 * one until a free index is found or the queue runs dry.
	 */
	while (codec_vp9_get_frame_by_idx(vp9, vbuf->vb2_buf.index)) {
		struct vb2_v4l2_buffer *old_vbuf = vbuf;

		vbuf = v4l2_m2m_dst_buf_remove(sess->m2m_ctx);
		v4l2_m2m_buf_queue(sess->m2m_ctx, old_vbuf);
		if (!vbuf) {
			dev_err(sess->core->dev, "No dst buffer available\n");
			kfree(new_frame);
			return NULL;
		}
	}

	/* Snapshot the frame parameters parsed from the bitstream */
	new_frame->vbuf = vbuf;
	new_frame->index = vbuf->vb2_buf.index;
	new_frame->intra_only = param->p.intra_only;
	new_frame->show = param->p.show_frame;
	new_frame->type = param->p.frame_type;
	new_frame->width = vp9->width;
	new_frame->height = vp9->height;
	list_add_tail(&new_frame->list, &vp9->ref_frames_list);
	vp9->frames_num++;

	return new_frame;
}
| |
| static void codec_vp9_show_existing_frame(struct codec_vp9 *vp9) |
| { |
| union rpm_param *param = &vp9->rpm_param; |
| |
| if (!param->p.show_existing_frame) |
| return; |
| |
| pr_debug("showing frame %u\n", param->p.frame_to_show_idx); |
| } |
| |
| static void codec_vp9_rm_noshow_frame(struct amvdec_session *sess) |
| { |
| struct codec_vp9 *vp9 = sess->priv; |
| struct vp9_frame *tmp; |
| |
| list_for_each_entry(tmp, &vp9->ref_frames_list, list) { |
| if (tmp->show) |
| continue; |
| |
| pr_debug("rm noshow: %u\n", tmp->index); |
| v4l2_m2m_buf_queue(sess->m2m_ctx, tmp->vbuf); |
| list_del(&tmp->list); |
| kfree(tmp); |
| vp9->frames_num--; |
| return; |
| } |
| } |
| |
/*
 * Prepare all hardware state for decoding the frame described by the
 * RPM parameters, then kick the firmware. Caller must hold vp9->lock.
 */
static void codec_vp9_process_frame(struct amvdec_session *sess)
{
	struct amvdec_core *core = sess->core;
	struct codec_vp9 *vp9 = sess->priv;
	union rpm_param *param = &vp9->rpm_param;
	int intra_only;

	/* Recycle a previously decoded but never-shown frame, if any */
	if (!param->p.show_frame)
		codec_vp9_rm_noshow_frame(sess);

	vp9->cur_frame = codec_vp9_get_new_frame(sess);
	if (!vp9->cur_frame)
		return;

	pr_debug("frame %d: type: %08X; show_exist: %u; show: %u, intra_only: %u\n",
		 vp9->cur_frame->index,
		 param->p.frame_type, param->p.show_existing_frame,
		 param->p.show_frame, param->p.intra_only);

	/* Keyframes reference nothing, so skip reference resolution */
	if (param->p.frame_type != KEY_FRAME)
		codec_vp9_sync_ref(vp9);
	codec_vp9_update_next_ref(vp9);
	codec_vp9_show_existing_frame(vp9);

	if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap,
			       vp9->is_10bit))
		codec_hevc_fill_mmu_map(sess, &vp9->common,
					&vp9->cur_frame->vbuf->vb2_buf);

	intra_only = param->p.show_frame ? 0 : param->p.intra_only;

	/* clear mpred (for keyframe only) */
	if (param->p.frame_type != KEY_FRAME && !intra_only) {
		codec_vp9_set_mc(sess, vp9);
		codec_vp9_set_mpred_mv(core, vp9);
	} else {
		amvdec_clear_dos_bits(core, HEVC_MPRED_CTRL4, BIT(6));
	}

	amvdec_write_dos(core, HEVC_PARSER_PICTURE_SIZE,
			 (vp9->height << 16) | vp9->width);
	codec_vp9_set_mcrcc(sess);
	codec_vp9_set_sao(sess, &vp9->cur_frame->vbuf->vb2_buf);

	vp9_loop_filter_frame_init(core, &vp9->seg_4lf,
				   &vp9->lfi, &vp9->lf,
				   vp9->default_filt_lvl);

	/* ask uCode to start decoding */
	amvdec_write_dos(core, VP9_DEC_STATUS_REG, VP9_10B_DECODE_SLICE);
}
| |
| static void codec_vp9_process_lf(struct codec_vp9 *vp9) |
| { |
| union rpm_param *param = &vp9->rpm_param; |
| int i; |
| |
| vp9->lf.mode_ref_delta_enabled = param->p.mode_ref_delta_enabled; |
| vp9->lf.sharpness_level = param->p.sharpness_level; |
| vp9->default_filt_lvl = param->p.filter_level; |
| vp9->seg_4lf.enabled = param->p.seg_enabled; |
| vp9->seg_4lf.abs_delta = param->p.seg_abs_delta; |
| |
| for (i = 0; i < 4; i++) |
| vp9->lf.ref_deltas[i] = param->p.ref_deltas[i]; |
| |
| for (i = 0; i < 2; i++) |
| vp9->lf.mode_deltas[i] = param->p.mode_deltas[i]; |
| |
| for (i = 0; i < MAX_SEGMENTS; i++) |
| vp9->seg_4lf.feature_mask[i] = |
| (param->p.seg_lf_info[i] & 0x8000) ? |
| (1 << SEG_LVL_ALT_LF) : 0; |
| |
| for (i = 0; i < MAX_SEGMENTS; i++) |
| vp9->seg_4lf.feature_data[i][SEG_LVL_ALT_LF] = |
| (param->p.seg_lf_info[i] & 0x100) ? |
| -(param->p.seg_lf_info[i] & 0x3f) |
| : (param->p.seg_lf_info[i] & 0x3f); |
| } |
| |
/*
 * Resume decoding after a source change: reallocate the HEVC buffers
 * for the new geometry, reprogram the workspace and restart the frame
 * that triggered the change. Aborts the session on buffer setup failure.
 */
static void codec_vp9_resume(struct amvdec_session *sess)
{
	struct codec_vp9 *vp9 = sess->priv;

	mutex_lock(&vp9->lock);
	if (codec_hevc_setup_buffers(sess, &vp9->common, vp9->is_10bit)) {
		/* Drop the lock before aborting to avoid holding it forever */
		mutex_unlock(&vp9->lock);
		amvdec_abort(sess);
		return;
	}

	codec_vp9_setup_workspace(sess, vp9);
	codec_hevc_setup_decode_head(sess, vp9->is_10bit);
	codec_vp9_process_lf(vp9);
	codec_vp9_process_frame(sess);

	mutex_unlock(&vp9->lock);
}
| |
| /* |
| * The RPM section within the workspace contains |
| * many information regarding the parsed bitstream |
| */ |
| static void codec_vp9_fetch_rpm(struct amvdec_session *sess) |
| { |
| struct codec_vp9 *vp9 = sess->priv; |
| u16 *rpm_vaddr = vp9->workspace_vaddr + RPM_OFFSET; |
| int i, j; |
| |
| for (i = 0; i < RPM_BUF_SIZE; i += 4) |
| for (j = 0; j < 4; j++) |
| vp9->rpm_param.l.data[i + j] = rpm_vaddr[i + 3 - j]; |
| } |
| |
| static int codec_vp9_process_rpm(struct codec_vp9 *vp9) |
| { |
| union rpm_param *param = &vp9->rpm_param; |
| int src_changed = 0; |
| int is_10bit = 0; |
| int pic_width_64 = ALIGN(param->p.width, 64); |
| int pic_height_32 = ALIGN(param->p.height, 32); |
| int pic_width_lcu = (pic_width_64 % LCU_SIZE) ? |
| pic_width_64 / LCU_SIZE + 1 |
| : pic_width_64 / LCU_SIZE; |
| int pic_height_lcu = (pic_height_32 % LCU_SIZE) ? |
| pic_height_32 / LCU_SIZE + 1 |
| : pic_height_32 / LCU_SIZE; |
| vp9->lcu_total = pic_width_lcu * pic_height_lcu; |
| |
| if (param->p.bit_depth == 10) |
| is_10bit = 1; |
| |
| if (vp9->width != param->p.width || vp9->height != param->p.height || |
| vp9->is_10bit != is_10bit) |
| src_changed = 1; |
| |
| vp9->width = param->p.width; |
| vp9->height = param->p.height; |
| vp9->is_10bit = is_10bit; |
| |
| pr_debug("width: %u; height: %u; is_10bit: %d; src_changed: %d\n", |
| vp9->width, vp9->height, is_10bit, src_changed); |
| |
| return src_changed; |
| } |
| |
| static bool codec_vp9_is_ref(struct codec_vp9 *vp9, struct vp9_frame *frame) |
| { |
| int i; |
| |
| for (i = 0; i < REF_FRAMES; ++i) |
| if (vp9->ref_frame_map[i] == frame->index) |
| return true; |
| |
| return false; |
| } |
| |
/*
 * Deliver every shown frame (other than the one being decoded) to
 * userspace, then free frames that are neither referenced nor the
 * previous frame.
 */
static void codec_vp9_show_frame(struct amvdec_session *sess)
{
	struct codec_vp9 *vp9 = sess->priv;
	struct vp9_frame *tmp, *n;

	list_for_each_entry_safe(tmp, n, &vp9->ref_frames_list, list) {
		if (!tmp->show || tmp == vp9->cur_frame)
			continue;

		/* Mark done at most once per frame */
		if (!tmp->done) {
			pr_debug("Doning %u\n", tmp->index);
			amvdec_dst_buf_done(sess, tmp->vbuf, V4L2_FIELD_NONE);
			tmp->done = 1;
			vp9->frames_num--;
		}

		/* Keep frames still used as references or as prev_frame */
		if (codec_vp9_is_ref(vp9, tmp) || tmp == vp9->prev_frame)
			continue;

		pr_debug("deleting %d\n", tmp->index);
		list_del(&tmp->list);
		kfree(tmp);
	}
}
| |
| static void vp9_tree_merge_probs(unsigned int *prev_prob, |
| unsigned int *cur_prob, |
| int coef_node_start, int tree_left, |
| int tree_right, |
| int tree_i, int node) |
| { |
| int prob_32, prob_res, prob_shift; |
| int pre_prob, new_prob; |
| int den, m_count, get_prob, factor; |
| |
| prob_32 = prev_prob[coef_node_start / 4 * 2]; |
| prob_res = coef_node_start & 3; |
| prob_shift = prob_res * 8; |
| pre_prob = (prob_32 >> prob_shift) & 0xff; |
| |
| den = tree_left + tree_right; |
| |
| if (den == 0) { |
| new_prob = pre_prob; |
| } else { |
| m_count = den < MODE_MV_COUNT_SAT ? den : MODE_MV_COUNT_SAT; |
| get_prob = |
| clip_prob(div_r32(((int64_t)tree_left * 256 + |
| (den >> 1)), |
| den)); |
| |
| /* weighted_prob */ |
| factor = count_to_update_factor[m_count]; |
| new_prob = round_power_of_two(pre_prob * (256 - factor) + |
| get_prob * factor, 8); |
| } |
| |
| cur_prob[coef_node_start / 4 * 2] = |
| (cur_prob[coef_node_start / 4 * 2] & (~(0xff << prob_shift))) | |
| (new_prob << prob_shift); |
| } |
| |
/*
 * Adapt the coefficient probabilities for cxt_num contexts. Each
 * context has five raw counters from which three binary tree nodes are
 * derived; each node's previous probability is blended with the
 * observed one using update_factor (saturated at count_sat counts).
 */
static void adapt_coef_probs_cxt(unsigned int *prev_prob,
				 unsigned int *cur_prob,
				 unsigned int *count,
				 int update_factor,
				 int cxt_num,
				 int coef_cxt_start,
				 int coef_count_cxt_start)
{
	int prob_32, prob_res, prob_shift;
	int pre_prob, new_prob;
	int num, den, m_count, get_prob, factor;
	int node, coef_node_start;
	int count_sat = 24;
	int cxt;

	for (cxt = 0; cxt < cxt_num; cxt++) {
		/* n0/n1/n2: token counts; neob/nneob: end-of-block counts */
		const int n0 = count[coef_count_cxt_start];
		const int n1 = count[coef_count_cxt_start + 1];
		const int n2 = count[coef_count_cxt_start + 2];
		const int neob = count[coef_count_cxt_start + 3];
		const int nneob = count[coef_count_cxt_start + 4];
		const unsigned int branch_ct[3][2] = {
			{ neob, nneob },
			{ n0, n1 + n2 },
			{ n1, n2 }
		};

		coef_node_start = coef_cxt_start;
		for (node = 0 ; node < 3 ; node++) {
			/* Probabilities are packed 4 bytes per word,
			 * every other word.
			 */
			prob_32 = prev_prob[coef_node_start / 4 * 2];
			prob_res = coef_node_start & 3;
			prob_shift = prob_res * 8;
			pre_prob = (prob_32 >> prob_shift) & 0xff;

			/* get binary prob */
			num = branch_ct[node][0];
			den = branch_ct[node][0] + branch_ct[node][1];
			m_count = den < count_sat ? den : count_sat;

			get_prob = (den == 0) ?
				   128u :
				   clip_prob(div_r32(((int64_t)num * 256 +
						      (den >> 1)), den));

			factor = update_factor * m_count / count_sat;
			new_prob =
				round_power_of_two(pre_prob * (256 - factor) +
						   get_prob * factor, 8);

			cur_prob[coef_node_start / 4 * 2] =
				(cur_prob[coef_node_start / 4 * 2] &
				 (~(0xff << prob_shift))) |
				(new_prob << prob_shift);

			coef_node_start += 1;
		}

		/* 3 probability bytes and 5 counters per context */
		coef_cxt_start = coef_cxt_start + 3;
		coef_count_cxt_start = coef_count_cxt_start + 5;
	}
}
| |
/*
 * Run VP9 backward probability adaptation over the whole probability
 * buffer: coefficient probabilities first, then (for inter frames
 * only) the mode/MV probability trees.
 *
 * prev_prob/cur_prob/count point into the hardware-maintained
 * probability and counter buffers in the workspace. pre_fc is accepted
 * but not used by this implementation.
 */
static void adapt_coef_probs(int prev_kf, int cur_kf, int pre_fc,
			     unsigned int *prev_prob, unsigned int *cur_prob,
			     unsigned int *count)
{
	int tx_size, coef_tx_size_start, coef_count_tx_size_start;
	int plane, coef_plane_start, coef_count_plane_start;
	int type, coef_type_start, coef_count_type_start;
	int band, coef_band_start, coef_count_band_start;
	int cxt_num;
	int coef_cxt_start, coef_count_cxt_start;
	int node, coef_node_start, coef_count_node_start;

	int tree_i, tree_left, tree_right;
	int mvd_i;

	/* 112 in every case except a keyframe followed by an inter frame */
	int update_factor = cur_kf ? 112 : (prev_kf ? 128 : 112);

	int prob_32;
	int prob_res;
	int prob_shift;
	int pre_prob;

	int den;
	int get_prob;
	int m_count;
	int factor;

	int new_prob;

	/*
	 * Coefficient probabilities: iterate tx_size / plane / type /
	 * band, adapting each band's contexts (3 contexts for band 0,
	 * 6 otherwise).
	 */
	for (tx_size = 0 ; tx_size < 4 ; tx_size++) {
		coef_tx_size_start = VP9_COEF_START +
				tx_size * 4 * VP9_COEF_SIZE_ONE_SET;
		coef_count_tx_size_start = VP9_COEF_COUNT_START +
				tx_size * 4 * VP9_COEF_COUNT_SIZE_ONE_SET;
		coef_plane_start = coef_tx_size_start;
		coef_count_plane_start = coef_count_tx_size_start;

		for (plane = 0 ; plane < 2 ; plane++) {
			coef_type_start = coef_plane_start;
			coef_count_type_start = coef_count_plane_start;

			for (type = 0 ; type < 2 ; type++) {
				coef_band_start = coef_type_start;
				coef_count_band_start = coef_count_type_start;

				for (band = 0 ; band < 6 ; band++) {
					if (band == 0)
						cxt_num = 3;
					else
						cxt_num = 6;
					coef_cxt_start = coef_band_start;
					coef_count_cxt_start =
						coef_count_band_start;

					adapt_coef_probs_cxt(prev_prob,
							     cur_prob,
							     count,
							     update_factor,
							     cxt_num,
							     coef_cxt_start,
							coef_count_cxt_start);

					if (band == 0) {
						coef_band_start += 10;
						coef_count_band_start += 15;
					} else {
						coef_band_start += 18;
						coef_count_band_start += 30;
					}
				}
				coef_type_start += VP9_COEF_SIZE_ONE_SET;
				coef_count_type_start +=
					VP9_COEF_COUNT_SIZE_ONE_SET;
			}

			coef_plane_start += 2 * VP9_COEF_SIZE_ONE_SET;
			coef_count_plane_start +=
				2 * VP9_COEF_COUNT_SIZE_ONE_SET;
		}
	}

	/* Mode/MV probabilities only adapt on inter frames */
	if (cur_kf == 0) {
		/* mode_mv_merge_probs - merge_intra_inter_prob */
		/*
		 * All the simple binary probabilities (intra/inter,
		 * comp/inter, tx mode, skip, MV signs/bits/HP) live in
		 * consecutive count pairs; resynchronize coef_node_start
		 * at the start of each probability group.
		 */
		for (coef_count_node_start = VP9_INTRA_INTER_COUNT_START;
		     coef_count_node_start < (VP9_MV_CLASS0_HP_1_COUNT_START +
					      VP9_MV_CLASS0_HP_1_COUNT_SIZE);
		     coef_count_node_start += 2) {
			if (coef_count_node_start ==
					VP9_INTRA_INTER_COUNT_START)
				coef_node_start = VP9_INTRA_INTER_START;
			else if (coef_count_node_start ==
					VP9_COMP_INTER_COUNT_START)
				coef_node_start = VP9_COMP_INTER_START;
			else if (coef_count_node_start ==
					VP9_TX_MODE_COUNT_START)
				coef_node_start = VP9_TX_MODE_START;
			else if (coef_count_node_start ==
					VP9_SKIP_COUNT_START)
				coef_node_start = VP9_SKIP_START;
			else if (coef_count_node_start ==
					VP9_MV_SIGN_0_COUNT_START)
				coef_node_start = VP9_MV_SIGN_0_START;
			else if (coef_count_node_start ==
					VP9_MV_SIGN_1_COUNT_START)
				coef_node_start = VP9_MV_SIGN_1_START;
			else if (coef_count_node_start ==
					VP9_MV_BITS_0_COUNT_START)
				coef_node_start = VP9_MV_BITS_0_START;
			else if (coef_count_node_start ==
					VP9_MV_BITS_1_COUNT_START)
				coef_node_start = VP9_MV_BITS_1_START;
			else if (coef_count_node_start ==
					VP9_MV_CLASS0_HP_0_COUNT_START)
				coef_node_start = VP9_MV_CLASS0_HP_0_START;

			den = count[coef_count_node_start] +
			      count[coef_count_node_start + 1];

			prob_32 = prev_prob[coef_node_start / 4 * 2];
			prob_res = coef_node_start & 3;
			prob_shift = prob_res * 8;
			pre_prob = (prob_32 >> prob_shift) & 0xff;

			if (den == 0) {
				new_prob = pre_prob;
			} else {
				m_count = den < MODE_MV_COUNT_SAT ?
						den : MODE_MV_COUNT_SAT;
				get_prob =
					clip_prob(div_r32(((int64_t)
						count[coef_count_node_start] * 256 +
						(den >> 1)),
						den));

				/* weighted prob */
				factor = count_to_update_factor[m_count];
				new_prob =
					round_power_of_two(pre_prob *
							   (256 - factor) +
							   get_prob * factor,
							   8);
			}

			cur_prob[coef_node_start / 4 * 2] =
				(cur_prob[coef_node_start / 4 * 2] &
				 (~(0xff << prob_shift))) |
				(new_prob << prob_shift);

			coef_node_start = coef_node_start + 1;
		}

		/* Inter mode tree: 7 contexts, 3 nodes each, 4 counters */
		coef_node_start = VP9_INTER_MODE_START;
		coef_count_node_start = VP9_INTER_MODE_COUNT_START;
		for (tree_i = 0 ; tree_i < 7 ; tree_i++) {
			for (node = 0 ; node < 3 ; node++) {
				unsigned int start = coef_count_node_start;

				switch (node) {
				case 2:
					tree_left = count[start + 1];
					tree_right = count[start + 3];
					break;
				case 1:
					tree_left = count[start + 0];
					tree_right = count[start + 1] +
						     count[start + 3];
					break;
				default:
					tree_left = count[start + 2];
					tree_right = count[start + 0] +
						     count[start + 1] +
						     count[start + 3];
					break;
				}

				vp9_tree_merge_probs(prev_prob, cur_prob,
						     coef_node_start,
						     tree_left, tree_right,
						     tree_i, node);

				coef_node_start = coef_node_start + 1;
			}

			coef_count_node_start = coef_count_node_start + 4;
		}

		/* Intra (Y mode) tree: 14 contexts, 9 nodes, 10 counters */
		coef_node_start = VP9_IF_Y_MODE_START;
		coef_count_node_start = VP9_IF_Y_MODE_COUNT_START;
		for (tree_i = 0 ; tree_i < 14 ; tree_i++) {
			for (node = 0 ; node < 9 ; node++) {
				unsigned int start = coef_count_node_start;

				switch (node) {
				case 8:
					tree_left =
						count[start + D153_PRED];
					tree_right =
						count[start + D207_PRED];
					break;
				case 7:
					tree_left =
						count[start + D63_PRED];
					tree_right =
						count[start + D207_PRED] +
						count[start + D153_PRED];
					break;
				case 6:
					tree_left =
						count[start + D45_PRED];
					tree_right =
						count[start + D207_PRED] +
						count[start + D153_PRED] +
						count[start + D63_PRED];
					break;
				case 5:
					tree_left =
						count[start + D135_PRED];
					tree_right =
						count[start + D117_PRED];
					break;
				case 4:
					tree_left =
						count[start + H_PRED];
					tree_right =
						count[start + D117_PRED] +
						count[start + D135_PRED];
					break;
				case 3:
					tree_left =
						count[start + H_PRED] +
						count[start + D117_PRED] +
						count[start + D135_PRED];
					tree_right =
						count[start + D45_PRED] +
						count[start + D207_PRED] +
						count[start + D153_PRED] +
						count[start + D63_PRED];
					break;
				case 2:
					tree_left =
						count[start + V_PRED];
					tree_right =
						count[start + H_PRED] +
						count[start + D117_PRED] +
						count[start + D135_PRED] +
						count[start + D45_PRED] +
						count[start + D207_PRED] +
						count[start + D153_PRED] +
						count[start + D63_PRED];
					break;
				case 1:
					tree_left =
						count[start + TM_PRED];
					tree_right =
						count[start + V_PRED] +
						count[start + H_PRED] +
						count[start + D117_PRED] +
						count[start + D135_PRED] +
						count[start + D45_PRED] +
						count[start + D207_PRED] +
						count[start + D153_PRED] +
						count[start + D63_PRED];
					break;
				default:
					tree_left =
						count[start + DC_PRED];
					tree_right =
						count[start + TM_PRED] +
						count[start + V_PRED] +
						count[start + H_PRED] +
						count[start + D117_PRED] +
						count[start + D135_PRED] +
						count[start + D45_PRED] +
						count[start + D207_PRED] +
						count[start + D153_PRED] +
						count[start + D63_PRED];
					break;
				}

				vp9_tree_merge_probs(prev_prob, cur_prob,
						     coef_node_start,
						     tree_left, tree_right,
						     tree_i, node);

				coef_node_start = coef_node_start + 1;
			}
			coef_count_node_start = coef_count_node_start + 10;
		}

		/* Partition tree: 16 contexts, 3 nodes, 4 counters */
		coef_node_start = VP9_PARTITION_P_START;
		coef_count_node_start = VP9_PARTITION_P_COUNT_START;
		for (tree_i = 0 ; tree_i < 16 ; tree_i++) {
			for (node = 0 ; node < 3 ; node++) {
				unsigned int start = coef_count_node_start;

				switch (node) {
				case 2:
					tree_left = count[start + 2];
					tree_right = count[start + 3];
					break;
				case 1:
					tree_left = count[start + 1];
					tree_right = count[start + 2] +
						     count[start + 3];
					break;
				default:
					tree_left = count[start + 0];
					tree_right = count[start + 1] +
						     count[start + 2] +
						     count[start + 3];
					break;
				}

				vp9_tree_merge_probs(prev_prob, cur_prob,
						     coef_node_start,
						     tree_left, tree_right,
						     tree_i, node);

				coef_node_start = coef_node_start + 1;
			}

			coef_count_node_start = coef_count_node_start + 4;
		}

		/* Interp filter tree: 4 contexts, 2 nodes, 3 counters */
		coef_node_start = VP9_INTERP_START;
		coef_count_node_start = VP9_INTERP_COUNT_START;
		for (tree_i = 0 ; tree_i < 4 ; tree_i++) {
			for (node = 0 ; node < 2 ; node++) {
				unsigned int start = coef_count_node_start;

				switch (node) {
				case 1:
					tree_left = count[start + 1];
					tree_right = count[start + 2];
					break;
				default:
					tree_left = count[start + 0];
					tree_right = count[start + 1] +
						     count[start + 2];
					break;
				}

				vp9_tree_merge_probs(prev_prob, cur_prob,
						     coef_node_start,
						     tree_left, tree_right,
						     tree_i, node);

				coef_node_start = coef_node_start + 1;
			}
			coef_count_node_start = coef_count_node_start + 3;
		}

		/* MV joints tree: single context, 3 nodes, 4 counters */
		coef_node_start = VP9_MV_JOINTS_START;
		coef_count_node_start = VP9_MV_JOINTS_COUNT_START;
		for (tree_i = 0 ; tree_i < 1 ; tree_i++) {
			for (node = 0 ; node < 3 ; node++) {
				unsigned int start = coef_count_node_start;

				switch (node) {
				case 2:
					tree_left = count[start + 2];
					tree_right = count[start + 3];
					break;
				case 1:
					tree_left = count[start + 1];
					tree_right = count[start + 2] +
						     count[start + 3];
					break;
				default:
					tree_left = count[start + 0];
					tree_right = count[start + 1] +
						     count[start + 2] +
						     count[start + 3];
					break;
				}

				vp9_tree_merge_probs(prev_prob, cur_prob,
						     coef_node_start,
						     tree_left, tree_right,
						     tree_i, node);

				coef_node_start = coef_node_start + 1;
			}
			coef_count_node_start = coef_count_node_start + 4;
		}

		/*
		 * Per MV component (mvd_i = 0: row, 1: col — TODO confirm
		 * component order): MV class tree (10 nodes), class0 bit,
		 * and class0 fractional-part trees (3 contexts, 3 nodes).
		 */
		for (mvd_i = 0 ; mvd_i < 2 ; mvd_i++) {
			coef_node_start = mvd_i ? VP9_MV_CLASSES_1_START :
						  VP9_MV_CLASSES_0_START;
			coef_count_node_start = mvd_i ?
					VP9_MV_CLASSES_1_COUNT_START :
					VP9_MV_CLASSES_0_COUNT_START;
			tree_i = 0;
			for (node = 0; node < 10; node++) {
				unsigned int start = coef_count_node_start;

				switch (node) {
				case 9:
					tree_left = count[start + 9];
					tree_right = count[start + 10];
					break;
				case 8:
					tree_left = count[start + 7];
					tree_right = count[start + 8];
					break;
				case 7:
					tree_left = count[start + 7] +
						    count[start + 8];
					tree_right = count[start + 9] +
						     count[start + 10];
					break;
				case 6:
					tree_left = count[start + 6];
					tree_right = count[start + 7] +
						     count[start + 8] +
						     count[start + 9] +
						     count[start + 10];
					break;
				case 5:
					tree_left = count[start + 4];
					tree_right = count[start + 5];
					break;
				case 4:
					tree_left = count[start + 4] +
						    count[start + 5];
					tree_right = count[start + 6] +
						     count[start + 7] +
						     count[start + 8] +
						     count[start + 9] +
						     count[start + 10];
					break;
				case 3:
					tree_left = count[start + 2];
					tree_right = count[start + 3];
					break;
				case 2:
					tree_left = count[start + 2] +
						    count[start + 3];
					tree_right = count[start + 4] +
						     count[start + 5] +
						     count[start + 6] +
						     count[start + 7] +
						     count[start + 8] +
						     count[start + 9] +
						     count[start + 10];
					break;
				case 1:
					tree_left = count[start + 1];
					tree_right = count[start + 2] +
						     count[start + 3] +
						     count[start + 4] +
						     count[start + 5] +
						     count[start + 6] +
						     count[start + 7] +
						     count[start + 8] +
						     count[start + 9] +
						     count[start + 10];
					break;
				default:
					tree_left = count[start + 0];
					tree_right = count[start + 1] +
						     count[start + 2] +
						     count[start + 3] +
						     count[start + 4] +
						     count[start + 5] +
						     count[start + 6] +
						     count[start + 7] +
						     count[start + 8] +
						     count[start + 9] +
						     count[start + 10];
					break;
				}

				vp9_tree_merge_probs(prev_prob, cur_prob,
						     coef_node_start,
						     tree_left, tree_right,
						     tree_i, node);

				coef_node_start = coef_node_start + 1;
			}

			coef_node_start = mvd_i ? VP9_MV_CLASS0_1_START :
						  VP9_MV_CLASS0_0_START;
			coef_count_node_start = mvd_i ?
						VP9_MV_CLASS0_1_COUNT_START :
						VP9_MV_CLASS0_0_COUNT_START;
			tree_i = 0;
			node = 0;
			tree_left = count[coef_count_node_start + 0];
			tree_right = count[coef_count_node_start + 1];

			vp9_tree_merge_probs(prev_prob, cur_prob,
					     coef_node_start,
					     tree_left, tree_right,
					     tree_i, node);
			coef_node_start = mvd_i ? VP9_MV_CLASS0_FP_1_START :
						  VP9_MV_CLASS0_FP_0_START;
			coef_count_node_start = mvd_i ?
					VP9_MV_CLASS0_FP_1_COUNT_START :
					VP9_MV_CLASS0_FP_0_COUNT_START;

			for (tree_i = 0; tree_i < 3; tree_i++) {
				for (node = 0; node < 3; node++) {
					unsigned int start =
						coef_count_node_start;
					switch (node) {
					case 2:
						tree_left = count[start + 2];
						tree_right = count[start + 3];
						break;
					case 1:
						tree_left = count[start + 1];
						tree_right = count[start + 2] +
							     count[start + 3];
						break;
					default:
						tree_left = count[start + 0];
						tree_right = count[start + 1] +
							     count[start + 2] +
							     count[start + 3];
						break;
					}

					vp9_tree_merge_probs(prev_prob,
							     cur_prob,
							     coef_node_start,
							     tree_left,
							     tree_right,
							     tree_i, node);

					coef_node_start = coef_node_start + 1;
				}
				coef_count_node_start =
					coef_count_node_start + 4;
			}
		}
	}
}
| |
/*
 * Threaded half of the VP9 interrupt handler (woken by codec_vp9_isr()).
 *
 * Expects the firmware to report VP9_HEAD_PARSER_DONE; anything else is
 * treated as fatal and aborts the session. On success it optionally runs
 * software probability adaptation, rotates the reference frames, fetches
 * the new frame parameters and either signals a source change (resolution
 * switch) or processes and displays the decoded frame.
 *
 * Returns IRQ_HANDLED in all cases.
 */
static irqreturn_t codec_vp9_threaded_isr(struct amvdec_session *sess)
{
	struct amvdec_core *core = sess->core;
	struct codec_vp9 *vp9 = sess->priv;
	u32 dec_status = amvdec_read_dos(core, VP9_DEC_STATUS_REG);
	u32 prob_status = amvdec_read_dos(core, VP9_ADAPT_PROB_REG);
	int i;

	/* Codec private data is gone (session torn down): nothing to do */
	if (!vp9)
		return IRQ_HANDLED;

	mutex_lock(&vp9->lock);
	if (dec_status != VP9_HEAD_PARSER_DONE) {
		dev_err(core->dev_dec, "Unrecognized dec_status: %08X\n",
			dec_status);
		amvdec_abort(sess);
		goto unlock;
	}

	pr_debug("ISR: %08X;%08X\n", dec_status, prob_status);
	sess->keyframe_found = 1;

	if ((prob_status & 0xff) == 0xfd && vp9->cur_frame) {
		/*
		 * VP9_REQ_ADAPT_PROB: the hardware asks us to adapt the
		 * probability tables in software. The high bits of
		 * prob_status select the 4 KiB page holding the previous
		 * probabilities inside the workspace PROB area; the
		 * current probabilities live at a fixed 0x4000 offset and
		 * the symbol counts in the COUNT area.
		 */
		u8 *prev_prob_b = ((u8 *)vp9->workspace_vaddr +
				   PROB_OFFSET) +
				  ((prob_status >> 8) * 0x1000);
		u8 *cur_prob_b = ((u8 *)vp9->workspace_vaddr +
				  PROB_OFFSET) + 0x4000;
		u8 *count_b = (u8 *)vp9->workspace_vaddr +
			      COUNT_OFFSET;
		/* With no previous frame, treat the last frame as a keyframe */
		int last_frame_type = vp9->prev_frame ?
					vp9->prev_frame->type :
					KEY_FRAME;

		adapt_coef_probs(last_frame_type == KEY_FRAME,
				 vp9->cur_frame->type == KEY_FRAME ? 1 : 0,
				 prob_status >> 8,
				 (unsigned int *)prev_prob_b,
				 (unsigned int *)cur_prob_b,
				 (unsigned int *)count_b);

		/* Adapted probs become the "previous" set for the next frame */
		memcpy(prev_prob_b, cur_prob_b, ADAPT_PROB_SIZE);
		/* Acknowledge the adaptation request to the hardware */
		amvdec_write_dos(core, VP9_ADAPT_PROB_REG, 0);
	}

	/* Invalidate first 3 refs */
	for (i = 0; i < REFS_PER_FRAME ; ++i)
		vp9->frame_refs[i] = NULL;

	vp9->prev_frame = vp9->cur_frame;
	codec_vp9_update_ref(vp9);

	/* Pull the new frame's parameters from the rpm buffer */
	codec_vp9_fetch_rpm(sess);
	if (codec_vp9_process_rpm(vp9)) {
		/* Resolution changed: notify v4l2 of the source change */
		amvdec_src_change(sess, vp9->width, vp9->height, 16);

		/* No frame is actually processed */
		vp9->cur_frame = NULL;

		/* Show the remaining frame */
		codec_vp9_show_frame(sess);

		/* FIXME: Save refs for resized frame */
		if (vp9->frames_num)
			codec_vp9_save_refs(vp9);

		goto unlock;
	}

	codec_vp9_process_lf(vp9);
	codec_vp9_process_frame(sess);
	codec_vp9_show_frame(sess);

unlock:
	mutex_unlock(&vp9->lock);
	return IRQ_HANDLED;
}
| |
| static irqreturn_t codec_vp9_isr(struct amvdec_session *sess) |
| { |
| return IRQ_WAKE_THREAD; |
| } |
| |
/* Codec operations registered with the amvdec core for VP9 streams */
struct amvdec_codec_ops codec_vp9_ops = {
	.start = codec_vp9_start,		/* power up / program the HEVC block */
	.stop = codec_vp9_stop,			/* tear down decoding session */
	.isr = codec_vp9_isr,			/* hard IRQ: defers to threaded handler */
	.threaded_isr = codec_vp9_threaded_isr,	/* per-frame processing */
	.num_pending_bufs = codec_vp9_num_pending_bufs,
	.drain = codec_vp9_flush_output,	/* flush queued output on drain */
	.resume = codec_vp9_resume,
};