• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

hardware/intel/intel-driver


Commit MetaInfo

Revisão: 890f538f62707ec07a6accdb65bafcaffc941bb1 (tree)
Hora: 2015-04-17 17:14:10
Autor: Qu,Pengfei <Pengfei.Qu@inte...>
Committer: Xiang, Haihao

Mensagem de Log

HEVC enc: Added 4K & 2K support; added Profile & Level 5.1 and above

Signed-off-by: Qu,Pengfei <Pengfei.Qu@intel.com>

Mudança Sumário

Diff

--- a/src/gen9_mfc_hevc.c
+++ b/src/gen9_mfc_hevc.c
@@ -1146,20 +1146,18 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
11461146 int qp, unsigned int *msg,
11471147 int ctb_x, int ctb_y,
11481148 int mb_x, int mb_y,
1149- int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type)
1149+ int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type,int cu_index,int index)
11501150 {
11511151 /* here cu == mb, so we use mb address as the cu address */
11521152 /* to fill the indirect cu by the vme out */
1153- static int mb_addr_raster_to_zigzag_64[4][4] = { {0, 1, 4, 5}, {2, 3, 6, 7}, {8, 9, 12, 13}, {10, 11, 14, 15} };
1154- static int mb_addr_raster_to_zigzag_32[2][2] = { {0, 1}, {2, 3 } };
11551153 static int intra_mode_8x8_avc2hevc[9] = {26, 10, 1, 34, 18, 24, 13, 28, 8};
11561154 static int intra_mode_16x16_avc2hevc[4] = {26, 10, 1, 34};
11571155 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
11581156 unsigned char * cu_record_ptr = NULL;
11591157 unsigned int * cu_msg = NULL;
11601158 int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
1161- int mb_address_in_ctb = ((ctb_width_in_mb == 4) ? mb_addr_raster_to_zigzag_64[mb_x][mb_y] : ((ctb_width_in_mb == 2) ? mb_addr_raster_to_zigzag_32[mb_x][mb_y] : 0));
1162- int cu_address = (ctb_address + mb_address_in_ctb) * 16 * 4;
1159+ int mb_address_in_ctb = 0;
1160+ int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
11631161 int zero = 0;
11641162 int is_inter = 0;
11651163 int intraMbMode = 0;
@@ -1167,6 +1165,9 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
11671165 int intraMode[4];
11681166 int inerpred_idc = 0;
11691167 int intra_chroma_mode = 5;
1168+ int cu_size = 1;
1169+ int tu_size = 0x55;
1170+ int tu_count = 4;
11701171
11711172 if (!is_inter) inerpred_idc = 0xff;
11721173
@@ -1176,29 +1177,37 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
11761177 if (intraMbMode == AVC_INTRA_16X16) {
11771178 cu_part_mode = 0; //2Nx2N
11781179 intra_chroma_mode = 5;
1180+ cu_size = 1;
1181+ tu_size = 0x55;
1182+ tu_count = 4;
11791183 intraMode[0] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
11801184 intraMode[1] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
11811185 intraMode[2] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
11821186 intraMode[3] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
11831187 } else if (intraMbMode == AVC_INTRA_8X8) {
1184- cu_part_mode = 3; //NxN
1185- intra_chroma_mode = 0;
1186- intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] & 0xf];
1187- intraMode[1] = intra_mode_8x8_avc2hevc[(msg[1] >> 4) & 0xf];
1188- intraMode[2] = intra_mode_8x8_avc2hevc[(msg[1] >> 8) & 0xf];
1189- intraMode[3] = intra_mode_8x8_avc2hevc[(msg[1] >> 12) & 0xf];
1188+ cu_part_mode = 0; //2Nx2N
1189+ intra_chroma_mode = 5;
1190+ cu_size = 0;
1191+ tu_size = 0;
1192+ tu_count = 4;
1193+ intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1194+ intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1195+ intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1196+ intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
11901197
11911198 } else { // for 4x4 to use 8x8 replace
11921199 cu_part_mode = 3; //NxN
11931200 intra_chroma_mode = 0;
1194- intraMode[0] = intra_mode_8x8_avc2hevc[0];
1195- intraMode[1] = intra_mode_8x8_avc2hevc[0];
1196- intraMode[2] = intra_mode_8x8_avc2hevc[0];
1197- intraMode[3] = intra_mode_8x8_avc2hevc[0];
1201+ cu_size = 0;
1202+ tu_size = 0;
1203+ tu_count = 4;
1204+ intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 0) & 0xf];
1205+ intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 4) & 0xf];
1206+ intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 8) & 0xf];
1207+ intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 12) & 0xf];
11981208
11991209 }
12001210
1201- dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
12021211 cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
12031212 /* get the mb info from the vme out */
12041213 cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
@@ -1212,7 +1221,7 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
12121221 cu_part_mode << 4 | /* cu_part_mode */
12131222 zero << 3 | /* cu_transquant_bypass_flag */
12141223 is_inter << 2 | /* cu_pred_mode :intra 1,inter 1*/
1215- 1 /* cu_size */
1224+ cu_size /* cu_size */
12161225 );
12171226 cu_msg[1] = (zero << 30 | /* reserved */
12181227 intraMode[3] << 24 | /* intra_mode */
@@ -1260,8 +1269,8 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
12601269 zero /* ref_idx_l0[0] */
12611270 );
12621271
1263- cu_msg[11] = 0x55; /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
1264- cu_msg[12] = (3 << 28 | /* tu count - 1 */
1272+ cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
1273+ cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
12651274 zero << 16 | /* reserved */
12661275 zero /* tu_xform_Yskip[15:0] */
12671276 );
@@ -1270,9 +1279,6 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
12701279 );
12711280 cu_msg[14] = zero ;
12721281 cu_msg[15] = zero ;
1273-
1274- dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
1275-
12761282 }
12771283
12781284 /* here 1 MB = 1CU = 16x16 */
@@ -1283,24 +1289,24 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
12831289 int qp, unsigned int *msg,
12841290 int ctb_x, int ctb_y,
12851291 int mb_x, int mb_y,
1286- int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type)
1292+ int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index,int index)
12871293 {
12881294 /* here cu == mb, so we use mb address as the cu address */
12891295 /* to fill the indirect cu by the vme out */
1290- static int mb_addr_raster_to_zigzag_64[4][4] = { {0, 1, 4, 5}, {2, 3, 6, 7}, {8, 9, 12, 13}, {10, 11, 14, 15} };
1291- static int mb_addr_raster_to_zigzag_32[2][2] = { {0, 1}, {2, 3 } };
1292-
12931296 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
12941297 struct gen6_vme_context *vme_context = encoder_context->vme_context;
12951298 unsigned char * cu_record_ptr = NULL;
12961299 unsigned int * cu_msg = NULL;
12971300 int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
1298- int mb_address_in_ctb = ((ctb_width_in_mb == 4) ? mb_addr_raster_to_zigzag_64[mb_x][mb_y] : ((ctb_width_in_mb == 2) ? mb_addr_raster_to_zigzag_32[mb_x][mb_y] : 0));
1299- int cu_address = (ctb_address + mb_address_in_ctb) * 16 * 4;
1301+ int mb_address_in_ctb = 0;
1302+ int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
13001303 int zero = 0;
13011304 int cu_part_mode = 0;
13021305 int submb_pre_mode = 0;
13031306 int is_inter = 1;
1307+ int cu_size = 1;
1308+ int tu_size = 0x55;
1309+ int tu_count = 4;
13041310
13051311 unsigned int *mv_ptr;
13061312 {
@@ -1314,7 +1320,6 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
13141320 /* 0/2/4/6/8... : l0, 1/3/5/7...: l1 ; now it only support 16x16,16x8,8x16,8x8*/
13151321
13161322 if ((msg[0] & AVC_INTER_MODE_MASK) == AVC_INTER_16X16) {
1317- // MV[0] and MV[2] are replicated
13181323 mv_ptr[4] = mv_ptr[0];
13191324 mv_ptr[5] = mv_ptr[1];
13201325 mv_ptr[2] = mv_ptr[0];
@@ -1322,8 +1327,10 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
13221327 mv_ptr[6] = mv_ptr[0];
13231328 mv_ptr[7] = mv_ptr[1];
13241329 cu_part_mode = 0;
1330+ cu_size = 1;
1331+ tu_size = 0x55;
1332+ tu_count = 4;
13251333 } else if ((msg[0] & AVC_INTER_MODE_MASK) == AVC_INTER_8X16) {
1326- // MV[0] and MV[2] are replicated
13271334 mv_ptr[4] = mv_ptr[0];
13281335 mv_ptr[5] = mv_ptr[1];
13291336 mv_ptr[2] = mv_ptr[8];
@@ -1331,8 +1338,10 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
13311338 mv_ptr[6] = mv_ptr[8];
13321339 mv_ptr[7] = mv_ptr[9];
13331340 cu_part_mode = 1;
1341+ cu_size = 1;
1342+ tu_size = 0x55;
1343+ tu_count = 4;
13341344 } else if ((msg[0] & AVC_INTER_MODE_MASK) == AVC_INTER_16X8) {
1335- // MV[0] and MV[1] are replicated
13361345 mv_ptr[2] = mv_ptr[0];
13371346 mv_ptr[3] = mv_ptr[1];
13381347 mv_ptr[4] = mv_ptr[16];
@@ -1340,30 +1349,39 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
13401349 mv_ptr[6] = mv_ptr[24];
13411350 mv_ptr[7] = mv_ptr[25];
13421351 cu_part_mode = 2;
1343- } else if (((msg[0] & AVC_INTER_MODE_MASK) == AVC_INTER_8X8) &&
1344- !(msg[1] & SUBMB_SHAPE_MASK)) {
1345- // Don't touch MV[0] or MV[1]
1346- mv_ptr[2] = mv_ptr[8];
1347- mv_ptr[3] = mv_ptr[9];
1348- mv_ptr[4] = mv_ptr[16];
1349- mv_ptr[5] = mv_ptr[17];
1350- mv_ptr[6] = mv_ptr[24];
1351- mv_ptr[7] = mv_ptr[25];
1352- cu_part_mode = 3;
1353- } else {
1354- // Don't touch MV[0] or MV[1]
1355- // default use 8x8
1356- mv_ptr[2] = mv_ptr[8];
1357- mv_ptr[3] = mv_ptr[9];
1358- mv_ptr[4] = mv_ptr[16];
1359- mv_ptr[5] = mv_ptr[17];
1360- mv_ptr[6] = mv_ptr[24];
1361- mv_ptr[7] = mv_ptr[25];
1362- cu_part_mode = 3;
1352+ cu_size = 1;
1353+ tu_size = 0x55;
1354+ tu_count = 4;
1355+ }else if((msg[0] & AVC_INTER_MODE_MASK) == AVC_INTER_8X8) {
1356+ mv_ptr[0] = mv_ptr[index * 8 + 0 ];
1357+ mv_ptr[1] = mv_ptr[index * 8 + 1 ];
1358+ mv_ptr[2] = mv_ptr[index * 8 + 0 ];
1359+ mv_ptr[3] = mv_ptr[index * 8 + 1 ];
1360+ mv_ptr[4] = mv_ptr[index * 8 + 0 ];
1361+ mv_ptr[5] = mv_ptr[index * 8 + 1 ];
1362+ mv_ptr[6] = mv_ptr[index * 8 + 0 ];
1363+ mv_ptr[7] = mv_ptr[index * 8 + 1 ];
1364+ cu_part_mode = 0;
1365+ cu_size = 0;
1366+ tu_size = 0x0;
1367+ tu_count = 4;
1368+
1369+ }else
1370+ {
1371+ mv_ptr[4] = mv_ptr[0];
1372+ mv_ptr[5] = mv_ptr[1];
1373+ mv_ptr[2] = mv_ptr[0];
1374+ mv_ptr[3] = mv_ptr[1];
1375+ mv_ptr[6] = mv_ptr[0];
1376+ mv_ptr[7] = mv_ptr[1];
1377+ cu_part_mode = 0;
1378+ cu_size = 1;
1379+ tu_size = 0x55;
1380+ tu_count = 4;
1381+
13631382 }
13641383 }
13651384
1366- dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
13671385 cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
13681386 /* get the mb info from the vme out */
13691387 cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
@@ -1377,7 +1395,7 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
13771395 cu_part_mode << 4 | /* cu_part_mode */
13781396 zero << 3 | /* cu_transquant_bypass_flag */
13791397 is_inter << 2 | /* cu_pred_mode :intra 1,inter 1*/
1380- 1 /* cu_size */
1398+ cu_size /* cu_size */
13811399 );
13821400 cu_msg[1] = (zero << 30 | /* reserved */
13831401 zero << 24 | /* intra_mode */
@@ -1425,8 +1443,8 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
14251443 ((vme_context->ref_index_in_mb[0] >> 0) & 0xf) /* ref_idx_l0[0] */
14261444 );
14271445
1428- cu_msg[11] = 0x55; /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
1429- cu_msg[12] = (3 << 28 | /* tu count - 1 */
1446+ cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
1447+ cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
14301448 zero << 16 | /* reserved */
14311449 zero /* tu_xform_Yskip[15:0] */
14321450 );
@@ -1435,105 +1453,12 @@ gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
14351453 );
14361454 cu_msg[14] = zero ;
14371455 cu_msg[15] = zero ;
1438-
1439- dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
1440-
1441-}
1442-
1443-static void
1444-gen9_hcpe_hevc_vmeout_to_indirect_cu_buffer(VADriverContextP ctx,
1445- struct encode_state *encode_state,
1446- struct intel_encoder_context *encoder_context,
1447- int slice_index)
1448-{
1449- /* to do */
1450- /* to fill the indirect cu by the vme out */
1451- struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1452- struct gen6_vme_context *vme_context = encoder_context->vme_context;
1453- VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1454- VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1455- VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1456- unsigned int *msg = NULL;
1457- unsigned char *msg_ptr = NULL;
1458- int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1459- unsigned int rate_control_mode = encoder_context->rate_control_mode;
1460-
1461- int slice_type = pSliceParameter->slice_type;
1462- int is_intra = slice_type == HEVC_SLICE_I;
1463-
1464- int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1465- int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1466- int ctb_size = 1 << log2_ctb_size;
1467- int ctb_width_in_mb = (ctb_size + 15) / 16;
1468- int num_mb_in_ctb = ctb_width_in_mb * ctb_width_in_mb;
1469-
1470- int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
1471-
1472- int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1473-
1474- int num_cu_record = 64;
1475-
1476- if (log2_ctb_size == 5) num_cu_record = 16;
1477- else if (log2_ctb_size == 4) num_cu_record = 4;
1478- else if (log2_ctb_size == 6) num_cu_record = 64;
1479-
1480- int i_ctb;
1481- int ctb_x, ctb_y;
1482-
1483- int macroblock_address = 0;
1484-
1485- if (rate_control_mode == VA_RC_CBR) {
1486- qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1487- pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1488- }
1489-
1490- dri_bo_map(vme_context->vme_output.bo , 1);
1491- msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1492-
1493- for (i_ctb = pSliceParameter->slice_segment_address; i_ctb < pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; i_ctb++) {
1494- ctb_x = i_ctb % width_in_ctb;
1495- ctb_y = i_ctb / width_in_ctb;
1496-
1497- int mb_x, mb_y;
1498- int mb_addr = 0;
1499- macroblock_address = (i_ctb - ctb_x) * num_mb_in_ctb + ctb_x * ctb_width_in_mb;
1500- for (mb_y = 0; mb_y < ctb_width_in_mb; mb_y++) {
1501- mb_addr = macroblock_address + mb_y * width_in_mbs ;
1502- for (mb_x = 0; mb_x < ctb_width_in_mb; mb_x++) {
1503- mb_addr++;
1504-
1505- /* get the mb info from the vme out */
1506- msg = (unsigned int *)(msg_ptr + mb_addr * vme_context->vme_output.size_block);
1507-
1508- /*fill to indirect cu */
1509- /*to do */
1510- if (is_intra) {
1511- /* fill intra cu */
1512- gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type);
1513- } else {
1514- int inter_rdo, intra_rdo;
1515- inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1516- intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1517- if (intra_rdo < inter_rdo) {
1518- /* fill intra cu */
1519- gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type);
1520- } else {
1521- msg += AVC_INTER_MSG_OFFSET;
1522- /* fill inter cu */
1523- gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type);
1524- }
1525- }
1526-
1527- }
1528- }
1529- }
1530-
1531- dri_bo_unmap(vme_context->vme_output.bo);
15321456 }
15331457
15341458 #define HEVC_SPLIT_CU_FLAG_64_64 ((0x1<<20)|(0xf<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
15351459 #define HEVC_SPLIT_CU_FLAG_32_32 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
15361460 #define HEVC_SPLIT_CU_FLAG_16_16 ((0x0<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1461+#define HEVC_SPLIT_CU_FLAG_8_8 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
15371462
15381463
15391464 void
@@ -1646,6 +1571,7 @@ gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
16461571 struct intel_batchbuffer *slice_batch)
16471572 {
16481573 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1574+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
16491575 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
16501576 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
16511577 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
@@ -1666,7 +1592,24 @@ gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
16661592 int num_mb_in_ctb = ctb_width_in_mb * ctb_width_in_mb;
16671593 int i_ctb, ctb_x, ctb_y;
16681594 unsigned int split_coding_unit_flag = 0;
1595+ int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1596+ int row_pad_flag = (pSequenceParameter->pic_height_in_luma_samples % ctb_size)> 0 ? 1:0;
1597+
1598+ int is_intra = (slice_type == HEVC_SLICE_I);
1599+ unsigned int *msg = NULL;
1600+ unsigned char *msg_ptr = NULL;
1601+ int macroblock_address = 0;
1602+ int num_cu_record = 64;
1603+ int cu_count = 1;
1604+ int tmp_mb_mode = 0;
1605+ int mb_x = 0, mb_y = 0;
1606+ int mb_addr = 0;
1607+ int cu_index = 0;
1608+ int inter_rdo, intra_rdo;
16691609
1610+ if (log2_ctb_size == 5) num_cu_record = 16;
1611+ else if (log2_ctb_size == 4) num_cu_record = 4;
1612+ else if (log2_ctb_size == 6) num_cu_record = 64;
16701613 if (rate_control_mode == VA_RC_CBR) {
16711614 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
16721615 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1706,16 +1649,85 @@ gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
17061649
17071650 split_coding_unit_flag = (ctb_width_in_mb == 4) ? HEVC_SPLIT_CU_FLAG_64_64 : ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
17081651
1709- for (i_ctb = pSliceParameter->slice_segment_address;
1710- i_ctb < pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; i_ctb++) {
1652+ dri_bo_map(vme_context->vme_output.bo , 1);
1653+ msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1654+ dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
1655+
1656+ for (i_ctb = pSliceParameter->slice_segment_address;i_ctb < pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; i_ctb++) {
17111657 int last_ctb = (i_ctb == (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice - 1));
1658+ int ctb_height_in_mb = ctb_width_in_mb;
17121659 ctb_x = i_ctb % width_in_ctb;
17131660 ctb_y = i_ctb / width_in_ctb;
1661+ if(ctb_y == (height_in_ctb - 1) && row_pad_flag) ctb_height_in_mb = 1;
1662+
1663+ mb_x = 0;
1664+ mb_y = 0;
1665+ macroblock_address = (i_ctb - ctb_x) * num_mb_in_ctb + ctb_x * ctb_width_in_mb;
1666+ split_coding_unit_flag = ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
1667+ cu_count = 1;
1668+ cu_index = 0;
1669+ mb_addr = 0;
1670+ msg = NULL;
1671+ for (mb_y = 0; mb_y < ctb_height_in_mb; mb_y++)
1672+ {
1673+ mb_addr = macroblock_address + mb_y * width_in_mbs ;
1674+ for (mb_x = 0; mb_x < ctb_width_in_mb; mb_x++)
1675+ {
1676+ split_coding_unit_flag = ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
1677+ /* get the mb info from the vme out */
1678+ msg = (unsigned int *)(msg_ptr + mb_addr * vme_context->vme_output.size_block);
17141679
1715- gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, encoder_context, num_mb_in_ctb, split_coding_unit_flag, slice_batch);
1680+ inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1681+ intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1682+ /*fill to indirect cu */
1683+ /*to do */
1684+ if (is_intra || intra_rdo < inter_rdo) {
1685+ /* fill intra cu */
1686+ tmp_mb_mode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1687+ if (tmp_mb_mode == AVC_INTRA_16X16) {
1688+ gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1689+ } else { // for 4x4 to use 8x8 replace
1690+ gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1691+ gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1692+ gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1693+ gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
1694+ if(ctb_width_in_mb == 2)
1695+ split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1696+ else if(ctb_width_in_mb == 1)
1697+ split_coding_unit_flag |= 0x1 << 20;
1698+ }
1699+ } else {
1700+ msg += AVC_INTER_MSG_OFFSET;
1701+ /* fill inter cu */
1702+ tmp_mb_mode = msg[0] & AVC_INTER_MODE_MASK;
1703+ if (tmp_mb_mode == AVC_INTER_8X8){
1704+ gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1705+ gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1706+ gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1707+ gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
1708+ if(ctb_width_in_mb == 2)
1709+ split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1710+ else if(ctb_width_in_mb == 1)
1711+ split_coding_unit_flag |= 0x1 << 20;
1712+
1713+ }else if(tmp_mb_mode == AVC_INTER_16X16 ||
1714+ tmp_mb_mode == AVC_INTER_8X16 ||
1715+ tmp_mb_mode == AVC_INTER_16X8) {
1716+ gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1717+ }
1718+ }
1719+ mb_addr++;
1720+ }
1721+ }
17161722
1723+ cu_count = cu_index;
1724+ // PAK object fill accordingly.
1725+ gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, encoder_context, cu_count, split_coding_unit_flag, slice_batch);
17171726 }
17181727
1728+ dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
1729+ dri_bo_unmap(vme_context->vme_output.bo);
1730+
17191731 if (last_slice) {
17201732 mfc_context->insert_object(ctx, encoder_context,
17211733 tail_data, 2, 8,
@@ -1741,7 +1753,6 @@ gen9_hcpe_hevc_software_batchbuffer(VADriverContextP ctx,
17411753 batch_bo = batch->buffer;
17421754
17431755 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1744- gen9_hcpe_hevc_vmeout_to_indirect_cu_buffer(ctx, encode_state, encoder_context, i);
17451756 gen9_hcpe_hevc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
17461757 }
17471758