From 4ab81f113bdf1ca8c3b0d53c777885aa33ed27f3 Mon Sep 17 00:00:00 2001
From: John Cox <jc@kynesim.co.uk>
Date: Thu, 29 Apr 2021 19:17:06 +0100
Subject: [PATCH] media: rpivid: Make slice ctrl dynamic

Allows the user to submit a whole frames worth of slice headers in
one lump along with a single bitstream dmabuf for the whole lot.
This saves potentially a lot of bitstream copying.

Signed-off-by: John Cox <jc@kynesim.co.uk>
---
 drivers/staging/media/rpivid/rpivid.c      |   4 +
 drivers/staging/media/rpivid/rpivid_dec.c  |  18 ++-
 drivers/staging/media/rpivid/rpivid_h265.c | 151 +++++++++++----------
 3 files changed, 99 insertions(+), 74 deletions(-)

--- a/drivers/staging/media/rpivid/rpivid.c
+++ b/drivers/staging/media/rpivid/rpivid.c
@@ -63,7 +63,11 @@ static const struct rpivid_control rpivi
 	},
 	{
 		.cfg = {
+			.name	= "Slice param array",
 			.id	= V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS,
+			.type	= V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS,
+			.flags	= V4L2_CTRL_FLAG_DYNAMIC_ARRAY,
+			.dims	= { 0x1000 },
 		},
 		.required	= true,
 	},
--- a/drivers/staging/media/rpivid/rpivid_dec.c
+++ b/drivers/staging/media/rpivid/rpivid_dec.c
@@ -46,22 +46,34 @@ void rpivid_device_run(void *priv)
 
 	switch (ctx->src_fmt.pixelformat) {
 	case V4L2_PIX_FMT_HEVC_SLICE:
+	{
+		const struct v4l2_ctrl *ctrl;
+
 		run.h265.sps =
 			rpivid_find_control_data(ctx,
 						 V4L2_CID_MPEG_VIDEO_HEVC_SPS);
 		run.h265.pps =
 			rpivid_find_control_data(ctx,
 						 V4L2_CID_MPEG_VIDEO_HEVC_PPS);
-		run.h265.slice_params =
-			rpivid_find_control_data(ctx,
-						 V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS);
 		run.h265.dec =
 			rpivid_find_control_data(ctx,
 						 V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS);
+
+		ctrl = rpivid_find_ctrl(ctx,
+					V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS);
+		if (!ctrl || !ctrl->elems) {
+			v4l2_err(&dev->v4l2_dev, "%s: Missing slice params\n",
+				 __func__);
+			goto fail;
+		}
+		run.h265.slice_ents = ctrl->elems;
+		run.h265.slice_params = ctrl->p_cur.p;
+
 		run.h265.scaling_matrix =
 			rpivid_find_control_data(ctx,
 						 V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX);
 		break;
+	}
 
 	default:
 		break;
--- a/drivers/staging/media/rpivid/rpivid_h265.c
+++ b/drivers/staging/media/rpivid/rpivid_h265.c
@@ -245,7 +245,6 @@ struct rpivid_dec_state {
 
 	// Slice vars
 	unsigned int slice_idx;
-	bool frame_end;
 	bool slice_temporal_mvp;  /* Slice flag but constant for frame */
 
 	// Temp vars per run - don't actually need to persist
@@ -740,7 +739,8 @@ static void new_slice_segment(struct rpi
 				V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED))
 						<< 24));
 
-	if ((sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0)
+	if (!s->start_ts &&
+	    (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0)
 		write_scaling_factors(de);
 
 	if (!s->dependent_slice_segment_flag) {
@@ -1111,7 +1111,8 @@ static int wpp_end_previous_slice(struct
  * next chunk code simpler
  */
 static int wpp_decode_slice(struct rpivid_dec_env *const de,
-			    const struct rpivid_dec_state *const s)
+			    const struct rpivid_dec_state *const s,
+			    bool last_slice)
 {
 	bool reset_qp_y = true;
 	const bool indep = !s->dependent_slice_segment_flag;
@@ -1150,7 +1151,7 @@ static int wpp_decode_slice(struct rpivi
 			0, 0, s->start_ctb_x, s->start_ctb_y,
 			s->slice_qp, slice_reg_const(s));
 
-	if (s->frame_end) {
+	if (last_slice) {
 		rv = wpp_entry_fill(de, s, s->ctb_height - 1);
 		if (rv)
 			return rv;
@@ -1229,7 +1230,8 @@ static int end_previous_slice(struct rpi
 }
 
 static int decode_slice(struct rpivid_dec_env *const de,
-			const struct rpivid_dec_state *const s)
+			const struct rpivid_dec_state *const s,
+			bool last_slice)
 {
 	bool reset_qp_y;
 	unsigned int tile_x = ctb_to_tile_x(s, s->start_ctb_x);
@@ -1275,7 +1277,7 @@ static int decode_slice(struct rpivid_de
 	 * now, otherwise this will be done at the start of the next slice
 	 * when it will be known where this slice finishes
 	 */
-	if (s->frame_end) {
+	if (last_slice) {
 		rv = tile_entry_fill(de, s,
 				     s->tile_width - 1,
 				     s->tile_height - 1);
@@ -1670,11 +1672,13 @@ static u32 mk_config2(const struct rpivi
 static void rpivid_h265_setup(struct rpivid_ctx *ctx, struct rpivid_run *run)
 {
 	struct rpivid_dev *const dev = ctx->dev;
-	const struct v4l2_ctrl_hevc_slice_params *const sh =
-						run->h265.slice_params;
 	const struct v4l2_ctrl_hevc_decode_params *const dec =
 						run->h265.dec;
-//	const struct v4l2_hevc_pred_weight_table *pred_weight_table;
+	/* sh0 used where slice header contents should be constant over all
+	 * slices, or first slice of frame
+	 */
+	const struct v4l2_ctrl_hevc_slice_params *const sh0 =
+					run->h265.slice_params;
 	struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
 	struct rpivid_dec_state *const s = ctx->state;
 	struct vb2_queue *vq;
@@ -1684,20 +1688,18 @@ static void rpivid_h265_setup(struct rpi
 	int use_aux;
 	int rv;
 	bool slice_temporal_mvp;
+	bool frame_end;
 
 	xtrace_in(dev, de);
+	s->sh = NULL;  // Avoid use until in the slice loop
 
-//	pred_weight_table = &sh->pred_weight_table;
-
-	s->frame_end =
+	frame_end =
 		((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0);
 
-	slice_temporal_mvp = (sh->flags &
+	slice_temporal_mvp = (sh0->flags &
 		   V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED);
 
 	if (de && de->state != RPIVID_DECODE_END) {
-		++s->slice_idx;
-
 		switch (de->state) {
 		case RPIVID_DECODE_SLICE_CONTINUE:
 			// Expected state
@@ -1830,7 +1832,7 @@ static void rpivid_h265_setup(struct rpi
 		de->rpi_config2 = mk_config2(s);
 		de->rpi_framesize = (s->sps.pic_height_in_luma_samples << 16) |
 				    s->sps.pic_width_in_luma_samples;
-		de->rpi_currpoc = sh->slice_pic_order_cnt;
+		de->rpi_currpoc = sh0->slice_pic_order_cnt;
 
 		if (s->sps.flags &
 		    V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) {
@@ -1839,17 +1841,17 @@ static void rpivid_h265_setup(struct rpi
 
 		s->slice_idx = 0;
 
-		if (sh->slice_segment_addr != 0) {
+		if (sh0->slice_segment_addr != 0) {
 			v4l2_warn(&dev->v4l2_dev,
 				  "New frame but segment_addr=%d\n",
-				  sh->slice_segment_addr);
+				  sh0->slice_segment_addr);
 			goto fail;
 		}
 
 		/* Allocate a bitbuf if we need one - don't need one if single
 		 * slice as we can use the src buf directly
 		 */
-		if (!s->frame_end && !de->bit_copy_gptr->ptr) {
+		if (!frame_end && !de->bit_copy_gptr->ptr) {
 			size_t bits_alloc;
 			bits_alloc = rpivid_bit_buf_size(s->sps.pic_width_in_luma_samples,
 							 s->sps.pic_height_in_luma_samples,
@@ -1873,21 +1875,7 @@ static void rpivid_h265_setup(struct rpi
 	s->src_addr = 0;
 	s->src_buf = NULL;
 
-	if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) {
-		v4l2_warn(&dev->v4l2_dev,
-			  "Bit size %d > bytesused %d\n",
-			  sh->bit_size, run->src->planes[0].bytesused);
-		goto fail;
-	}
-	if (sh->data_bit_offset >= sh->bit_size ||
-	    sh->bit_size - sh->data_bit_offset < 8) {
-		v4l2_warn(&dev->v4l2_dev,
-			  "Bit size %d < Bit offset %d + 8\n",
-			  sh->bit_size, sh->data_bit_offset);
-		goto fail;
-	}
-
-	if (s->frame_end)
+	if (frame_end)
 		s->src_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf,
 							    0);
 	if (!s->src_addr)
@@ -1898,44 +1886,65 @@ static void rpivid_h265_setup(struct rpi
 	}
 
 	// Pre calc a few things
-	s->sh = sh;
 	s->dec = dec;
-	s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta;
-	s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
+	for (i = 0; i != run->h265.slice_ents; ++i) {
+		const struct v4l2_ctrl_hevc_slice_params *const sh = sh0 + i;
+		const bool last_slice = frame_end && i + 1 == run->h265.slice_ents;
+
+		s->sh = sh;
+
+		if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) {
+			v4l2_warn(&dev->v4l2_dev,
+				  "Bit size %d > bytesused %d\n",
+				  sh->bit_size, run->src->planes[0].bytesused);
+			goto fail;
+		}
+		if (sh->data_bit_offset >= sh->bit_size ||
+		    sh->bit_size - sh->data_bit_offset < 8) {
+			v4l2_warn(&dev->v4l2_dev,
+				  "Bit size %d < Bit offset %d + 8\n",
+				  sh->bit_size, sh->data_bit_offset);
+			goto fail;
+		}
+
+		s->slice_qp = 26 + s->pps.init_qp_minus26 + sh->slice_qp_delta;
+		s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
+						0 :
+						(5 - sh->five_minus_max_num_merge_cand);
+		s->dependent_slice_segment_flag =
+			((sh->flags &
+			  V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0);
+
+		s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ?
+					0 :
+					sh->num_ref_idx_l0_active_minus1 + 1;
+		s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ?
 					0 :
-					(5 - sh->five_minus_max_num_merge_cand);
-	// * SH DSS flag invented by me - but clearly needed
-	s->dependent_slice_segment_flag =
-		((sh->flags &
-		  V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0);
-
-	s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ?
-				0 :
-				sh->num_ref_idx_l0_active_minus1 + 1;
-	s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ?
-				0 :
-				sh->num_ref_idx_l1_active_minus1 + 1;
-
-	if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
-		populate_scaling_factors(run, de, s);
-
-	// Calc all the random coord info to avoid repeated conversion in/out
-	s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
-	s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y;
-	s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y;
-	// Last CTB of previous slice
-	prev_rs = !s->start_ts ? 0 : s->ctb_addr_ts_to_rs[s->start_ts - 1];
-	s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y;
-	s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y;
+					sh->num_ref_idx_l1_active_minus1 + 1;
 
-	if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
-		rv = wpp_decode_slice(de, s);
-	else
-		rv = decode_slice(de, s);
-	if (rv)
-		goto fail;
+		if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
+			populate_scaling_factors(run, de, s);
+
+		/* Calc all the random coord info to avoid repeated conversion in/out */
+		s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
+		s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y;
+		s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y;
+		/* Last CTB of previous slice */
+		prev_rs = !s->start_ts ? 0 : s->ctb_addr_ts_to_rs[s->start_ts - 1];
+		s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y;
+		s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y;
+
+		if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
+			rv = wpp_decode_slice(de, s, last_slice);
+		else
+			rv = decode_slice(de, s, last_slice);
+		if (rv)
+			goto fail;
+
+		++s->slice_idx;
+	}
 
-	if (!s->frame_end) {
+	if (!frame_end) {
 		xtrace_ok(dev, de);
 		return;
 	}
@@ -2054,8 +2063,8 @@ static void rpivid_h265_setup(struct rpi
 fail:
 	if (de)
 		// Actual error reporting happens in Trigger
-		de->state = s->frame_end ? RPIVID_DECODE_ERROR_DONE :
-					   RPIVID_DECODE_ERROR_CONTINUE;
+		de->state = frame_end ? RPIVID_DECODE_ERROR_DONE :
+					RPIVID_DECODE_ERROR_CONTINUE;
 	xtrace_fail(dev, de);
 }
 
